-rw-r--r-- | src/vulkan/Makefile.am      |   7
-rw-r--r-- | src/vulkan/anv_compiler.cpp | 891
-rw-r--r-- | src/vulkan/anv_device.c     |  25
-rw-r--r-- | src/vulkan/anv_pipeline.c   | 646
-rw-r--r-- | src/vulkan/anv_private.h    |  12
-rw-r--r-- | src/vulkan/gen8_pipeline.c  |  22
6 files changed, 661 insertions(+), 942 deletions(-)
diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am
index 5abbd379b54..8538046e567 100644
--- a/src/vulkan/Makefile.am
+++ b/src/vulkan/Makefile.am
@@ -55,14 +55,10 @@ libvulkan_la_CFLAGS = \
 	-Wall -Wno-unused-parameter -fvisibility=hidden -O0 -g \
 	-Wstrict-prototypes -Wmissing-prototypes -Wno-override-init
 
-libvulkan_la_CXXFLAGS = \
-	-Wall -Wno-unused-parameter -fvisibility=hidden -O0 -g
-
 VULKAN_SOURCES = \
 	anv_allocator.c \
 	anv_cmd_buffer.c \
 	anv_batch_chain.c \
-	anv_compiler.cpp \
 	anv_device.c \
 	anv_dump.c \
 	anv_entrypoints.c \
@@ -124,7 +120,7 @@ libvulkan_la_LIBADD = $(WAYLAND_LIBS) -lxcb -lxcb-dri3 \
 	$(top_builddir)/src/mesa/drivers/dri/i965/libi965_compiler.la \
 	../mesa/libmesa.la \
 	../mesa/drivers/dri/common/libdri_test_stubs.la \
-	-lpthread -ldl
+	-lpthread -ldl -lstdc++
 
 # Libvulkan with dummy gem. Used for unit tests.
@@ -133,7 +129,6 @@ libvulkan_test_la_SOURCES = \
 	anv_gem_stubs.c
 
 libvulkan_test_la_CFLAGS = $(libvulkan_la_CFLAGS)
-libvulkan_test_la_CXXFLAGS = $(libvulkan_la_CXXFLAGS)
 libvulkan_test_la_LIBADD = $(libvulkan_la_LIBADD)
 
 include $(top_srcdir)/install-lib-links.mk
diff --git a/src/vulkan/anv_compiler.cpp b/src/vulkan/anv_compiler.cpp
deleted file mode 100644
index 1ecd88a0331..00000000000
--- a/src/vulkan/anv_compiler.cpp
+++ /dev/null
@@ -1,891 +0,0 @@
-/*
- * Copyright © 2015 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-#include <sys/stat.h>
-#include <unistd.h>
-#include <fcntl.h>
-
-#include "anv_private.h"
-#include "anv_nir.h"
-
-#include <brw_context.h>
-#include <brw_wm.h> /* brw_new_shader_program is here */
-#include <brw_nir.h>
-
-#include <brw_vs.h>
-#include <brw_gs.h>
-#include <brw_cs.h>
-#include "brw_vec4_gs_visitor.h"
-#include <brw_compiler.h>
-
-#include <mesa/main/shaderobj.h>
-#include <mesa/main/fbobject.h>
-#include <mesa/main/context.h>
-#include <mesa/program/program.h>
-#include <glsl/program.h>
-
-/* XXX: We need this to keep symbols in nir.h from conflicting with the
- * generated GEN command packing headers.  We need to fix *both* to not
- * define something as generic as LOAD.
- */
-#undef LOAD
-
-#include <glsl/nir/nir_spirv.h>
-
-#define SPIR_V_MAGIC_NUMBER 0x07230203
-
-static void
-fail_if(int cond, const char *format, ...)
-{
-   va_list args;
-
-   if (!cond)
-      return;
-
-   va_start(args, format);
-   vfprintf(stderr, format, args);
-   va_end(args);
-
-   exit(1);
-}
-
-static VkResult
-set_binding_table_layout(struct brw_stage_prog_data *prog_data,
-                         struct anv_pipeline *pipeline, uint32_t stage)
-{
-   unsigned bias;
-   if (stage == VK_SHADER_STAGE_FRAGMENT)
-      bias = MAX_RTS;
-   else
-      bias = 0;
-
-   prog_data->binding_table.size_bytes = 0;
-   prog_data->binding_table.texture_start = bias;
-   prog_data->binding_table.ubo_start = bias;
-   prog_data->binding_table.image_start = bias;
-
-   return VK_SUCCESS;
-}
-
-static uint32_t
-upload_kernel(struct anv_pipeline *pipeline, const void *data, size_t size)
-{
-   struct anv_state state =
-      anv_state_stream_alloc(&pipeline->program_stream, size, 64);
-
-   assert(size < pipeline->program_stream.block_pool->block_size);
-
-   memcpy(state.map, data, size);
-
-   return state.offset;
-}
-
-static void
-create_params_array(struct anv_pipeline *pipeline,
-                    struct gl_shader *shader,
-                    struct brw_stage_prog_data *prog_data)
-{
-   VkShaderStage stage = anv_vk_shader_stage_for_mesa_stage(shader->Stage);
-   unsigned num_params = 0;
-
-   if (shader->num_uniform_components) {
-      /* If the shader uses any push constants at all, we'll just give
-       * them the maximum possible number
-       */
-      num_params += MAX_PUSH_CONSTANTS_SIZE / sizeof(float);
-   }
-
-   if (pipeline->layout && pipeline->layout->stage[stage].has_dynamic_offsets)
-      num_params += MAX_DYNAMIC_BUFFERS;
-
-   if (num_params == 0)
-      return;
-
-   prog_data->param = (const gl_constant_value **)
-      anv_device_alloc(pipeline->device,
-                       num_params * sizeof(gl_constant_value *),
-                       8, VK_SYSTEM_ALLOC_TYPE_INTERNAL_SHADER);
-
-   /* We now set the param values to be offsets into a
-    * anv_push_constant_data structure.  Since the compiler doesn't
-    * actually dereference any of the gl_constant_value pointers in the
-    * params array, it doesn't really matter what we put here.
-    */
-   struct anv_push_constants *null_data = NULL;
-   for (unsigned i = 0; i < num_params; i++)
-      prog_data->param[i] =
-         (const gl_constant_value *)&null_data->client_data[i * sizeof(float)];
-}
-
-static void
-brw_vs_populate_key(struct brw_context *brw,
-                    struct brw_vertex_program *vp,
-                    struct brw_vs_prog_key *key)
-{
-   memset(key, 0, sizeof(*key));
-
-   /* XXX: Handle vertex input work-arounds */
-
-   /* XXX: Handle sampler_prog_key */
-}
-
-static bool
-really_do_vs_prog(struct brw_context *brw,
-                  struct gl_shader_program *prog,
-                  struct brw_vertex_program *vp,
-                  struct brw_vs_prog_key *key, struct anv_pipeline *pipeline)
-{
-   GLuint program_size;
-   const GLuint *program;
-   struct brw_vs_prog_data *prog_data = &pipeline->vs_prog_data;
-   void *mem_ctx;
-   struct gl_shader *vs = NULL;
-
-   if (prog)
-      vs = prog->_LinkedShaders[MESA_SHADER_VERTEX];
-
-   memset(prog_data, 0, sizeof(*prog_data));
-
-   mem_ctx = ralloc_context(NULL);
-
-   create_params_array(pipeline, vs, &prog_data->base.base);
-   anv_nir_apply_dynamic_offsets(pipeline, vs->Program->nir,
-                                 &prog_data->base.base);
-   anv_nir_apply_pipeline_layout(vs->Program->nir, pipeline->layout);
-
-   prog_data->inputs_read = vp->program.Base.InputsRead;
-
-   brw_compute_vue_map(brw->intelScreen->devinfo,
-                       &prog_data->base.vue_map,
-                       vp->program.Base.OutputsWritten,
-                       prog ? prog->SeparateShader : false);
-
-   set_binding_table_layout(&prog_data->base.base, pipeline,
-                            VK_SHADER_STAGE_VERTEX);
-
-   /* Emit GEN4 code.
-    */
-   program = brw_compile_vs(brw->intelScreen->compiler, brw, mem_ctx,
-                            key, prog_data, vs->Program->nir, NULL, false, -1,
-                            &program_size, NULL);
-   if (program == NULL) {
-      ralloc_free(mem_ctx);
-      return false;
-   }
-
-   const uint32_t offset = upload_kernel(pipeline, program, program_size);
-   if (prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8) {
-      pipeline->vs_simd8 = offset;
-      pipeline->vs_vec4 = NO_KERNEL;
-   } else {
-      pipeline->vs_simd8 = NO_KERNEL;
-      pipeline->vs_vec4 = offset;
-   }
-
-   ralloc_free(mem_ctx);
-
-   return true;
-}
-
-void brw_wm_populate_key(struct brw_context *brw,
-                         struct brw_fragment_program *fp,
-                         struct brw_wm_prog_key *key)
-{
-   struct gl_context *ctx = &brw->ctx;
-   bool program_uses_dfdy = fp->program.UsesDFdy;
-   struct gl_framebuffer draw_buffer;
-   bool multisample_fbo;
-
-   memset(key, 0, sizeof(*key));
-
-   for (int i = 0; i < MAX_SAMPLERS; i++) {
-      /* Assume color sampler, no swizzling. */
-      key->tex.swizzles[i] = SWIZZLE_XYZW;
-   }
-
-   /* A non-zero framebuffer name indicates that the framebuffer was created by
-    * the user rather than the window system. */
-   draw_buffer.Name = 1;
-   draw_buffer.Visual.samples = 1;
-   draw_buffer._NumColorDrawBuffers = 1;
-   draw_buffer.Width = 400;
-   draw_buffer.Height = 400;
-   ctx->DrawBuffer = &draw_buffer;
-
-   multisample_fbo = ctx->DrawBuffer->Visual.samples > 1;
-
-   /* _NEW_HINT */
-   key->high_quality_derivatives =
-      ctx->Hint.FragmentShaderDerivative == GL_NICEST;
-
-   /* _NEW_FRAG_CLAMP | _NEW_BUFFERS */
-   key->clamp_fragment_color = ctx->Color._ClampFragmentColor;
-
-   /* _NEW_BUFFERS */
-   /*
-    * Include the draw buffer origin and height so that we can calculate
-    * fragment position values relative to the bottom left of the drawable,
-    * from the incoming screen origin relative position we get as part of our
-    * payload.
-    *
-    * This is only needed for the WM_WPOSXY opcode when the fragment program
-    * uses the gl_FragCoord input.
-    *
-    * We could avoid recompiling by including this as a constant referenced by
-    * our program, but if we were to do that it would also be nice to handle
-    * getting that constant updated at batchbuffer submit time (when we
-    * hold the lock and know where the buffer really is) rather than at emit
-    * time when we don't hold the lock and are just guessing.  We could also
-    * just avoid using this as key data if the program doesn't use
-    * fragment.position.
-    *
-    * For DRI2 the origin_x/y will always be (0,0) but we still need the
-    * drawable height in order to invert the Y axis.
-    */
-   if (fp->program.Base.InputsRead & VARYING_BIT_POS) {
-      key->drawable_height = ctx->DrawBuffer->Height;
-   }
-
-   if ((fp->program.Base.InputsRead & VARYING_BIT_POS) || program_uses_dfdy) {
-      key->render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
-   }
-
-   /* _NEW_BUFFERS */
-   key->nr_color_regions = ctx->DrawBuffer->_NumColorDrawBuffers;
-
-   /* _NEW_MULTISAMPLE, _NEW_COLOR, _NEW_BUFFERS */
-   key->replicate_alpha = ctx->DrawBuffer->_NumColorDrawBuffers > 1 &&
-      (ctx->Multisample.SampleAlphaToCoverage || ctx->Color.AlphaEnabled);
-
-   /* _NEW_BUFFERS _NEW_MULTISAMPLE */
-   /* Ignore sample qualifier while computing this flag. */
-   key->persample_shading =
-      _mesa_get_min_invocations_per_fragment(ctx, &fp->program, true) > 1;
-   if (key->persample_shading)
-      key->persample_2x = ctx->DrawBuffer->Visual.samples == 2;
-
-   key->compute_pos_offset =
-      _mesa_get_min_invocations_per_fragment(ctx, &fp->program, false) > 1 &&
-      fp->program.Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_POS;
-
-   key->compute_sample_id =
-      multisample_fbo &&
-      ctx->Multisample.Enabled &&
-      (fp->program.Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_ID);
-
-   /* The unique fragment program ID */
-   key->program_string_id = fp->id;
-
-   ctx->DrawBuffer = NULL;
-}
-
-static bool
-really_do_wm_prog(struct brw_context *brw,
-                  struct gl_shader_program *prog,
-                  struct brw_fragment_program *fp,
-                  struct brw_wm_prog_key *key, struct anv_pipeline *pipeline)
-{
-   void *mem_ctx = ralloc_context(NULL);
-   struct brw_wm_prog_data *prog_data = &pipeline->wm_prog_data;
-   struct gl_shader *fs = NULL;
-   unsigned int program_size;
-   const uint32_t *program;
-
-   if (prog)
-      fs = prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
-
-   memset(prog_data, 0, sizeof(*prog_data));
-
-   create_params_array(pipeline, fs, &prog_data->base);
-   anv_nir_apply_dynamic_offsets(pipeline, fs->Program->nir, &prog_data->base);
-   anv_nir_apply_pipeline_layout(fs->Program->nir, pipeline->layout);
-
-   set_binding_table_layout(&prog_data->base, pipeline,
-                            VK_SHADER_STAGE_FRAGMENT);
-   /* This needs to come after shader time and pull constant entries, but we
-    * don't have those set up now, so just put it after the layout entries.
-    */
-   prog_data->binding_table.render_target_start = 0;
-
-   program = brw_compile_fs(brw->intelScreen->compiler, brw, mem_ctx, key,
-                            prog_data, fp->program.Base.nir, fs->Program,
-                            -1, -1, brw->use_rep_send, &program_size, NULL);
-   if (program == NULL) {
-      ralloc_free(mem_ctx);
-      return false;
-   }
-
-   uint32_t offset = upload_kernel(pipeline, program, program_size);
-
-   if (prog_data->no_8)
-      pipeline->ps_simd8 = NO_KERNEL;
-   else
-      pipeline->ps_simd8 = offset;
-
-   if (prog_data->no_8 || prog_data->prog_offset_16) {
-      pipeline->ps_simd16 = offset + prog_data->prog_offset_16;
-   } else {
-      pipeline->ps_simd16 = NO_KERNEL;
-   }
-
-   ralloc_free(mem_ctx);
-
-   return true;
-}
-
-static bool
-brw_codegen_cs_prog(struct brw_context *brw,
-                    struct gl_shader_program *prog,
-                    struct brw_compute_program *cp,
-                    struct brw_cs_prog_key *key, struct anv_pipeline *pipeline)
-{
-   const GLuint *program;
-   void *mem_ctx = ralloc_context(NULL);
-   GLuint program_size;
-   struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;
-
-   struct gl_shader *cs = prog->_LinkedShaders[MESA_SHADER_COMPUTE];
-   assert(cs);
-
-   memset(prog_data, 0, sizeof(*prog_data));
-
-   set_binding_table_layout(&prog_data->base, pipeline, VK_SHADER_STAGE_COMPUTE);
-
-   create_params_array(pipeline, cs, &prog_data->base);
-   anv_nir_apply_dynamic_offsets(pipeline, cs->Program->nir, &prog_data->base);
-   anv_nir_apply_pipeline_layout(cs->Program->nir, pipeline->layout);
-
-   program = brw_compile_cs(brw->intelScreen->compiler, brw, mem_ctx, key,
-                            prog_data, cs->Program->nir, -1,
-                            &program_size, NULL);
-   if (program == NULL) {
-      ralloc_free(mem_ctx);
-      return false;
-   }
-
-   if (unlikely(INTEL_DEBUG & DEBUG_CS))
-      fprintf(stderr, "\n");
-
-   pipeline->cs_simd = upload_kernel(pipeline, program, program_size);
-
-   ralloc_free(mem_ctx);
-
-   return true;
-}
-
-static void
-brw_cs_populate_key(struct brw_context *brw,
-                    struct brw_compute_program *bcp, struct brw_cs_prog_key *key)
-{
-   memset(key, 0, sizeof(*key));
-
-   /* The unique compute program ID */
-   key->program_string_id = bcp->id;
-}
-
-struct anv_compiler {
-   struct anv_device *device;
-   struct intel_screen *screen;
-   struct brw_context *brw;
-   struct gl_pipeline_object pipeline;
-};
-
-extern "C" {
-
-struct anv_compiler *
-anv_compiler_create(struct anv_device *device)
-{
-   const struct brw_device_info *devinfo = &device->info;
-   struct anv_compiler *compiler;
-   struct gl_context *ctx;
-
-   compiler = rzalloc(NULL, struct anv_compiler);
-   if (compiler == NULL)
-      return NULL;
-
-   compiler->screen = rzalloc(compiler, struct intel_screen);
-   if (compiler->screen == NULL)
-      goto fail;
-
-   compiler->brw = rzalloc(compiler, struct brw_context);
-   if (compiler->brw == NULL)
-      goto fail;
-
-   compiler->device = device;
-
-   compiler->brw->gen = devinfo->gen;
-   compiler->brw->is_g4x = devinfo->is_g4x;
-   compiler->brw->is_baytrail = devinfo->is_baytrail;
-   compiler->brw->is_haswell = devinfo->is_haswell;
-   compiler->brw->is_cherryview = devinfo->is_cherryview;
-
-   /* We need this at least for CS, which will check brw->max_cs_threads
-    * against the work group size. */
-   compiler->brw->max_vs_threads = devinfo->max_vs_threads;
-   compiler->brw->max_hs_threads = devinfo->max_hs_threads;
-   compiler->brw->max_ds_threads = devinfo->max_ds_threads;
-   compiler->brw->max_gs_threads = devinfo->max_gs_threads;
-   compiler->brw->max_wm_threads = devinfo->max_wm_threads;
-   compiler->brw->max_cs_threads = devinfo->max_cs_threads;
-   compiler->brw->urb.size = devinfo->urb.size;
-   compiler->brw->urb.min_vs_entries = devinfo->urb.min_vs_entries;
-   compiler->brw->urb.max_vs_entries = devinfo->urb.max_vs_entries;
-   compiler->brw->urb.max_hs_entries = devinfo->urb.max_hs_entries;
-   compiler->brw->urb.max_ds_entries = devinfo->urb.max_ds_entries;
-   compiler->brw->urb.max_gs_entries = devinfo->urb.max_gs_entries;
-
-   compiler->brw->intelScreen = compiler->screen;
-   compiler->screen->devinfo = &device->info;
-
-   brw_process_intel_debug_variable();
-
-   compiler->screen->compiler = device->instance->physicalDevice.compiler;
-
-   ctx = &compiler->brw->ctx;
-   _mesa_init_shader_object_functions(&ctx->Driver);
-
-   /* brw_select_clip_planes() needs this for bogus reasons. */
-   ctx->_Shader = &compiler->pipeline;
-
-   return compiler;
-
- fail:
-   ralloc_free(compiler);
-   return NULL;
-}
-
-void
-anv_compiler_destroy(struct anv_compiler *compiler)
-{
-   _mesa_free_errors_data(&compiler->brw->ctx);
-   ralloc_free(compiler);
-}
-
-/* From gen7_urb.c */
-
-/* FIXME: Add to struct intel_device_info */
-
-static const int gen8_push_size = 32 * 1024;
-
-static void
-gen7_compute_urb_partition(struct anv_pipeline *pipeline)
-{
-   const struct brw_device_info *devinfo = &pipeline->device->info;
-   bool vs_present = pipeline->vs_simd8 != NO_KERNEL;
-   unsigned vs_size = vs_present ? pipeline->vs_prog_data.base.urb_entry_size : 1;
-   unsigned vs_entry_size_bytes = vs_size * 64;
-   bool gs_present = pipeline->gs_vec4 != NO_KERNEL;
-   unsigned gs_size = gs_present ? pipeline->gs_prog_data.base.urb_entry_size : 1;
-   unsigned gs_entry_size_bytes = gs_size * 64;
-
-   /* From p35 of the Ivy Bridge PRM (section 1.7.1: 3DSTATE_URB_GS):
-    *
-    *     VS Number of URB Entries must be divisible by 8 if the VS URB Entry
-    *     Allocation Size is less than 9 512-bit URB entries.
-    *
-    * Similar text exists for GS.
-    */
-   unsigned vs_granularity = (vs_size < 9) ? 8 : 1;
-   unsigned gs_granularity = (gs_size < 9) ? 8 : 1;
-
-   /* URB allocations must be done in 8k chunks. */
-   unsigned chunk_size_bytes = 8192;
-
-   /* Determine the size of the URB in chunks. */
-   unsigned urb_chunks = devinfo->urb.size * 1024 / chunk_size_bytes;
-
-   /* Reserve space for push constants */
-   unsigned push_constant_bytes = gen8_push_size;
-   unsigned push_constant_chunks =
-      push_constant_bytes / chunk_size_bytes;
-
-   /* Initially, assign each stage the minimum amount of URB space it needs,
-    * and make a note of how much additional space it "wants" (the amount of
-    * additional space it could actually make use of).
-    */
-
-   /* VS has a lower limit on the number of URB entries */
-   unsigned vs_chunks =
-      ALIGN(devinfo->urb.min_vs_entries * vs_entry_size_bytes,
-            chunk_size_bytes) / chunk_size_bytes;
-   unsigned vs_wants =
-      ALIGN(devinfo->urb.max_vs_entries * vs_entry_size_bytes,
-            chunk_size_bytes) / chunk_size_bytes - vs_chunks;
-
-   unsigned gs_chunks = 0;
-   unsigned gs_wants = 0;
-   if (gs_present) {
-      /* There are two constraints on the minimum amount of URB space we can
-       * allocate:
-       *
-       * (1) We need room for at least 2 URB entries, since we always operate
-       * the GS in DUAL_OBJECT mode.
-       *
-       * (2) We can't allocate less than nr_gs_entries_granularity.
-       */
-      gs_chunks = ALIGN(MAX2(gs_granularity, 2) * gs_entry_size_bytes,
-                        chunk_size_bytes) / chunk_size_bytes;
-      gs_wants =
-         ALIGN(devinfo->urb.max_gs_entries * gs_entry_size_bytes,
-               chunk_size_bytes) / chunk_size_bytes - gs_chunks;
-   }
-
-   /* There should always be enough URB space to satisfy the minimum
-    * requirements of each stage.
-    */
-   unsigned total_needs = push_constant_chunks + vs_chunks + gs_chunks;
-   assert(total_needs <= urb_chunks);
-
-   /* Mete out remaining space (if any) in proportion to "wants". */
-   unsigned total_wants = vs_wants + gs_wants;
-   unsigned remaining_space = urb_chunks - total_needs;
-   if (remaining_space > total_wants)
-      remaining_space = total_wants;
-   if (remaining_space > 0) {
-      unsigned vs_additional = (unsigned)
-         round(vs_wants * (((double) remaining_space) / total_wants));
-      vs_chunks += vs_additional;
-      remaining_space -= vs_additional;
-      gs_chunks += remaining_space;
-   }
-
-   /* Sanity check that we haven't over-allocated. */
-   assert(push_constant_chunks + vs_chunks + gs_chunks <= urb_chunks);
-
-   /* Finally, compute the number of entries that can fit in the space
-    * allocated to each stage.
-    */
-   unsigned nr_vs_entries = vs_chunks * chunk_size_bytes / vs_entry_size_bytes;
-   unsigned nr_gs_entries = gs_chunks * chunk_size_bytes / gs_entry_size_bytes;
-
-   /* Since we rounded up when computing *_wants, this may be slightly more
-    * than the maximum allowed amount, so correct for that.
-    */
-   nr_vs_entries = MIN2(nr_vs_entries, devinfo->urb.max_vs_entries);
-   nr_gs_entries = MIN2(nr_gs_entries, devinfo->urb.max_gs_entries);
-
-   /* Ensure that we program a multiple of the granularity. */
-   nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, vs_granularity);
-   nr_gs_entries = ROUND_DOWN_TO(nr_gs_entries, gs_granularity);
-
-   /* Finally, sanity check to make sure we have at least the minimum number
-    * of entries needed for each stage.
-    */
-   assert(nr_vs_entries >= devinfo->urb.min_vs_entries);
-   if (gs_present)
-      assert(nr_gs_entries >= 2);
-
-   /* Lay out the URB in the following order:
-    * - push constants
-    * - VS
-    * - GS
-    */
-   pipeline->urb.vs_start = push_constant_chunks;
-   pipeline->urb.vs_size = vs_size;
-   pipeline->urb.nr_vs_entries = nr_vs_entries;
-
-   pipeline->urb.gs_start = push_constant_chunks + vs_chunks;
-   pipeline->urb.gs_size = gs_size;
-   pipeline->urb.nr_gs_entries = nr_gs_entries;
-}
-
-static const struct {
-   uint32_t token;
-   gl_shader_stage stage;
-   const char *name;
-} stage_info[] = {
-   { GL_VERTEX_SHADER, MESA_SHADER_VERTEX, "vertex" },
-   { GL_TESS_CONTROL_SHADER, (gl_shader_stage)-1, "tess control" },
-   { GL_TESS_EVALUATION_SHADER, (gl_shader_stage)-1, "tess evaluation" },
-   { GL_GEOMETRY_SHADER, MESA_SHADER_GEOMETRY, "geometry" },
-   { GL_FRAGMENT_SHADER, MESA_SHADER_FRAGMENT, "fragment" },
-   { GL_COMPUTE_SHADER, MESA_SHADER_COMPUTE, "compute" },
-};
-
-struct spirv_header {
-   uint32_t magic;
-   uint32_t version;
-   uint32_t gen_magic;
-};
-
-static void
-setup_nir_io(struct gl_shader *mesa_shader,
-             nir_shader *shader)
-{
-   struct gl_program *prog = mesa_shader->Program;
-
-   foreach_list_typed(nir_variable, var, node, &shader->inputs) {
-      prog->InputsRead |= BITFIELD64_BIT(var->data.location);
-      if (shader->stage == MESA_SHADER_FRAGMENT) {
-         struct gl_fragment_program *fprog = (struct gl_fragment_program *)prog;
-
-         fprog->InterpQualifier[var->data.location] =
-            (glsl_interp_qualifier)var->data.interpolation;
-         if (var->data.centroid)
-            fprog->IsCentroid |= BITFIELD64_BIT(var->data.location);
-         if (var->data.sample)
-            fprog->IsSample |= BITFIELD64_BIT(var->data.location);
-      }
-   }
-
-   foreach_list_typed(nir_variable, var, node, &shader->outputs) {
-      prog->OutputsWritten |= BITFIELD64_BIT(var->data.location);
-   }
-
-   shader->info.system_values_read = 0;
-   foreach_list_typed(nir_variable, var, node, &shader->system_values) {
-      shader->info.system_values_read |= BITFIELD64_BIT(var->data.location);
-   }
-
-   shader->info.inputs_read = prog->InputsRead;
-   shader->info.outputs_written = prog->OutputsWritten;
-}
-
-static void
-anv_compile_shader_spirv(struct anv_compiler *compiler,
-                         struct gl_shader_program *program,
-                         struct anv_pipeline *pipeline, uint32_t stage)
-{
-   struct brw_context *brw = compiler->brw;
-   struct anv_shader *shader = pipeline->shaders[stage];
-   struct gl_shader *mesa_shader;
-   int name = 0;
-
-   mesa_shader = brw_new_shader(&brw->ctx, name, stage_info[stage].token);
-   fail_if(mesa_shader == NULL,
-           "failed to create %s shader\n", stage_info[stage].name);
-
-#define CREATE_PROGRAM(stage) \
-   &ralloc(mesa_shader, struct brw_##stage##_program)->program.Base
-
-   bool is_scalar;
-   struct gl_program *prog;
-   switch (stage) {
-   case VK_SHADER_STAGE_VERTEX:
-      prog = CREATE_PROGRAM(vertex);
-      is_scalar = compiler->screen->compiler->scalar_vs;
-      break;
-   case VK_SHADER_STAGE_GEOMETRY:
-      prog = CREATE_PROGRAM(geometry);
-      is_scalar = false;
-      break;
-   case VK_SHADER_STAGE_FRAGMENT:
-      prog = CREATE_PROGRAM(fragment);
-      is_scalar = true;
-      break;
-   case VK_SHADER_STAGE_COMPUTE:
-      prog = CREATE_PROGRAM(compute);
-      is_scalar = true;
-      break;
-   default:
-      unreachable("Unsupported shader stage");
-   }
-   _mesa_init_gl_program(prog, 0, 0);
-   _mesa_reference_program(&brw->ctx, &mesa_shader->Program, prog);
-
-   mesa_shader->Program->Parameters =
-      rzalloc(mesa_shader, struct gl_program_parameter_list);
-
-   mesa_shader->Type = stage_info[stage].token;
-   mesa_shader->Stage = stage_info[stage].stage;
-
-   struct gl_shader_compiler_options *glsl_options =
-      &compiler->screen->compiler->glsl_compiler_options[stage_info[stage].stage];
-
-   if (shader->module->nir) {
-      /* Some things such as our meta clear/blit code will give us a NIR
-       * shader directly.  In that case, we just ignore the SPIR-V entirely
-       * and just use the NIR shader */
-      mesa_shader->Program->nir = shader->module->nir;
-      mesa_shader->Program->nir->options = glsl_options->NirOptions;
-   } else {
-      uint32_t *spirv = (uint32_t *) shader->module->data;
-      assert(spirv[0] == SPIR_V_MAGIC_NUMBER);
-      assert(shader->module->size % 4 == 0);
-
-      mesa_shader->Program->nir =
-         spirv_to_nir(spirv, shader->module->size / 4,
-                      stage_info[stage].stage, glsl_options->NirOptions);
-   }
-   nir_validate_shader(mesa_shader->Program->nir);
-
-   brw_preprocess_nir(mesa_shader->Program->nir,
-                      compiler->screen->devinfo, is_scalar);
-
-   setup_nir_io(mesa_shader, mesa_shader->Program->nir);
-
-   brw_postprocess_nir(mesa_shader->Program->nir,
-                       compiler->screen->devinfo, is_scalar);
-
-   mesa_shader->num_uniform_components =
-      mesa_shader->Program->nir->num_uniforms;
-
-   fail_if(mesa_shader->Program->nir == NULL,
-           "failed to translate SPIR-V to NIR\n");
-
-   _mesa_reference_shader(&brw->ctx, &program->Shaders[program->NumShaders],
-                          mesa_shader);
-   program->NumShaders++;
-}
-
-static void
-add_compiled_stage(struct anv_pipeline *pipeline, uint32_t stage,
-                   struct brw_stage_prog_data *prog_data)
-{
-   struct brw_device_info *devinfo = &pipeline->device->info;
-   uint32_t max_threads[] = {
-      [VK_SHADER_STAGE_VERTEX]          = devinfo->max_vs_threads,
-      [VK_SHADER_STAGE_TESS_CONTROL]    = 0,
-      [VK_SHADER_STAGE_TESS_EVALUATION] = 0,
-      [VK_SHADER_STAGE_GEOMETRY]        = devinfo->max_gs_threads,
-      [VK_SHADER_STAGE_FRAGMENT]        = devinfo->max_wm_threads,
-      [VK_SHADER_STAGE_COMPUTE]         = devinfo->max_cs_threads,
-   };
-
-   pipeline->prog_data[stage] = prog_data;
-   pipeline->active_stages |= 1 << stage;
-   pipeline->scratch_start[stage] = pipeline->total_scratch;
-   pipeline->total_scratch =
-      align_u32(pipeline->total_scratch, 1024) +
-      prog_data->total_scratch * max_threads[stage];
-}
-
-int
-anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline)
-{
-   struct gl_shader_program *program;
-   int name = 0;
-   struct brw_context *brw = compiler->brw;
-
-   pipeline->writes_point_size = false;
-
-   /* When we free the pipeline, we detect stages based on the NULL status
-    * of various prog_data pointers.  Make them NULL by default.
-    */
-   memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data));
-   memset(pipeline->scratch_start, 0, sizeof(pipeline->scratch_start));
-
-   brw->use_rep_send = pipeline->use_repclear;
-   brw->no_simd8 = pipeline->use_repclear;
-
-   program = _mesa_new_shader_program(name);
-   program->Shaders = (struct gl_shader **)
-      calloc(VK_SHADER_STAGE_NUM, sizeof(struct gl_shader *));
-   fail_if(program == NULL || program->Shaders == NULL,
-           "failed to create program\n");
-
-   for (unsigned i = 0; i < VK_SHADER_STAGE_NUM; i++) {
-      if (pipeline->shaders[i])
-         anv_compile_shader_spirv(compiler, program, pipeline, i);
-   }
-
-   for (unsigned i = 0; i < program->NumShaders; i++) {
-      struct gl_shader *shader = program->Shaders[i];
-      program->_LinkedShaders[shader->Stage] = shader;
-   }
-
-   bool success;
-   pipeline->active_stages = 0;
-   pipeline->total_scratch = 0;
-
-   if (pipeline->shaders[VK_SHADER_STAGE_VERTEX]) {
-      struct brw_vs_prog_key vs_key;
-      struct gl_vertex_program *vp = (struct gl_vertex_program *)
-         program->_LinkedShaders[MESA_SHADER_VERTEX]->Program;
-      struct brw_vertex_program *bvp = brw_vertex_program(vp);
-
-      brw_vs_populate_key(brw, bvp, &vs_key);
-
-      success = really_do_vs_prog(brw, program, bvp, &vs_key, pipeline);
-      fail_if(!success, "do_wm_prog failed\n");
-      add_compiled_stage(pipeline, VK_SHADER_STAGE_VERTEX,
-                         &pipeline->vs_prog_data.base.base);
-
-      if (vp->Base.OutputsWritten & VARYING_SLOT_PSIZ)
-         pipeline->writes_point_size = true;
-   } else {
-      memset(&pipeline->vs_prog_data, 0, sizeof(pipeline->vs_prog_data));
-      pipeline->vs_simd8 = NO_KERNEL;
-      pipeline->vs_vec4 = NO_KERNEL;
-   }
-
-   /* Geometry shaders not yet supported */
-   anv_assert(pipeline->shaders[VK_SHADER_STAGE_GEOMETRY] == NULL);
-   pipeline->gs_vec4 = NO_KERNEL;
-
-   if (pipeline->shaders[VK_SHADER_STAGE_FRAGMENT]) {
-      struct brw_wm_prog_key wm_key;
-      struct gl_fragment_program *fp = (struct gl_fragment_program *)
-         program->_LinkedShaders[MESA_SHADER_FRAGMENT]->Program;
-      struct brw_fragment_program *bfp = brw_fragment_program(fp);
-
-      brw_wm_populate_key(brw, bfp, &wm_key);
-
-      success = really_do_wm_prog(brw, program, bfp, &wm_key, pipeline);
-      fail_if(!success, "do_wm_prog failed\n");
-      add_compiled_stage(pipeline, VK_SHADER_STAGE_FRAGMENT,
-                         &pipeline->wm_prog_data.base);
-   }
-
-   if (pipeline->shaders[VK_SHADER_STAGE_COMPUTE]) {
-      struct brw_cs_prog_key cs_key;
-      struct gl_compute_program *cp = (struct gl_compute_program *)
-         program->_LinkedShaders[MESA_SHADER_COMPUTE]->Program;
-      struct brw_compute_program *bcp = brw_compute_program(cp);
-
-      brw_cs_populate_key(brw, bcp, &cs_key);
-
-      success = brw_codegen_cs_prog(brw, program, bcp, &cs_key, pipeline);
-      fail_if(!success, "brw_codegen_cs_prog failed\n");
-      add_compiled_stage(pipeline, VK_SHADER_STAGE_COMPUTE,
-                         &pipeline->cs_prog_data.base);
-   }
-
-   _mesa_delete_shader_program(&brw->ctx, program);
-
-   struct anv_device *device = compiler->device;
-   while (device->scratch_block_pool.bo.size < pipeline->total_scratch)
-      anv_block_pool_alloc(&device->scratch_block_pool);
-
-   gen7_compute_urb_partition(pipeline);
-
-   return 0;
-}
-
-/* This badly named function frees the struct anv_pipeline data that the compiler
- * allocates.  Currently just the prog_data structs.
- */
-void
-anv_compiler_free(struct anv_pipeline *pipeline)
-{
-   for (uint32_t stage = 0; stage < VK_SHADER_STAGE_NUM; stage++) {
-      if (pipeline->prog_data[stage]) {
-         /* We only ever set up the params array because we don't do
-          * non-UBO pull constants
-          */
-         anv_device_free(pipeline->device, pipeline->prog_data[stage]->param);
-      }
-   }
-}
-
-}
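Both the deleted create_params_array() above and its replacement in anv_pipeline.c lean on the same trick: the backend never dereferences the gl_constant_value pointers in prog_data->param, so each entry can smuggle a plain byte offset into the push-constant block. A minimal standalone sketch of that idea — the struct and field names here are illustrative, not the driver's real layout, and the NULL-base arithmetic is the classic offsetof-style idiom rather than strictly portable C:

   #include <stdint.h>
   #include <stdio.h>

   /* Hypothetical stand-in for anv_push_constants. */
   struct push_constants {
      uint8_t client_data[128];
   };

   int main(void)
   {
      /* With a NULL base pointer, &null_data->client_data[i] evaluates to
       * the byte offset of that element, carried around in a pointer that
       * is never dereferenced. */
      struct push_constants *null_data = NULL;
      const void *param2 = &null_data->client_data[2 * sizeof(float)];

      printf("%zu\n", (size_t) param2);   /* prints 8: an offset, not an address */
      return 0;
   }
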
diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c
index e0bb7f6e4bc..fd87c85b0ce 100644
--- a/src/vulkan/anv_device.c
+++ b/src/vulkan/anv_device.c
@@ -33,6 +33,22 @@
 
 struct anv_dispatch_table dtable;
 
+static void
+compiler_debug_log(void *data, const char *fmt, ...)
+{ }
+
+static void
+compiler_perf_log(void *data, const char *fmt, ...)
+{
+   va_list args;
+   va_start(args, fmt);
+
+   if (unlikely(INTEL_DEBUG & DEBUG_PERF))
+      vfprintf(stderr, fmt, args);
+
+   va_end(args);
+}
+
 static VkResult
 anv_physical_device_init(struct anv_physical_device *device,
                          struct anv_instance *instance,
@@ -91,11 +107,15 @@ anv_physical_device_init(struct anv_physical_device *device,
 
    close(fd);
 
+   brw_process_intel_debug_variable();
+
    device->compiler = brw_compiler_create(NULL, device->info);
    if (device->compiler == NULL) {
       result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
       goto fail;
    }
+   device->compiler->shader_debug_log = compiler_debug_log;
+   device->compiler->shader_perf_log = compiler_perf_log;
 
    return VK_SUCCESS;
 
@@ -146,7 +166,6 @@ static const VkExtensionProperties device_extensions[] = {
    },
 };
 
-
 VkResult anv_CreateInstance(
     const VkInstanceCreateInfo*                 pCreateInfo,
     VkInstance*                                 pInstance)
@@ -633,8 +652,6 @@ VkResult anv_CreateDevice(
 
    device->info = *physical_device->info;
 
-   device->compiler = anv_compiler_create(device);
-
    anv_queue_init(device, &device->queue);
 
    anv_device_init_meta(device);
@@ -658,8 +675,6 @@ void anv_DestroyDevice(
 {
    ANV_FROM_HANDLE(anv_device, device, _device);
 
-   anv_compiler_destroy(device->compiler);
-
    anv_queue_finish(&device->queue);
 
    anv_device_finish_meta(device);
diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c
index fda382eee19..a923017310a 100644
--- a/src/vulkan/anv_pipeline.c
+++ b/src/vulkan/anv_pipeline.c
@@ -28,6 +28,12 @@
 #include <fcntl.h>
 
 #include "anv_private.h"
+#include "brw_nir.h"
+#include "anv_nir.h"
+#include "glsl/nir/nir_spirv.h"
+
+/* Needed for SWIZZLE macros */
+#include "program/prog_instruction.h"
 
 // Shader functions
 
@@ -81,16 +87,12 @@ VkResult anv_CreateShader(
 
    const char *name = pCreateInfo->pName ? pCreateInfo->pName : "main";
    size_t name_len = strlen(name);
 
-   if (strcmp(name, "main") != 0) {
-      anv_finishme("Multiple shaders per module not really supported");
-   }
-
    shader = anv_device_alloc(device, sizeof(*shader) + name_len + 1, 8,
                              VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
    if (shader == NULL)
       return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 
-   shader->module = module;
+   shader->module = module,
    memcpy(shader->entrypoint, name, name_len + 1);
 
    *pShader = anv_shader_to_handle(shader);
@@ -108,6 +110,86 @@ void anv_DestroyShader(
    anv_device_free(device, shader);
 }
 
+#define SPIR_V_MAGIC_NUMBER 0x07230203
+
+static const gl_shader_stage vk_shader_stage_to_mesa_stage[] = {
+   [VK_SHADER_STAGE_VERTEX] = MESA_SHADER_VERTEX,
+   [VK_SHADER_STAGE_TESS_CONTROL] = -1,
+   [VK_SHADER_STAGE_TESS_EVALUATION] = -1,
+   [VK_SHADER_STAGE_GEOMETRY] = MESA_SHADER_GEOMETRY,
+   [VK_SHADER_STAGE_FRAGMENT] = MESA_SHADER_FRAGMENT,
+   [VK_SHADER_STAGE_COMPUTE] = MESA_SHADER_COMPUTE,
+};
+
+static bool
+is_scalar_shader_stage(const struct brw_compiler *compiler, VkShaderStage stage)
+{
+   switch (stage) {
+   case VK_SHADER_STAGE_VERTEX:
+      return compiler->scalar_vs;
+   case VK_SHADER_STAGE_GEOMETRY:
+      return false;
+   case VK_SHADER_STAGE_FRAGMENT:
+   case VK_SHADER_STAGE_COMPUTE:
+      return true;
+   default:
+      unreachable("Unsupported shader stage");
+   }
+}
+
+/* Eventually, this will become part of anv_CreateShader.  Unfortunately,
+ * we can't do that yet because we don't have the ability to copy nir.
+ */
+static nir_shader *
+anv_shader_compile_to_nir(struct anv_device *device,
+                          struct anv_shader *shader, VkShaderStage vk_stage)
+{
+   if (strcmp(shader->entrypoint, "main") != 0) {
+      anv_finishme("Multiple shaders per module not really supported");
+   }
+
+   gl_shader_stage stage = vk_shader_stage_to_mesa_stage[vk_stage];
+   const struct brw_compiler *compiler =
+      device->instance->physicalDevice.compiler;
+   const nir_shader_compiler_options *nir_options =
+      compiler->glsl_compiler_options[stage].NirOptions;
+
+   nir_shader *nir;
+   if (shader->module->nir) {
+      /* Some things such as our meta clear/blit code will give us a NIR
+       * shader directly.  In that case, we just ignore the SPIR-V entirely
+       * and just use the NIR shader */
+      nir = shader->module->nir;
+      nir->options = nir_options;
+   } else {
+      uint32_t *spirv = (uint32_t *) shader->module->data;
+      assert(spirv[0] == SPIR_V_MAGIC_NUMBER);
+      assert(shader->module->size % 4 == 0);
+
+      nir = spirv_to_nir(spirv, shader->module->size / 4, stage, nir_options);
+   }
+   nir_validate_shader(nir);
+
+   /* Make sure the provided shader has exactly one entrypoint and that the
+    * name matches the name that came in from the VkShader.
+    */
+   nir_function_impl *entrypoint = NULL;
+   nir_foreach_overload(nir, overload) {
+      if (strcmp(shader->entrypoint, overload->function->name) == 0 &&
+          overload->impl) {
+         assert(entrypoint == NULL);
+         entrypoint = overload->impl;
+      }
+   }
+   assert(entrypoint != NULL);
+
+   brw_preprocess_nir(nir, &device->info,
+                      is_scalar_shader_stage(compiler, vk_stage));
+
+   nir_shader_gather_info(nir, entrypoint);
+
+   return nir;
+}
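anv_shader_compile_to_nir() above assumes a well-formed module before handing the words to spirv_to_nir(): a word-aligned size and the fixed 0x07230203 magic up front. A minimal sketch of that header check, reusing the spirv_header layout from the deleted compiler (the helper name is made up for illustration):

   #include <stdbool.h>
   #include <stddef.h>
   #include <stdint.h>

   #define SPIR_V_MAGIC_NUMBER 0x07230203

   /* First three words of every SPIR-V binary. */
   struct spirv_header {
      uint32_t magic;
      uint32_t version;
      uint32_t gen_magic;
   };

   /* Returns true if the blob looks like something we could hand to
    * spirv_to_nir(): word-aligned size and the magic number in word 0. */
   static bool
   looks_like_spirv(const void *data, size_t size)
   {
      if (size < sizeof(struct spirv_header) || size % 4 != 0)
         return false;

      const struct spirv_header *header = data;
      return header->magic == SPIR_V_MAGIC_NUMBER;
   }
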
 
 VkResult anv_CreatePipelineCache(
     VkDevice                                    device,
@@ -156,7 +238,6 @@ void anv_DestroyPipeline(
    ANV_FROM_HANDLE(anv_device, device, _device);
    ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
 
-   anv_compiler_free(pipeline);
    anv_reloc_list_finish(&pipeline->batch_relocs, pipeline->device);
    anv_state_stream_finish(&pipeline->program_stream);
    anv_state_pool_free(&device->dynamic_state_pool, pipeline->blend_state);
@@ -178,6 +259,506 @@ static const uint32_t vk_to_gen_primitive_type[] = {
 };
 
 static void
+populate_sampler_prog_key(const struct brw_device_info *devinfo,
+                          struct brw_sampler_prog_key_data *key)
+{
+   /* XXX: Handle texture swizzle on HSW- */
+   for (int i = 0; i < MAX_SAMPLERS; i++) {
+      /* Assume color sampler, no swizzling. (Works for BDW+) */
+      key->swizzles[i] = SWIZZLE_XYZW;
+   }
+}
+
+static void
+populate_vs_prog_key(const struct brw_device_info *devinfo,
+                     struct brw_vs_prog_key *key)
+{
+   memset(key, 0, sizeof(*key));
+
+   populate_sampler_prog_key(devinfo, &key->tex);
+
+   /* XXX: Handle vertex input work-arounds */
+
+   /* XXX: Handle sampler_prog_key */
+}
+
+static void
+populate_wm_prog_key(const struct brw_device_info *devinfo,
+                     const VkGraphicsPipelineCreateInfo *info,
+                     struct brw_wm_prog_key *key)
+{
+   ANV_FROM_HANDLE(anv_render_pass, render_pass, info->renderPass);
+
+   memset(key, 0, sizeof(*key));
+
+   populate_sampler_prog_key(devinfo, &key->tex);
+
+   /* Vulkan doesn't specify a default */
+   key->high_quality_derivatives = false;
+
+   /* XXX Vulkan doesn't appear to specify */
+   key->clamp_fragment_color = false;
+
+   /* XXX: These are needed for flipping the coordinates.  Do we need to do
+    * this in Vulkan?
+    */
+   key->drawable_height = 0;
+   key->render_to_fbo = true; /* XXX really? */
+
+   key->nr_color_regions = render_pass->subpasses[info->subpass].color_count;
+
+   key->replicate_alpha = key->nr_color_regions > 1 &&
+                          info->pColorBlendState->alphaToCoverageEnable;
+
+   if (info->pMultisampleState && info->pMultisampleState->rasterSamples > 1) {
+      /* We should probably pull this out of the shader, but it's fairly
+       * harmless to compute it and then let dead-code take care of it.
+       */
+      key->compute_sample_id = true;
+      key->persample_shading = info->pMultisampleState->sampleShadingEnable;
+      if (key->persample_shading)
+         key->persample_2x = info->pMultisampleState->rasterSamples == 2;
+
+      key->compute_pos_offset = info->pMultisampleState->sampleShadingEnable;
+      key->compute_sample_id = info->pMultisampleState->sampleShadingEnable;
+   }
+}
+
+static void
+populate_cs_prog_key(const struct brw_device_info *devinfo,
+                     struct brw_cs_prog_key *key)
+{
+   memset(key, 0, sizeof(*key));
+
+   populate_sampler_prog_key(devinfo, &key->tex);
+}
+
+static nir_shader *
+anv_pipeline_compile(struct anv_pipeline *pipeline,
+                     struct anv_shader *shader,
+                     VkShaderStage stage,
+                     struct brw_stage_prog_data *prog_data)
+{
+   const struct brw_compiler *compiler =
+      pipeline->device->instance->physicalDevice.compiler;
+
+   nir_shader *nir = anv_shader_compile_to_nir(pipeline->device, shader, stage);
+   if (nir == NULL)
+      return NULL;
+
+   bool have_push_constants = false;
+   nir_foreach_variable(var, &nir->uniforms) {
+      if (!glsl_type_is_sampler(var->type)) {
+         have_push_constants = true;
+         break;
+      }
+   }
+
+   /* Figure out the number of parameters */
+   prog_data->nr_params = 0;
+
+   if (have_push_constants) {
+      /* If the shader uses any push constants at all, we'll just give
+       * them the maximum possible number
+       */
+      prog_data->nr_params += MAX_PUSH_CONSTANTS_SIZE / sizeof(float);
+   }
+
+   if (pipeline->layout && pipeline->layout->stage[stage].has_dynamic_offsets)
+      prog_data->nr_params += MAX_DYNAMIC_BUFFERS;
+
+   if (prog_data->nr_params > 0) {
+      prog_data->param = (const gl_constant_value **)
+         anv_device_alloc(pipeline->device,
+                          prog_data->nr_params * sizeof(gl_constant_value *),
+                          8, VK_SYSTEM_ALLOC_TYPE_INTERNAL_SHADER);
+
+      /* We now set the param values to be offsets into a
+       * anv_push_constant_data structure.  Since the compiler doesn't
+       * actually dereference any of the gl_constant_value pointers in the
+       * params array, it doesn't really matter what we put here.
+       */
+      struct anv_push_constants *null_data = NULL;
+      if (have_push_constants) {
+         /* Fill out the push constants section of the param array */
+         for (unsigned i = 0; i < MAX_PUSH_CONSTANTS_SIZE / sizeof(float); i++)
+            prog_data->param[i] = (const gl_constant_value *)
+               &null_data->client_data[i * sizeof(float)];
+      }
+   }
+
+   /* Set up dynamic offsets */
+   anv_nir_apply_dynamic_offsets(pipeline, nir, prog_data);
+
+   /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
+   anv_nir_apply_pipeline_layout(nir, pipeline->layout);
+
+   /* All binding table offsets provided by apply_pipeline_layout() are
+    * relative to the start of the binding table (plus MAX_RTS for FS).
+    */
+   unsigned bias = stage == VK_SHADER_STAGE_FRAGMENT ? MAX_RTS : 0;
+   prog_data->binding_table.size_bytes = 0;
+   prog_data->binding_table.texture_start = bias;
+   prog_data->binding_table.ubo_start = bias;
+   prog_data->binding_table.image_start = bias;
+
+   /* Finish the optimization and compilation process */
+   brw_postprocess_nir(nir, &pipeline->device->info,
+                       is_scalar_shader_stage(compiler, stage));
+
+   /* nir_lower_io will only handle the push constants; we need to set this
+    * to the full number of possible uniforms.
+    */
+   nir->num_uniforms = prog_data->nr_params;
+
+   return nir;
+}
+
+static uint32_t
+anv_pipeline_upload_kernel(struct anv_pipeline *pipeline,
+                           const void *data, size_t size)
+{
+   struct anv_state state =
+      anv_state_stream_alloc(&pipeline->program_stream, size, 64);
+
+   assert(size < pipeline->program_stream.block_pool->block_size);
+
+   memcpy(state.map, data, size);
+
+   return state.offset;
+}
+
+static void
+anv_pipeline_add_compiled_stage(struct anv_pipeline *pipeline,
+                                VkShaderStage stage,
+                                struct brw_stage_prog_data *prog_data)
+{
+   struct brw_device_info *devinfo = &pipeline->device->info;
+   uint32_t max_threads[] = {
+      [VK_SHADER_STAGE_VERTEX]          = devinfo->max_vs_threads,
+      [VK_SHADER_STAGE_TESS_CONTROL]    = 0,
+      [VK_SHADER_STAGE_TESS_EVALUATION] = 0,
+      [VK_SHADER_STAGE_GEOMETRY]        = devinfo->max_gs_threads,
+      [VK_SHADER_STAGE_FRAGMENT]        = devinfo->max_wm_threads,
+      [VK_SHADER_STAGE_COMPUTE]         = devinfo->max_cs_threads,
+   };
+
+   pipeline->prog_data[stage] = prog_data;
+   pipeline->active_stages |= 1 << stage;
+   pipeline->scratch_start[stage] = pipeline->total_scratch;
+   pipeline->total_scratch =
+      align_u32(pipeline->total_scratch, 1024) +
+      prog_data->total_scratch * max_threads[stage];
+}
+
+static VkResult
+anv_pipeline_compile_vs(struct anv_pipeline *pipeline,
+                        const VkGraphicsPipelineCreateInfo *info,
+                        struct anv_shader *shader)
+{
+   const struct brw_compiler *compiler =
+      pipeline->device->instance->physicalDevice.compiler;
+   struct brw_vs_prog_data *prog_data = &pipeline->vs_prog_data;
+   struct brw_vs_prog_key key;
+
+   populate_vs_prog_key(&pipeline->device->info, &key);
+
+   /* TODO: Look up shader in cache */
+
+   memset(prog_data, 0, sizeof(*prog_data));
+
+   nir_shader *nir = anv_pipeline_compile(pipeline, shader,
+                                          VK_SHADER_STAGE_VERTEX,
+                                          &prog_data->base.base);
+   if (nir == NULL)
+      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   void *mem_ctx = ralloc_context(NULL);
+
+   if (shader->module->nir == NULL)
+      ralloc_steal(mem_ctx, nir);
+
+   prog_data->inputs_read = nir->info.inputs_read;
+   pipeline->writes_point_size = nir->info.outputs_written & VARYING_SLOT_PSIZ;
+
+   brw_compute_vue_map(&pipeline->device->info,
+                       &prog_data->base.vue_map,
+                       nir->info.outputs_written,
+                       false /* XXX: Do SSO? */);
+
+   unsigned code_size;
+   const unsigned *shader_code =
+      brw_compile_vs(compiler, NULL, mem_ctx, &key, prog_data, nir,
+                     NULL, false, -1, &code_size, NULL);
+   if (shader_code == NULL) {
+      ralloc_free(mem_ctx);
+      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+   }
+
+   const uint32_t offset =
+      anv_pipeline_upload_kernel(pipeline, shader_code, code_size);
+   if (prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8) {
+      pipeline->vs_simd8 = offset;
+      pipeline->vs_vec4 = NO_KERNEL;
+   } else {
+      pipeline->vs_simd8 = NO_KERNEL;
+      pipeline->vs_vec4 = offset;
+   }
+
+   ralloc_free(mem_ctx);
+
+   anv_pipeline_add_compiled_stage(pipeline, VK_SHADER_STAGE_VERTEX,
+                                   &prog_data->base.base);
+
+   return VK_SUCCESS;
+}
+
+static VkResult
+anv_pipeline_compile_fs(struct anv_pipeline *pipeline,
+                        const VkGraphicsPipelineCreateInfo *info,
+                        struct anv_shader *shader)
+{
+   const struct brw_compiler *compiler =
+      pipeline->device->instance->physicalDevice.compiler;
+   struct brw_wm_prog_data *prog_data = &pipeline->wm_prog_data;
+   struct brw_wm_prog_key key;
+
+   populate_wm_prog_key(&pipeline->device->info, info, &key);
+
+   if (pipeline->use_repclear)
+      key.nr_color_regions = 1;
+
+   /* TODO: Look up shader in cache */
+
+   memset(prog_data, 0, sizeof(*prog_data));
+
+   prog_data->binding_table.render_target_start = 0;
+
+   nir_shader *nir = anv_pipeline_compile(pipeline, shader,
+                                          VK_SHADER_STAGE_FRAGMENT,
+                                          &prog_data->base);
+   if (nir == NULL)
+      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   void *mem_ctx = ralloc_context(NULL);
+
+   if (shader->module->nir == NULL)
+      ralloc_steal(mem_ctx, nir);
+
+   unsigned code_size;
+   const unsigned *shader_code =
+      brw_compile_fs(compiler, NULL, mem_ctx, &key, prog_data, nir,
+                     NULL, -1, -1, pipeline->use_repclear, &code_size, NULL);
+   if (shader_code == NULL) {
+      ralloc_free(mem_ctx);
+      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+   }
+
+   uint32_t offset = anv_pipeline_upload_kernel(pipeline,
+                                                shader_code, code_size);
+   if (prog_data->no_8)
+      pipeline->ps_simd8 = NO_KERNEL;
+   else
+      pipeline->ps_simd8 = offset;
+
+   if (prog_data->no_8 || prog_data->prog_offset_16) {
+      pipeline->ps_simd16 = offset + prog_data->prog_offset_16;
+   } else {
+      pipeline->ps_simd16 = NO_KERNEL;
+   }
+
+   pipeline->ps_ksp2 = 0;
+   pipeline->ps_grf_start2 = 0;
+   if (pipeline->ps_simd8 != NO_KERNEL) {
+      pipeline->ps_ksp0 = pipeline->ps_simd8;
+      pipeline->ps_grf_start0 = prog_data->base.dispatch_grf_start_reg;
+      if (pipeline->ps_simd16 != NO_KERNEL) {
+         pipeline->ps_ksp2 = pipeline->ps_simd16;
+         pipeline->ps_grf_start2 = prog_data->dispatch_grf_start_reg_16;
+      }
+   } else if (pipeline->ps_simd16 != NO_KERNEL) {
+      pipeline->ps_ksp0 = pipeline->ps_simd16;
+      pipeline->ps_grf_start0 = prog_data->dispatch_grf_start_reg_16;
+   }
+
+   ralloc_free(mem_ctx);
+
+   anv_pipeline_add_compiled_stage(pipeline, VK_SHADER_STAGE_FRAGMENT,
+                                   &prog_data->base);
+
+   return VK_SUCCESS;
+}
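The ps_ksp0/ps_ksp2 bookkeeping above packs one or two fragment kernels into a single upload: when both exist, the SIMD16 code starts prog_offset_16 bytes past the SIMD8 code. A worked sketch of the offset math under assumed inputs (the sentinel and numbers below are made up for illustration):

   #include <stdint.h>
   #include <stdio.h>

   #define NO_KERNEL UINT32_MAX   /* stand-in for the driver's sentinel */

   int main(void)
   {
      uint32_t offset = 4096;        /* assumed upload offset of the kernel */
      uint32_t prog_offset_16 = 640; /* assumed start of SIMD16 code within it */
      int no_8 = 0;                  /* a SIMD8 variant was compiled */

      uint32_t ps_simd8 = no_8 ? NO_KERNEL : offset;
      uint32_t ps_simd16 = (no_8 || prog_offset_16)
                         ? offset + prog_offset_16 : NO_KERNEL;

      /* SIMD8 at 4096, SIMD16 at 4736: both point into the same upload. */
      printf("simd8 %u simd16 %u\n", ps_simd8, ps_simd16);
      return 0;
   }
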
+
+VkResult
+anv_pipeline_compile_cs(struct anv_pipeline *pipeline,
+                        const VkComputePipelineCreateInfo *info,
+                        struct anv_shader *shader)
+{
+   const struct brw_compiler *compiler =
+      pipeline->device->instance->physicalDevice.compiler;
+   struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;
+   struct brw_cs_prog_key key;
+
+   populate_cs_prog_key(&pipeline->device->info, &key);
+
+   /* TODO: Look up shader in cache */
+
+   memset(prog_data, 0, sizeof(*prog_data));
+
+   nir_shader *nir = anv_pipeline_compile(pipeline, shader,
+                                          VK_SHADER_STAGE_COMPUTE,
+                                          &prog_data->base);
+   if (nir == NULL)
+      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   void *mem_ctx = ralloc_context(NULL);
+
+   if (shader->module->nir == NULL)
+      ralloc_steal(mem_ctx, nir);
+
+   unsigned code_size;
+   const unsigned *shader_code =
+      brw_compile_cs(compiler, NULL, mem_ctx, &key, prog_data, nir,
+                     -1, &code_size, NULL);
+   if (shader_code == NULL) {
+      ralloc_free(mem_ctx);
+      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+   }
+
+   pipeline->cs_simd = anv_pipeline_upload_kernel(pipeline,
+                                                  shader_code, code_size);
+   ralloc_free(mem_ctx);
+
+   anv_pipeline_add_compiled_stage(pipeline, VK_SHADER_STAGE_COMPUTE,
+                                   &prog_data->base);
+
+   return VK_SUCCESS;
+}
+
+static const int gen8_push_size = 32 * 1024;
+
+static void
+gen7_compute_urb_partition(struct anv_pipeline *pipeline)
+{
+   const struct brw_device_info *devinfo = &pipeline->device->info;
+   bool vs_present = pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT;
+   unsigned vs_size = vs_present ? pipeline->vs_prog_data.base.urb_entry_size : 1;
+   unsigned vs_entry_size_bytes = vs_size * 64;
+   bool gs_present = pipeline->active_stages & VK_SHADER_STAGE_GEOMETRY_BIT;
+   unsigned gs_size = gs_present ? pipeline->gs_prog_data.base.urb_entry_size : 1;
+   unsigned gs_entry_size_bytes = gs_size * 64;
+
+   /* From p35 of the Ivy Bridge PRM (section 1.7.1: 3DSTATE_URB_GS):
+    *
+    *     VS Number of URB Entries must be divisible by 8 if the VS URB Entry
+    *     Allocation Size is less than 9 512-bit URB entries.
+    *
+    * Similar text exists for GS.
+    */
+   unsigned vs_granularity = (vs_size < 9) ? 8 : 1;
+   unsigned gs_granularity = (gs_size < 9) ? 8 : 1;
+
+   /* URB allocations must be done in 8k chunks. */
+   unsigned chunk_size_bytes = 8192;
+
+   /* Determine the size of the URB in chunks. */
+   unsigned urb_chunks = devinfo->urb.size * 1024 / chunk_size_bytes;
+
+   /* Reserve space for push constants */
+   unsigned push_constant_bytes = gen8_push_size;
+   unsigned push_constant_chunks =
+      push_constant_bytes / chunk_size_bytes;
+
+   /* Initially, assign each stage the minimum amount of URB space it needs,
+    * and make a note of how much additional space it "wants" (the amount of
+    * additional space it could actually make use of).
+    */
+
+   /* VS has a lower limit on the number of URB entries */
+   unsigned vs_chunks =
+      ALIGN(devinfo->urb.min_vs_entries * vs_entry_size_bytes,
+            chunk_size_bytes) / chunk_size_bytes;
+   unsigned vs_wants =
+      ALIGN(devinfo->urb.max_vs_entries * vs_entry_size_bytes,
+            chunk_size_bytes) / chunk_size_bytes - vs_chunks;
+
+   unsigned gs_chunks = 0;
+   unsigned gs_wants = 0;
+   if (gs_present) {
+      /* There are two constraints on the minimum amount of URB space we can
+       * allocate:
+       *
+       * (1) We need room for at least 2 URB entries, since we always operate
+       * the GS in DUAL_OBJECT mode.
+       *
+       * (2) We can't allocate less than nr_gs_entries_granularity.
+       */
+      gs_chunks = ALIGN(MAX2(gs_granularity, 2) * gs_entry_size_bytes,
+                        chunk_size_bytes) / chunk_size_bytes;
+      gs_wants =
+         ALIGN(devinfo->urb.max_gs_entries * gs_entry_size_bytes,
+               chunk_size_bytes) / chunk_size_bytes - gs_chunks;
+   }
+
+   /* There should always be enough URB space to satisfy the minimum
+    * requirements of each stage.
+    */
+   unsigned total_needs = push_constant_chunks + vs_chunks + gs_chunks;
+   assert(total_needs <= urb_chunks);
+
+   /* Mete out remaining space (if any) in proportion to "wants". */
+   unsigned total_wants = vs_wants + gs_wants;
+   unsigned remaining_space = urb_chunks - total_needs;
+   if (remaining_space > total_wants)
+      remaining_space = total_wants;
+   if (remaining_space > 0) {
+      unsigned vs_additional = (unsigned)
+         round(vs_wants * (((double) remaining_space) / total_wants));
+      vs_chunks += vs_additional;
+      remaining_space -= vs_additional;
+      gs_chunks += remaining_space;
+   }
+
+   /* Sanity check that we haven't over-allocated. */
+   assert(push_constant_chunks + vs_chunks + gs_chunks <= urb_chunks);
+
+   /* Finally, compute the number of entries that can fit in the space
+    * allocated to each stage.
+    */
+   unsigned nr_vs_entries = vs_chunks * chunk_size_bytes / vs_entry_size_bytes;
+   unsigned nr_gs_entries = gs_chunks * chunk_size_bytes / gs_entry_size_bytes;
+
+   /* Since we rounded up when computing *_wants, this may be slightly more
+    * than the maximum allowed amount, so correct for that.
+    */
+   nr_vs_entries = MIN2(nr_vs_entries, devinfo->urb.max_vs_entries);
+   nr_gs_entries = MIN2(nr_gs_entries, devinfo->urb.max_gs_entries);
+
+   /* Ensure that we program a multiple of the granularity. */
+   nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, vs_granularity);
+   nr_gs_entries = ROUND_DOWN_TO(nr_gs_entries, gs_granularity);
+
+   /* Finally, sanity check to make sure we have at least the minimum number
+    * of entries needed for each stage.
+    */
+   assert(nr_vs_entries >= devinfo->urb.min_vs_entries);
+   if (gs_present)
+      assert(nr_gs_entries >= 2);
+
+   /* Lay out the URB in the following order:
+    * - push constants
+    * - VS
+    * - GS
+    */
+   pipeline->urb.vs_start = push_constant_chunks;
+   pipeline->urb.vs_size = vs_size;
+   pipeline->urb.nr_vs_entries = nr_vs_entries;
+
+   pipeline->urb.gs_start = push_constant_chunks + vs_chunks;
+   pipeline->urb.gs_size = gs_size;
+   pipeline->urb.nr_gs_entries = nr_gs_entries;
+}
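gen7_compute_urb_partition() is easier to follow with concrete numbers. A self-contained sketch of the chunk math for a VS-only pipeline — the URB size and entry limits below are invented inputs, not real device values:

   #include <stdio.h>

   #define ALIGN(v, a) (((v) + (a) - 1) / (a) * (a))

   int main(void)
   {
      /* Invented inputs: 192 KB URB, VS entries of one 64-byte row. */
      unsigned urb_size_kb = 192, min_vs_entries = 64, max_vs_entries = 2560;
      unsigned vs_entry_size_bytes = 1 * 64;

      unsigned chunk_size_bytes = 8192;                             /* 8k chunks */
      unsigned urb_chunks = urb_size_kb * 1024 / chunk_size_bytes;  /* 24 */
      unsigned push_constant_chunks = 32 * 1024 / chunk_size_bytes; /* 4  */

      unsigned vs_chunks = ALIGN(min_vs_entries * vs_entry_size_bytes,
                                 chunk_size_bytes) / chunk_size_bytes;   /* 1 */
      unsigned vs_wants = ALIGN(max_vs_entries * vs_entry_size_bytes,
                                chunk_size_bytes) / chunk_size_bytes - vs_chunks;

      /* No GS present: whatever is left over, up to "wants", goes to the VS. */
      unsigned remaining = urb_chunks - push_constant_chunks - vs_chunks; /* 19 */
      if (remaining > vs_wants)
         remaining = vs_wants;
      vs_chunks += remaining;   /* 20 chunks */

      /* Prints 2560: 20 chunks * 8192 B / 64 B per entry. */
      printf("VS entries: %u\n", vs_chunks * chunk_size_bytes / vs_entry_size_bytes);
      return 0;
   }
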
+
+static void
 anv_pipeline_init_dynamic_state(struct anv_pipeline *pipeline,
                                 const VkGraphicsPipelineCreateInfo *pCreateInfo)
 {
@@ -335,7 +916,6 @@ anv_pipeline_init(struct anv_pipeline *pipeline, struct anv_device *device,
    pipeline->device = device;
    pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout);
-   memset(pipeline->shaders, 0, sizeof(pipeline->shaders));
 
    result = anv_reloc_list_init(&pipeline->batch_relocs, device);
    if (result != VK_SUCCESS) {
@@ -349,11 +929,6 @@ anv_pipeline_init(struct anv_pipeline *pipeline, struct anv_device *device,
    anv_state_stream_init(&pipeline->program_stream,
                          &device->instruction_block_pool);
 
-   for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
-      pipeline->shaders[pCreateInfo->pStages[i].stage] =
-         anv_shader_from_handle(pCreateInfo->pStages[i].shader);
-   }
-
    anv_pipeline_init_dynamic_state(pipeline, pCreateInfo);
 
    if (pCreateInfo->pTessellationState)
@@ -363,27 +938,44 @@ anv_pipeline_init(struct anv_pipeline *pipeline, struct anv_device *device,
       anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO");
 
    pipeline->use_repclear = extra && extra->use_repclear;
+   pipeline->writes_point_size = false;
 
-   anv_compiler_run(device->compiler, pipeline);
+   /* When we free the pipeline, we detect stages based on the NULL status
+    * of various prog_data pointers.  Make them NULL by default.
+    */
+   memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data));
+   memset(pipeline->scratch_start, 0, sizeof(pipeline->scratch_start));
 
-   const struct brw_wm_prog_data *wm_prog_data = &pipeline->wm_prog_data;
+   pipeline->vs_simd8 = NO_KERNEL;
+   pipeline->vs_vec4 = NO_KERNEL;
+   pipeline->gs_vec4 = NO_KERNEL;
 
-   pipeline->ps_ksp2 = 0;
-   pipeline->ps_grf_start2 = 0;
-   if (pipeline->ps_simd8 != NO_KERNEL) {
-      pipeline->ps_ksp0 = pipeline->ps_simd8;
-      pipeline->ps_grf_start0 = wm_prog_data->base.dispatch_grf_start_reg;
-      if (pipeline->ps_simd16 != NO_KERNEL) {
-         pipeline->ps_ksp2 = pipeline->ps_simd16;
-         pipeline->ps_grf_start2 = wm_prog_data->dispatch_grf_start_reg_16;
+   pipeline->active_stages = 0;
+   pipeline->total_scratch = 0;
+
+   for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
+      ANV_FROM_HANDLE(anv_shader, shader, pCreateInfo->pStages[i].shader);
+
+      switch (pCreateInfo->pStages[i].stage) {
+      case VK_SHADER_STAGE_VERTEX:
+         anv_pipeline_compile_vs(pipeline, pCreateInfo, shader);
+         break;
+      case VK_SHADER_STAGE_FRAGMENT:
+         anv_pipeline_compile_fs(pipeline, pCreateInfo, shader);
+         break;
+      default:
+         anv_finishme("Unsupported shader stage");
       }
-   } else if (pipeline->ps_simd16 != NO_KERNEL) {
-      pipeline->ps_ksp0 = pipeline->ps_simd16;
-      pipeline->ps_grf_start0 = wm_prog_data->dispatch_grf_start_reg_16;
-   } else {
-      unreachable("no ps shader");
    }
 
+   if (!(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT)) {
+      /* Vertex is only optional if disable_vs is set */
+      assert(extra->disable_vs);
+      memset(&pipeline->vs_prog_data, 0, sizeof(pipeline->vs_prog_data));
+   }
+
+   gen7_compute_urb_partition(pipeline);
+
    const VkPipelineVertexInputStateCreateInfo *vi_info =
       pCreateInfo->pVertexInputState;
    pipeline->vb_used = 0;
diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h
index 8c48a9c2b0a..c1a7b01e8b0 100644
--- a/src/vulkan/anv_private.h
+++ b/src/vulkan/anv_private.h
@@ -499,7 +499,6 @@ struct anv_device {
 
    struct anv_block_pool                       scratch_block_pool;
 
-   struct anv_compiler *                       compiler;
    pthread_mutex_t                             mutex;
 };
 
@@ -1089,7 +1088,6 @@ struct anv_pipeline {
    uint32_t                                     dynamic_state_mask;
    struct anv_dynamic_state                     dynamic_state;
 
-   struct anv_shader *                          shaders[VK_SHADER_STAGE_NUM];
   struct anv_pipeline_layout *                 layout;
    bool                                         use_repclear;
 
@@ -1161,6 +1159,11 @@ anv_pipeline_init(struct anv_pipeline *pipeline, struct anv_device *device,
                   const struct anv_graphics_pipeline_create_info *extra);
 
 VkResult
+anv_pipeline_compile_cs(struct anv_pipeline *pipeline,
+                        const VkComputePipelineCreateInfo *info,
+                        struct anv_shader *shader);
+
+VkResult
 anv_graphics_pipeline_create(VkDevice device,
                              const VkGraphicsPipelineCreateInfo *pCreateInfo,
                              const struct anv_graphics_pipeline_create_info *extra,
@@ -1187,11 +1190,6 @@ gen8_compute_pipeline_create(VkDevice _device,
                              const VkComputePipelineCreateInfo *pCreateInfo,
                              VkPipeline *pPipeline);
 
-struct anv_compiler *anv_compiler_create(struct anv_device *device);
-void anv_compiler_destroy(struct anv_compiler *compiler);
-int anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline);
-void anv_compiler_free(struct anv_pipeline *pipeline);
-
 struct anv_format {
    const VkFormat                               vk_format;
    const char *                                 name;
diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c
index 0e2526fce20..f3f378b181a 100644
--- a/src/vulkan/gen8_pipeline.c
+++ b/src/vulkan/gen8_pipeline.c
@@ -568,19 +568,29 @@ VkResult gen8_compute_pipeline_create(
    anv_state_stream_init(&pipeline->program_stream,
                          &device->instruction_block_pool);
 
-   memset(pipeline->shaders, 0, sizeof(pipeline->shaders));
+   /* When we free the pipeline, we detect stages based on the NULL status
+    * of various prog_data pointers.  Make them NULL by default.
+    */
+   memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data));
+   memset(pipeline->scratch_start, 0, sizeof(pipeline->scratch_start));
 
-   pipeline->shaders[VK_SHADER_STAGE_COMPUTE] =
-      anv_shader_from_handle(pCreateInfo->stage.shader);
+   pipeline->vs_simd8 = NO_KERNEL;
+   pipeline->vs_vec4 = NO_KERNEL;
+   pipeline->gs_vec4 = NO_KERNEL;
 
-   pipeline->use_repclear = false;
+   pipeline->active_stages = 0;
+   pipeline->total_scratch = 0;
+
+   assert(pCreateInfo->stage.stage == VK_SHADER_STAGE_COMPUTE);
+   ANV_FROM_HANDLE(anv_shader, shader, pCreateInfo->stage.shader);
+   anv_pipeline_compile_cs(pipeline, pCreateInfo, shader);
 
-   anv_compiler_run(device->compiler, pipeline);
+   pipeline->use_repclear = false;
 
    const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data;
 
    anv_batch_emit(&pipeline->batch, GEN8_MEDIA_VFE_STATE,
-                  .ScratchSpaceBasePointer = pipeline->scratch_start[VK_SHADER_STAGE_FRAGMENT],
+                  .ScratchSpaceBasePointer = pipeline->scratch_start[VK_SHADER_STAGE_COMPUTE],
                   .PerThreadScratchSpace = ffs(cs_prog_data->base.total_scratch / 2048),
                   .ScratchSpaceBasePointerHigh = 0,
                   .StackSize = 0,
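The MEDIA_VFE_STATE hunk above (which also fixes the scratch-start index from FRAGMENT to COMPUTE) encodes PerThreadScratchSpace as a power-of-two exponent derived with ffs() from the per-thread scratch size. A small sketch of that encoding; the scratch size is an assumed value:

   #include <stdio.h>
   #include <strings.h>   /* ffs() */

   int main(void)
   {
      /* Assume the compiler reported 4 KB of scratch per thread. */
      unsigned total_scratch = 4096;

      /* Matches the .PerThreadScratchSpace expression in the hunk above:
       * 2048 B -> 1, 4096 B -> 2, 8192 B -> 3, i.e. log2 of the size in
       * 1 KB units. */
      int encoded = ffs(total_scratch / 2048);

      printf("PerThreadScratchSpace = %d\n", encoded);   /* prints 2 */
      return 0;
   }
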