Diffstat (limited to 'src/vulkan/anv_compiler.cpp')
-rw-r--r-- | src/vulkan/anv_compiler.cpp | 891
1 files changed, 0 insertions, 891 deletions
diff --git a/src/vulkan/anv_compiler.cpp b/src/vulkan/anv_compiler.cpp
deleted file mode 100644
index 1ecd88a0331..00000000000
--- a/src/vulkan/anv_compiler.cpp
+++ /dev/null
@@ -1,891 +0,0 @@
-/*
- * Copyright © 2015 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-#include <sys/stat.h>
-#include <unistd.h>
-#include <fcntl.h>
-
-#include "anv_private.h"
-#include "anv_nir.h"
-
-#include <brw_context.h>
-#include <brw_wm.h> /* brw_new_shader_program is here */
-#include <brw_nir.h>
-
-#include <brw_vs.h>
-#include <brw_gs.h>
-#include <brw_cs.h>
-#include "brw_vec4_gs_visitor.h"
-#include <brw_compiler.h>
-
-#include <mesa/main/shaderobj.h>
-#include <mesa/main/fbobject.h>
-#include <mesa/main/context.h>
-#include <mesa/program/program.h>
-#include <glsl/program.h>
-
-/* XXX: We need this to keep symbols in nir.h from conflicting with the
- * generated GEN command packing headers. We need to fix *both* to not
- * define something as generic as LOAD.
- */
-#undef LOAD
-
-#include <glsl/nir/nir_spirv.h>
-
-#define SPIR_V_MAGIC_NUMBER 0x07230203
-
-static void
-fail_if(int cond, const char *format, ...)
-{
-   va_list args;
-
-   if (!cond)
-      return;
-
-   va_start(args, format);
-   vfprintf(stderr, format, args);
-   va_end(args);
-
-   exit(1);
-}
-
-static VkResult
-set_binding_table_layout(struct brw_stage_prog_data *prog_data,
-                         struct anv_pipeline *pipeline, uint32_t stage)
-{
-   unsigned bias;
-   if (stage == VK_SHADER_STAGE_FRAGMENT)
-      bias = MAX_RTS;
-   else
-      bias = 0;
-
-   prog_data->binding_table.size_bytes = 0;
-   prog_data->binding_table.texture_start = bias;
-   prog_data->binding_table.ubo_start = bias;
-   prog_data->binding_table.image_start = bias;
-
-   return VK_SUCCESS;
-}
-
-static uint32_t
-upload_kernel(struct anv_pipeline *pipeline, const void *data, size_t size)
-{
-   struct anv_state state =
-      anv_state_stream_alloc(&pipeline->program_stream, size, 64);
-
-   assert(size < pipeline->program_stream.block_pool->block_size);
-
-   memcpy(state.map, data, size);
-
-   return state.offset;
-}
-
-static void
-create_params_array(struct anv_pipeline *pipeline,
-                    struct gl_shader *shader,
-                    struct brw_stage_prog_data *prog_data)
-{
-   VkShaderStage stage = anv_vk_shader_stage_for_mesa_stage(shader->Stage);
-   unsigned num_params = 0;
-
-   if (shader->num_uniform_components) {
-      /* If the shader uses any push constants at all, we'll just give
-       * them the maximum possible number
-       */
-      num_params += MAX_PUSH_CONSTANTS_SIZE / sizeof(float);
-   }
-
-   if (pipeline->layout && pipeline->layout->stage[stage].has_dynamic_offsets)
-      num_params += MAX_DYNAMIC_BUFFERS;
-
-   if (num_params == 0)
-      return;
-
-   prog_data->param = (const gl_constant_value **)
-      anv_device_alloc(pipeline->device,
-                       num_params * sizeof(gl_constant_value *),
-                       8, VK_SYSTEM_ALLOC_TYPE_INTERNAL_SHADER);
-
-   /* We now set the param values to be offsets into a
-    * anv_push_constant_data structure. Since the compiler doesn't
-    * actually dereference any of the gl_constant_value pointers in the
-    * params array, it doesn't really matter what we put here.
-    */
-   struct anv_push_constants *null_data = NULL;
-   for (unsigned i = 0; i < num_params; i++)
-      prog_data->param[i] =
-         (const gl_constant_value *)&null_data->client_data[i * sizeof(float)];
-}
-
-static void
-brw_vs_populate_key(struct brw_context *brw,
-                    struct brw_vertex_program *vp,
-                    struct brw_vs_prog_key *key)
-{
-   memset(key, 0, sizeof(*key));
-
-   /* XXX: Handle vertex input work-arounds */
-
-   /* XXX: Handle sampler_prog_key */
-}
-
-static bool
-really_do_vs_prog(struct brw_context *brw,
-                  struct gl_shader_program *prog,
-                  struct brw_vertex_program *vp,
-                  struct brw_vs_prog_key *key, struct anv_pipeline *pipeline)
-{
-   GLuint program_size;
-   const GLuint *program;
-   struct brw_vs_prog_data *prog_data = &pipeline->vs_prog_data;
-   void *mem_ctx;
-   struct gl_shader *vs = NULL;
-
-   if (prog)
-      vs = prog->_LinkedShaders[MESA_SHADER_VERTEX];
-
-   memset(prog_data, 0, sizeof(*prog_data));
-
-   mem_ctx = ralloc_context(NULL);
-
-   create_params_array(pipeline, vs, &prog_data->base.base);
-   anv_nir_apply_dynamic_offsets(pipeline, vs->Program->nir,
-                                 &prog_data->base.base);
-   anv_nir_apply_pipeline_layout(vs->Program->nir, pipeline->layout);
-
-   prog_data->inputs_read = vp->program.Base.InputsRead;
-
-   brw_compute_vue_map(brw->intelScreen->devinfo,
-                       &prog_data->base.vue_map,
-                       vp->program.Base.OutputsWritten,
-                       prog ? prog->SeparateShader : false);
-
-   set_binding_table_layout(&prog_data->base.base, pipeline,
-                            VK_SHADER_STAGE_VERTEX);
-
-   /* Emit GEN4 code.
-    */
-   program = brw_compile_vs(brw->intelScreen->compiler, brw, mem_ctx,
-                            key, prog_data, vs->Program->nir, NULL, false, -1,
-                            &program_size, NULL);
-   if (program == NULL) {
-      ralloc_free(mem_ctx);
-      return false;
-   }
-
-   const uint32_t offset = upload_kernel(pipeline, program, program_size);
-   if (prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8) {
-      pipeline->vs_simd8 = offset;
-      pipeline->vs_vec4 = NO_KERNEL;
-   } else {
-      pipeline->vs_simd8 = NO_KERNEL;
-      pipeline->vs_vec4 = offset;
-   }
-
-   ralloc_free(mem_ctx);
-
-   return true;
-}
-
-void brw_wm_populate_key(struct brw_context *brw,
-                         struct brw_fragment_program *fp,
-                         struct brw_wm_prog_key *key)
-{
-   struct gl_context *ctx = &brw->ctx;
-   bool program_uses_dfdy = fp->program.UsesDFdy;
-   struct gl_framebuffer draw_buffer;
-   bool multisample_fbo;
-
-   memset(key, 0, sizeof(*key));
-
-   for (int i = 0; i < MAX_SAMPLERS; i++) {
-      /* Assume color sampler, no swizzling. */
-      key->tex.swizzles[i] = SWIZZLE_XYZW;
-   }
-
-   /* A non-zero framebuffer name indicates that the framebuffer was created by
-    * the user rather than the window system. */
-   draw_buffer.Name = 1;
-   draw_buffer.Visual.samples = 1;
-   draw_buffer._NumColorDrawBuffers = 1;
-   draw_buffer._NumColorDrawBuffers = 1;
-   draw_buffer.Width = 400;
-   draw_buffer.Height = 400;
-   ctx->DrawBuffer = &draw_buffer;
-
-   multisample_fbo = ctx->DrawBuffer->Visual.samples > 1;
-
-   /* _NEW_HINT */
-   key->high_quality_derivatives =
-      ctx->Hint.FragmentShaderDerivative == GL_NICEST;
-
-   /* _NEW_FRAG_CLAMP | _NEW_BUFFERS */
-   key->clamp_fragment_color = ctx->Color._ClampFragmentColor;
-
-   /* _NEW_BUFFERS */
-   /*
-    * Include the draw buffer origin and height so that we can calculate
-    * fragment position values relative to the bottom left of the drawable,
-    * from the incoming screen origin relative position we get as part of our
-    * payload.
-    *
-    * This is only needed for the WM_WPOSXY opcode when the fragment program
-    * uses the gl_FragCoord input.
-    *
-    * We could avoid recompiling by including this as a constant referenced by
-    * our program, but if we were to do that it would also be nice to handle
-    * getting that constant updated at batchbuffer submit time (when we
-    * hold the lock and know where the buffer really is) rather than at emit
-    * time when we don't hold the lock and are just guessing. We could also
-    * just avoid using this as key data if the program doesn't use
-    * fragment.position.
-    *
-    * For DRI2 the origin_x/y will always be (0,0) but we still need the
-    * drawable height in order to invert the Y axis.
-    */
-   if (fp->program.Base.InputsRead & VARYING_BIT_POS) {
-      key->drawable_height = ctx->DrawBuffer->Height;
-   }
-
-   if ((fp->program.Base.InputsRead & VARYING_BIT_POS) || program_uses_dfdy) {
-      key->render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
-   }
-
-   /* _NEW_BUFFERS */
-   key->nr_color_regions = ctx->DrawBuffer->_NumColorDrawBuffers;
-
-   /* _NEW_MULTISAMPLE, _NEW_COLOR, _NEW_BUFFERS */
-   key->replicate_alpha = ctx->DrawBuffer->_NumColorDrawBuffers > 1 &&
-      (ctx->Multisample.SampleAlphaToCoverage || ctx->Color.AlphaEnabled);
-
-   /* _NEW_BUFFERS _NEW_MULTISAMPLE */
-   /* Ignore sample qualifier while computing this flag. */
-   key->persample_shading =
-      _mesa_get_min_invocations_per_fragment(ctx, &fp->program, true) > 1;
-   if (key->persample_shading)
-      key->persample_2x = ctx->DrawBuffer->Visual.samples == 2;
-
-   key->compute_pos_offset =
-      _mesa_get_min_invocations_per_fragment(ctx, &fp->program, false) > 1 &&
-      fp->program.Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_POS;
-
-   key->compute_sample_id =
-      multisample_fbo &&
-      ctx->Multisample.Enabled &&
-      (fp->program.Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_ID);
-
-   /* The unique fragment program ID */
-   key->program_string_id = fp->id;
-
-   ctx->DrawBuffer = NULL;
-}
-
-static bool
-really_do_wm_prog(struct brw_context *brw,
-                  struct gl_shader_program *prog,
-                  struct brw_fragment_program *fp,
-                  struct brw_wm_prog_key *key, struct anv_pipeline *pipeline)
-{
-   void *mem_ctx = ralloc_context(NULL);
-   struct brw_wm_prog_data *prog_data = &pipeline->wm_prog_data;
-   struct gl_shader *fs = NULL;
-   unsigned int program_size;
-   const uint32_t *program;
-
-   if (prog)
-      fs = prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
-
-   memset(prog_data, 0, sizeof(*prog_data));
-
-   create_params_array(pipeline, fs, &prog_data->base);
-   anv_nir_apply_dynamic_offsets(pipeline, fs->Program->nir, &prog_data->base);
-   anv_nir_apply_pipeline_layout(fs->Program->nir, pipeline->layout);
-
-   set_binding_table_layout(&prog_data->base, pipeline,
-                            VK_SHADER_STAGE_FRAGMENT);
-   /* This needs to come after shader time and pull constant entries, but we
-    * don't have those set up now, so just put it after the layout entries.
-    */
-   prog_data->binding_table.render_target_start = 0;
-
-   program = brw_compile_fs(brw->intelScreen->compiler, brw, mem_ctx, key,
-                            prog_data, fp->program.Base.nir, fs->Program,
-                            -1, -1, brw->use_rep_send, &program_size, NULL);
-   if (program == NULL) {
-      ralloc_free(mem_ctx);
-      return false;
-   }
-
-   uint32_t offset = upload_kernel(pipeline, program, program_size);
-
-   if (prog_data->no_8)
-      pipeline->ps_simd8 = NO_KERNEL;
-   else
-      pipeline->ps_simd8 = offset;
-
-   if (prog_data->no_8 || prog_data->prog_offset_16) {
-      pipeline->ps_simd16 = offset + prog_data->prog_offset_16;
-   } else {
-      pipeline->ps_simd16 = NO_KERNEL;
-   }
-
-   ralloc_free(mem_ctx);
-
-   return true;
-}
-
-static bool
-brw_codegen_cs_prog(struct brw_context *brw,
-                    struct gl_shader_program *prog,
-                    struct brw_compute_program *cp,
-                    struct brw_cs_prog_key *key, struct anv_pipeline *pipeline)
-{
-   const GLuint *program;
-   void *mem_ctx = ralloc_context(NULL);
-   GLuint program_size;
-   struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;
-
-   struct gl_shader *cs = prog->_LinkedShaders[MESA_SHADER_COMPUTE];
-   assert (cs);
-
-   memset(prog_data, 0, sizeof(*prog_data));
-
-   set_binding_table_layout(&prog_data->base, pipeline, VK_SHADER_STAGE_COMPUTE);
-
-   create_params_array(pipeline, cs, &prog_data->base);
-   anv_nir_apply_dynamic_offsets(pipeline, cs->Program->nir, &prog_data->base);
-   anv_nir_apply_pipeline_layout(cs->Program->nir, pipeline->layout);
-
-   program = brw_compile_cs(brw->intelScreen->compiler, brw, mem_ctx, key,
-                            prog_data, cs->Program->nir, -1,
-                            &program_size, NULL);
-   if (program == NULL) {
-      ralloc_free(mem_ctx);
-      return false;
-   }
-
-   if (unlikely(INTEL_DEBUG & DEBUG_CS))
-      fprintf(stderr, "\n");
-
-   pipeline->cs_simd = upload_kernel(pipeline, program, program_size);
-
-   ralloc_free(mem_ctx);
-
-   return true;
-}
-
-static void
-brw_cs_populate_key(struct brw_context *brw,
-                    struct brw_compute_program *bcp, struct brw_cs_prog_key *key)
-{
-   memset(key, 0, sizeof(*key));
-
-   /* The unique compute program ID */
-   key->program_string_id = bcp->id;
-}
-
-struct anv_compiler {
-   struct anv_device *device;
-   struct intel_screen *screen;
-   struct brw_context *brw;
-   struct gl_pipeline_object pipeline;
-};
-
-extern "C" {
-
-struct anv_compiler *
-anv_compiler_create(struct anv_device *device)
-{
-   const struct brw_device_info *devinfo = &device->info;
-   struct anv_compiler *compiler;
-   struct gl_context *ctx;
-
-   compiler = rzalloc(NULL, struct anv_compiler);
-   if (compiler == NULL)
-      return NULL;
-
-   compiler->screen = rzalloc(compiler, struct intel_screen);
-   if (compiler->screen == NULL)
-      goto fail;
-
-   compiler->brw = rzalloc(compiler, struct brw_context);
-   if (compiler->brw == NULL)
-      goto fail;
-
-   compiler->device = device;
-
-   compiler->brw->gen = devinfo->gen;
-   compiler->brw->is_g4x = devinfo->is_g4x;
-   compiler->brw->is_baytrail = devinfo->is_baytrail;
-   compiler->brw->is_haswell = devinfo->is_haswell;
-   compiler->brw->is_cherryview = devinfo->is_cherryview;
-
-   /* We need this at least for CS, which will check brw->max_cs_threads
-    * against the work group size. */
-   compiler->brw->max_vs_threads = devinfo->max_vs_threads;
-   compiler->brw->max_hs_threads = devinfo->max_hs_threads;
-   compiler->brw->max_ds_threads = devinfo->max_ds_threads;
-   compiler->brw->max_gs_threads = devinfo->max_gs_threads;
-   compiler->brw->max_wm_threads = devinfo->max_wm_threads;
-   compiler->brw->max_cs_threads = devinfo->max_cs_threads;
-   compiler->brw->urb.size = devinfo->urb.size;
-   compiler->brw->urb.min_vs_entries = devinfo->urb.min_vs_entries;
-   compiler->brw->urb.max_vs_entries = devinfo->urb.max_vs_entries;
-   compiler->brw->urb.max_hs_entries = devinfo->urb.max_hs_entries;
-   compiler->brw->urb.max_ds_entries = devinfo->urb.max_ds_entries;
-   compiler->brw->urb.max_gs_entries = devinfo->urb.max_gs_entries;
-
-   compiler->brw->intelScreen = compiler->screen;
-   compiler->screen->devinfo = &device->info;
-
-   brw_process_intel_debug_variable();
-
-   compiler->screen->compiler = device->instance->physicalDevice.compiler;
-
-   ctx = &compiler->brw->ctx;
-   _mesa_init_shader_object_functions(&ctx->Driver);
-
-   /* brw_select_clip_planes() needs this for bogus reasons. */
-   ctx->_Shader = &compiler->pipeline;
-
-   return compiler;
-
- fail:
-   ralloc_free(compiler);
-   return NULL;
-}
-
-void
-anv_compiler_destroy(struct anv_compiler *compiler)
-{
-   _mesa_free_errors_data(&compiler->brw->ctx);
-   ralloc_free(compiler);
-}
-
-/* From gen7_urb.c */
-
-/* FIXME: Add to struct intel_device_info */
-
-static const int gen8_push_size = 32 * 1024;
-
-static void
-gen7_compute_urb_partition(struct anv_pipeline *pipeline)
-{
-   const struct brw_device_info *devinfo = &pipeline->device->info;
-   bool vs_present = pipeline->vs_simd8 != NO_KERNEL;
-   unsigned vs_size = vs_present ? pipeline->vs_prog_data.base.urb_entry_size : 1;
-   unsigned vs_entry_size_bytes = vs_size * 64;
-   bool gs_present = pipeline->gs_vec4 != NO_KERNEL;
-   unsigned gs_size = gs_present ? pipeline->gs_prog_data.base.urb_entry_size : 1;
-   unsigned gs_entry_size_bytes = gs_size * 64;
-
-   /* From p35 of the Ivy Bridge PRM (section 1.7.1: 3DSTATE_URB_GS):
-    *
-    *     VS Number of URB Entries must be divisible by 8 if the VS URB Entry
-    *     Allocation Size is less than 9 512-bit URB entries.
-    *
-    * Similar text exists for GS.
-    */
-   unsigned vs_granularity = (vs_size < 9) ? 8 : 1;
-   unsigned gs_granularity = (gs_size < 9) ? 8 : 1;
-
-   /* URB allocations must be done in 8k chunks. */
-   unsigned chunk_size_bytes = 8192;
-
-   /* Determine the size of the URB in chunks. */
-   unsigned urb_chunks = devinfo->urb.size * 1024 / chunk_size_bytes;
-
-   /* Reserve space for push constants */
-   unsigned push_constant_bytes = gen8_push_size;
-   unsigned push_constant_chunks =
-      push_constant_bytes / chunk_size_bytes;
-
-   /* Initially, assign each stage the minimum amount of URB space it needs,
-    * and make a note of how much additional space it "wants" (the amount of
-    * additional space it could actually make use of).
-    */
-
-   /* VS has a lower limit on the number of URB entries */
-   unsigned vs_chunks =
-      ALIGN(devinfo->urb.min_vs_entries * vs_entry_size_bytes,
-            chunk_size_bytes) / chunk_size_bytes;
-   unsigned vs_wants =
-      ALIGN(devinfo->urb.max_vs_entries * vs_entry_size_bytes,
-            chunk_size_bytes) / chunk_size_bytes - vs_chunks;
-
-   unsigned gs_chunks = 0;
-   unsigned gs_wants = 0;
-   if (gs_present) {
-      /* There are two constraints on the minimum amount of URB space we can
-       * allocate:
-       *
-       * (1) We need room for at least 2 URB entries, since we always operate
-       * the GS in DUAL_OBJECT mode.
-       *
-       * (2) We can't allocate less than nr_gs_entries_granularity.
-       */
-      gs_chunks = ALIGN(MAX2(gs_granularity, 2) * gs_entry_size_bytes,
-                        chunk_size_bytes) / chunk_size_bytes;
-      gs_wants =
-         ALIGN(devinfo->urb.max_gs_entries * gs_entry_size_bytes,
-               chunk_size_bytes) / chunk_size_bytes - gs_chunks;
-   }
-
-   /* There should always be enough URB space to satisfy the minimum
-    * requirements of each stage.
-    */
-   unsigned total_needs = push_constant_chunks + vs_chunks + gs_chunks;
-   assert(total_needs <= urb_chunks);
-
-   /* Mete out remaining space (if any) in proportion to "wants". */
-   unsigned total_wants = vs_wants + gs_wants;
-   unsigned remaining_space = urb_chunks - total_needs;
-   if (remaining_space > total_wants)
-      remaining_space = total_wants;
-   if (remaining_space > 0) {
-      unsigned vs_additional = (unsigned)
-         round(vs_wants * (((double) remaining_space) / total_wants));
-      vs_chunks += vs_additional;
-      remaining_space -= vs_additional;
-      gs_chunks += remaining_space;
-   }
-
-   /* Sanity check that we haven't over-allocated. */
-   assert(push_constant_chunks + vs_chunks + gs_chunks <= urb_chunks);
-
-   /* Finally, compute the number of entries that can fit in the space
-    * allocated to each stage.
-    */
-   unsigned nr_vs_entries = vs_chunks * chunk_size_bytes / vs_entry_size_bytes;
-   unsigned nr_gs_entries = gs_chunks * chunk_size_bytes / gs_entry_size_bytes;
-
-   /* Since we rounded up when computing *_wants, this may be slightly more
-    * than the maximum allowed amount, so correct for that.
-    */
-   nr_vs_entries = MIN2(nr_vs_entries, devinfo->urb.max_vs_entries);
-   nr_gs_entries = MIN2(nr_gs_entries, devinfo->urb.max_gs_entries);
-
-   /* Ensure that we program a multiple of the granularity. */
-   nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, vs_granularity);
-   nr_gs_entries = ROUND_DOWN_TO(nr_gs_entries, gs_granularity);
-
-   /* Finally, sanity check to make sure we have at least the minimum number
-    * of entries needed for each stage.
-    */
-   assert(nr_vs_entries >= devinfo->urb.min_vs_entries);
-   if (gs_present)
-      assert(nr_gs_entries >= 2);
-
-   /* Lay out the URB in the following order:
-    * - push constants
-    * - VS
-    * - GS
-    */
-   pipeline->urb.vs_start = push_constant_chunks;
-   pipeline->urb.vs_size = vs_size;
-   pipeline->urb.nr_vs_entries = nr_vs_entries;
-
-   pipeline->urb.gs_start = push_constant_chunks + vs_chunks;
-   pipeline->urb.gs_size = gs_size;
-   pipeline->urb.nr_gs_entries = nr_gs_entries;
-}
-
-static const struct {
-   uint32_t token;
-   gl_shader_stage stage;
-   const char *name;
-} stage_info[] = {
-   { GL_VERTEX_SHADER, MESA_SHADER_VERTEX, "vertex" },
-   { GL_TESS_CONTROL_SHADER, (gl_shader_stage)-1,"tess control" },
-   { GL_TESS_EVALUATION_SHADER, (gl_shader_stage)-1, "tess evaluation" },
-   { GL_GEOMETRY_SHADER, MESA_SHADER_GEOMETRY, "geometry" },
-   { GL_FRAGMENT_SHADER, MESA_SHADER_FRAGMENT, "fragment" },
-   { GL_COMPUTE_SHADER, MESA_SHADER_COMPUTE, "compute" },
-};
-
-struct spirv_header{
-   uint32_t magic;
-   uint32_t version;
-   uint32_t gen_magic;
-};
-
-static void
-setup_nir_io(struct gl_shader *mesa_shader,
-             nir_shader *shader)
-{
-   struct gl_program *prog = mesa_shader->Program;
-   foreach_list_typed(nir_variable, var, node, &shader->inputs) {
-      prog->InputsRead |= BITFIELD64_BIT(var->data.location);
-      if (shader->stage == MESA_SHADER_FRAGMENT) {
-         struct gl_fragment_program *fprog = (struct gl_fragment_program *)prog;
-
-         fprog->InterpQualifier[var->data.location] =
-            (glsl_interp_qualifier)var->data.interpolation;
-         if (var->data.centroid)
-            fprog->IsCentroid |= BITFIELD64_BIT(var->data.location);
-         if (var->data.sample)
-            fprog->IsSample |= BITFIELD64_BIT(var->data.location);
-      }
-   }
-
-   foreach_list_typed(nir_variable, var, node, &shader->outputs) {
-      prog->OutputsWritten |= BITFIELD64_BIT(var->data.location);
-   }
-
-   shader->info.system_values_read = 0;
-   foreach_list_typed(nir_variable, var, node, &shader->system_values) {
-      shader->info.system_values_read |= BITFIELD64_BIT(var->data.location);
-   }
-
-   shader->info.inputs_read = prog->InputsRead;
-   shader->info.outputs_written = prog->OutputsWritten;
-}
-
-static void
-anv_compile_shader_spirv(struct anv_compiler *compiler,
-                         struct gl_shader_program *program,
-                         struct anv_pipeline *pipeline, uint32_t stage)
-{
-   struct brw_context *brw = compiler->brw;
-   struct anv_shader *shader = pipeline->shaders[stage];
-   struct gl_shader *mesa_shader;
-   int name = 0;
-
-   mesa_shader = brw_new_shader(&brw->ctx, name, stage_info[stage].token);
-   fail_if(mesa_shader == NULL,
-           "failed to create %s shader\n", stage_info[stage].name);
-
-#define CREATE_PROGRAM(stage) \
-   &ralloc(mesa_shader, struct brw_##stage##_program)->program.Base
-
-   bool is_scalar;
-   struct gl_program *prog;
-   switch (stage) {
-   case VK_SHADER_STAGE_VERTEX:
-      prog = CREATE_PROGRAM(vertex);
-      is_scalar = compiler->screen->compiler->scalar_vs;
-      break;
-   case VK_SHADER_STAGE_GEOMETRY:
-      prog = CREATE_PROGRAM(geometry);
-      is_scalar = false;
-      break;
-   case VK_SHADER_STAGE_FRAGMENT:
-      prog = CREATE_PROGRAM(fragment);
-      is_scalar = true;
-      break;
-   case VK_SHADER_STAGE_COMPUTE:
-      prog = CREATE_PROGRAM(compute);
-      is_scalar = true;
-      break;
-   default:
-      unreachable("Unsupported shader stage");
-   }
-   _mesa_init_gl_program(prog, 0, 0);
-   _mesa_reference_program(&brw->ctx, &mesa_shader->Program, prog);
-
-   mesa_shader->Program->Parameters =
-      rzalloc(mesa_shader, struct gl_program_parameter_list);
-
-   mesa_shader->Type = stage_info[stage].token;
-   mesa_shader->Stage = stage_info[stage].stage;
-   struct gl_shader_compiler_options *glsl_options =
-      &compiler->screen->compiler->glsl_compiler_options[stage_info[stage].stage];
-
-   if (shader->module->nir) {
-      /* Some things such as our meta clear/blit code will give us a NIR
-       * shader directly. In that case, we just ignore the SPIR-V entirely
-       * and just use the NIR shader */
-      mesa_shader->Program->nir = shader->module->nir;
-      mesa_shader->Program->nir->options = glsl_options->NirOptions;
-   } else {
-      uint32_t *spirv = (uint32_t *) shader->module->data;
-      assert(spirv[0] == SPIR_V_MAGIC_NUMBER);
-      assert(shader->module->size % 4 == 0);
-
-      mesa_shader->Program->nir =
-         spirv_to_nir(spirv, shader->module->size / 4,
-                      stage_info[stage].stage, glsl_options->NirOptions);
-   }
-   nir_validate_shader(mesa_shader->Program->nir);
-
-   brw_preprocess_nir(mesa_shader->Program->nir,
-                      compiler->screen->devinfo, is_scalar);
-
-   setup_nir_io(mesa_shader, mesa_shader->Program->nir);
-
-   brw_postprocess_nir(mesa_shader->Program->nir,
-                       compiler->screen->devinfo, is_scalar);
-
-   mesa_shader->num_uniform_components =
-      mesa_shader->Program->nir->num_uniforms;
-
-   fail_if(mesa_shader->Program->nir == NULL,
-           "failed to translate SPIR-V to NIR\n");
-
-   _mesa_reference_shader(&brw->ctx, &program->Shaders[program->NumShaders],
-                          mesa_shader);
-   program->NumShaders++;
-}
-
-static void
-add_compiled_stage(struct anv_pipeline *pipeline, uint32_t stage,
-                   struct brw_stage_prog_data *prog_data)
-{
-   struct brw_device_info *devinfo = &pipeline->device->info;
-   uint32_t max_threads[] = {
-      [VK_SHADER_STAGE_VERTEX] = devinfo->max_vs_threads,
-      [VK_SHADER_STAGE_TESS_CONTROL] = 0,
-      [VK_SHADER_STAGE_TESS_EVALUATION] = 0,
-      [VK_SHADER_STAGE_GEOMETRY] = devinfo->max_gs_threads,
-      [VK_SHADER_STAGE_FRAGMENT] = devinfo->max_wm_threads,
-      [VK_SHADER_STAGE_COMPUTE] = devinfo->max_cs_threads,
-   };
-
-   pipeline->prog_data[stage] = prog_data;
-   pipeline->active_stages |= 1 << stage;
-   pipeline->scratch_start[stage] = pipeline->total_scratch;
-   pipeline->total_scratch =
-      align_u32(pipeline->total_scratch, 1024) +
-      prog_data->total_scratch * max_threads[stage];
-}
-
-int
-anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline)
-{
-   struct gl_shader_program *program;
-   int name = 0;
-   struct brw_context *brw = compiler->brw;
-
-   pipeline->writes_point_size = false;
-
-   /* When we free the pipeline, we detect stages based on the NULL status
-    * of various prog_data pointers. Make them NULL by default.
-    */
-   memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data));
-   memset(pipeline->scratch_start, 0, sizeof(pipeline->scratch_start));
-
-   brw->use_rep_send = pipeline->use_repclear;
-   brw->no_simd8 = pipeline->use_repclear;
-
-   program = _mesa_new_shader_program(name);
-   program->Shaders = (struct gl_shader **)
-      calloc(VK_SHADER_STAGE_NUM, sizeof(struct gl_shader *));
-   fail_if(program == NULL || program->Shaders == NULL,
-           "failed to create program\n");
-
-   for (unsigned i = 0; i < VK_SHADER_STAGE_NUM; i++) {
-      if (pipeline->shaders[i])
-         anv_compile_shader_spirv(compiler, program, pipeline, i);
-   }
-
-   for (unsigned i = 0; i < program->NumShaders; i++) {
-      struct gl_shader *shader = program->Shaders[i];
-      program->_LinkedShaders[shader->Stage] = shader;
-   }
-
-   bool success;
-   pipeline->active_stages = 0;
-   pipeline->total_scratch = 0;
-
-   if (pipeline->shaders[VK_SHADER_STAGE_VERTEX]) {
-      struct brw_vs_prog_key vs_key;
-      struct gl_vertex_program *vp = (struct gl_vertex_program *)
-         program->_LinkedShaders[MESA_SHADER_VERTEX]->Program;
-      struct brw_vertex_program *bvp = brw_vertex_program(vp);
-
-      brw_vs_populate_key(brw, bvp, &vs_key);
-
-      success = really_do_vs_prog(brw, program, bvp, &vs_key, pipeline);
-      fail_if(!success, "do_wm_prog failed\n");
-      add_compiled_stage(pipeline, VK_SHADER_STAGE_VERTEX,
-                         &pipeline->vs_prog_data.base.base);
-
-      if (vp->Base.OutputsWritten & VARYING_SLOT_PSIZ)
-         pipeline->writes_point_size = true;
-   } else {
-      memset(&pipeline->vs_prog_data, 0, sizeof(pipeline->vs_prog_data));
-      pipeline->vs_simd8 = NO_KERNEL;
-      pipeline->vs_vec4 = NO_KERNEL;
-   }
-
-   /* Geometry shaders not yet supported */
-   anv_assert(pipeline->shaders[VK_SHADER_STAGE_GEOMETRY] == NULL);
-   pipeline->gs_vec4 = NO_KERNEL;
-
-   if (pipeline->shaders[VK_SHADER_STAGE_FRAGMENT]) {
-      struct brw_wm_prog_key wm_key;
-      struct gl_fragment_program *fp = (struct gl_fragment_program *)
-         program->_LinkedShaders[MESA_SHADER_FRAGMENT]->Program;
-      struct brw_fragment_program *bfp = brw_fragment_program(fp);
-
-      brw_wm_populate_key(brw, bfp, &wm_key);
-
-      success = really_do_wm_prog(brw, program, bfp, &wm_key, pipeline);
-      fail_if(!success, "do_wm_prog failed\n");
-      add_compiled_stage(pipeline, VK_SHADER_STAGE_FRAGMENT,
-                         &pipeline->wm_prog_data.base);
-   }
-
-   if (pipeline->shaders[VK_SHADER_STAGE_COMPUTE]) {
-      struct brw_cs_prog_key cs_key;
-      struct gl_compute_program *cp = (struct gl_compute_program *)
-         program->_LinkedShaders[MESA_SHADER_COMPUTE]->Program;
-      struct brw_compute_program *bcp = brw_compute_program(cp);
-
-      brw_cs_populate_key(brw, bcp, &cs_key);
-
-      success = brw_codegen_cs_prog(brw, program, bcp, &cs_key, pipeline);
-      fail_if(!success, "brw_codegen_cs_prog failed\n");
-      add_compiled_stage(pipeline, VK_SHADER_STAGE_COMPUTE,
-                         &pipeline->cs_prog_data.base);
-   }
-
-   _mesa_delete_shader_program(&brw->ctx, program);
-
-   struct anv_device *device = compiler->device;
-   while (device->scratch_block_pool.bo.size < pipeline->total_scratch)
-      anv_block_pool_alloc(&device->scratch_block_pool);
-
-   gen7_compute_urb_partition(pipeline);
-
-   return 0;
-}
-
-/* This badly named function frees the struct anv_pipeline data that the compiler
- * allocates. Currently just the prog_data structs.
- */
-void
-anv_compiler_free(struct anv_pipeline *pipeline)
-{
-   for (uint32_t stage = 0; stage < VK_SHADER_STAGE_NUM; stage++) {
-      if (pipeline->prog_data[stage]) {
-         /* We only ever set up the params array because we don't do
-          * non-UBO pull constants
-          */
-         anv_device_free(pipeline->device, pipeline->prog_data[stage]->param);
-      }
-   }
-}
-
-}