author    | Jason Ekstrand <[email protected]> | 2016-02-09 15:30:39 -0800
committer | Jason Ekstrand <[email protected]> | 2016-02-09 15:30:39 -0800
commit    | 768bd7f272e0dfd8cc17c49750fe8aaab78bb420 (patch)
tree      | 5d8e07b6539565cbfe9ebff5d77cde8b6a8bf566 /src/compiler
parent    | 4c5dcccfba3c9d0e5c7302aa797ad8d31f18cf52 (diff)
parent    | 8b0fb1c152fe191768953aa8c77b89034a377f83 (diff)
Merge commit '8b0fb1c152fe191768953aa8c77b89034a377f83' into vulkan
This pulls in Rob Clark's const_index changes for NIR
Diffstat (limited to 'src/compiler')
25 files changed, 539 insertions, 446 deletions
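The core of the merged const_index rework: each NIR intrinsic now declares, via an index_map in nir_intrinsic_info, which of its const_index[] slots means what (BASE, WRMASK, STREAM_ID, and so on), and callers use typed accessors generated by the INTRINSIC_IDX_ACCESSORS macro instead of writing raw slots. A minimal sketch of the usage change, modeled on the store_clipdist_output() hunk in nir_lower_clip.c below; the surrounding setup (a nir_builder b, an output variable out, a vec4 SSA value val) is assumed:

/* Sketch: emit a store_output of a vec4 using the new typed accessors. */
static void
emit_store_output(nir_builder *b, nir_variable *out, nir_ssa_def *val)
{
   nir_intrinsic_instr *store =
      nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_output);
   store->num_components = 4;

   /* The old style wrote raw slots and relied on the caller knowing that
    * for store_output const_index[0] is the base and const_index[1] the
    * write mask:
    *
    *    store->const_index[0] = out->data.driver_location;
    *    store->const_index[1] = 0xf;
    *
    * The named accessors instead look the slot up in index_map and
    * assert that store_output really defines BASE and WRMASK indices.
    */
   nir_intrinsic_set_base(store, out->data.driver_location);
   nir_intrinsic_set_write_mask(store, 0xf);

   /* store_output sources are { value, offset }. */
   store->src[0] = nir_src_for_ssa(val);
   store->src[1] = nir_src_for_ssa(nir_imm_int(b, 0));
   nir_builder_instr_insert(b, &store->instr);
}

Because the accessors assert that the intrinsic actually has the requested index, a slot mix-up that previously corrupted state silently now fails an assertion at the call site.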
diff --git a/src/compiler/glsl/ast_function.cpp b/src/compiler/glsl/ast_function.cpp index 0eb456a2b1f..c7fdcb24379 100644 --- a/src/compiler/glsl/ast_function.cpp +++ b/src/compiler/glsl/ast_function.cpp @@ -560,7 +560,8 @@ done: state->symbols->add_global_function(f); emit_function(state, f); } - f->add_signature(sig->clone_prototype(f, NULL)); + sig = sig->clone_prototype(f, NULL); + f->add_signature(sig); } } return sig; diff --git a/src/compiler/glsl/ast_to_hir.cpp b/src/compiler/glsl/ast_to_hir.cpp index 7213ad8ebec..a4842400288 100644 --- a/src/compiler/glsl/ast_to_hir.cpp +++ b/src/compiler/glsl/ast_to_hir.cpp @@ -4211,33 +4211,46 @@ ast_declarator_list::hir(exec_list *instructions, _mesa_glsl_error(&loc, state, "invalid type `%s' in empty declaration", type_name); - } else if (decl_type->base_type == GLSL_TYPE_ATOMIC_UINT) { - /* Empty atomic counter declarations are allowed and useful - * to set the default offset qualifier. - */ - return NULL; - } else if (this->type->qualifier.precision != ast_precision_none) { - if (this->type->specifier->structure != NULL) { - _mesa_glsl_error(&loc, state, - "precision qualifiers can't be applied " - "to structures"); - } else { - static const char *const precision_names[] = { - "highp", - "highp", - "mediump", - "lowp" - }; + } else { + if (decl_type->base_type == GLSL_TYPE_ARRAY) { + /* From Section 4.12 (Empty Declarations) of the GLSL 4.5 spec: + * + * "The combinations of types and qualifiers that cause + * compile-time or link-time errors are the same whether or not + * the declaration is empty." + */ + validate_array_dimensions(decl_type, state, &loc); + } - _mesa_glsl_warning(&loc, state, - "empty declaration with precision qualifier, " - "to set the default precision, use " - "`precision %s %s;'", - precision_names[this->type->qualifier.precision], - type_name); + if (decl_type->base_type == GLSL_TYPE_ATOMIC_UINT) { + /* Empty atomic counter declarations are allowed and useful + * to set the default offset qualifier. 
+ */ + return NULL; + } else if (this->type->qualifier.precision != ast_precision_none) { + if (this->type->specifier->structure != NULL) { + _mesa_glsl_error(&loc, state, + "precision qualifiers can't be applied " + "to structures"); + } else { + static const char *const precision_names[] = { + "highp", + "highp", + "mediump", + "lowp" + }; + + _mesa_glsl_warning(&loc, state, + "empty declaration with precision " + "qualifier, to set the default precision, " + "use `precision %s %s;'", + precision_names[this->type-> + qualifier.precision], + type_name); + } + } else if (this->type->specifier->structure == NULL) { + _mesa_glsl_warning(&loc, state, "empty declaration"); } - } else if (this->type->specifier->structure == NULL) { - _mesa_glsl_warning(&loc, state, "empty declaration"); } } diff --git a/src/compiler/glsl/glsl_parser_extras.cpp b/src/compiler/glsl/glsl_parser_extras.cpp index d7a4b254aa2..73d378c4bc9 100644 --- a/src/compiler/glsl/glsl_parser_extras.cpp +++ b/src/compiler/glsl/glsl_parser_extras.cpp @@ -27,6 +27,7 @@ #include "main/core.h" /* for struct gl_context */ #include "main/context.h" +#include "main/debug_output.h" #include "main/shaderobj.h" #include "util/u_atomic.h" /* for p_atomic_cmpxchg */ #include "util/ralloc.h" diff --git a/src/compiler/glsl/ir.h b/src/compiler/glsl/ir.h index 09e21b22188..bf9b7caffae 100644 --- a/src/compiler/glsl/ir.h +++ b/src/compiler/glsl/ir.h @@ -864,6 +864,14 @@ public: int location; /** + * for glsl->tgsi/mesa IR we need to store the index into the + * parameters for uniforms, initially the code overloaded location + * but this causes problems with indirect samplers and AoA. + * This is assigned in _mesa_generate_parameters_list_for_uniforms. + */ + int param_index; + + /** * Vertex stream output identifier. 
*/ unsigned stream; diff --git a/src/compiler/glsl/link_varyings.cpp b/src/compiler/glsl/link_varyings.cpp index a4c730ffdcf..590de174507 100644 --- a/src/compiler/glsl/link_varyings.cpp +++ b/src/compiler/glsl/link_varyings.cpp @@ -1352,7 +1352,7 @@ private: namespace linker { -bool +void populate_consumer_input_sets(void *mem_ctx, exec_list *ir, hash_table *consumer_inputs, hash_table *consumer_interface_inputs, @@ -1366,8 +1366,8 @@ populate_consumer_input_sets(void *mem_ctx, exec_list *ir, ir_variable *const input_var = node->as_variable(); if ((input_var != NULL) && (input_var->data.mode == ir_var_shader_in)) { - if (input_var->type->is_interface()) - return false; + /* All interface blocks should have been lowered by this point */ + assert(!input_var->type->is_interface()); if (input_var->data.explicit_location) { /* assign_varying_locations only cares about finding the @@ -1401,8 +1401,6 @@ populate_consumer_input_sets(void *mem_ctx, exec_list *ir, } } } - - return true; } /** @@ -1626,18 +1624,11 @@ assign_varying_locations(struct gl_context *ctx, if (producer) canonicalize_shader_io(producer->ir, ir_var_shader_out); - if (consumer - && !linker::populate_consumer_input_sets(mem_ctx, - consumer->ir, - consumer_inputs, - consumer_interface_inputs, - consumer_inputs_with_locations)) { - assert(!"populate_consumer_input_sets failed"); - hash_table_dtor(tfeedback_candidates); - hash_table_dtor(consumer_inputs); - hash_table_dtor(consumer_interface_inputs); - return false; - } + if (consumer) + linker::populate_consumer_input_sets(mem_ctx, consumer->ir, + consumer_inputs, + consumer_interface_inputs, + consumer_inputs_with_locations); if (producer) { foreach_in_list(ir_instruction, node, producer->ir) { @@ -1652,8 +1643,10 @@ assign_varying_locations(struct gl_context *ctx, (output_var->data.stream < MAX_VERTEX_STREAMS && producer->Stage == MESA_SHADER_GEOMETRY)); - tfeedback_candidate_generator g(mem_ctx, tfeedback_candidates); - g.process(output_var); + if (num_tfeedback_decls > 0) { + tfeedback_candidate_generator g(mem_ctx, tfeedback_candidates); + g.process(output_var); + } ir_variable *const input_var = linker::get_matching_input(mem_ctx, output_var, consumer_inputs, diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp index 4776ffa6acd..bad1c1742b7 100644 --- a/src/compiler/glsl/linker.cpp +++ b/src/compiler/glsl/linker.cpp @@ -82,8 +82,6 @@ #include "main/enums.h" -void linker_error(gl_shader_program *, const char *, ...); - namespace { /** @@ -2125,6 +2123,7 @@ link_intrastage_shaders(void *mem_ctx, if (ok) { memcpy(linking_shaders, shader_list, num_shaders * sizeof(gl_shader *)); + _mesa_glsl_initialize_builtin_functions(); linking_shaders[num_shaders] = _mesa_glsl_get_builtin_function_shader(); ok = link_function_calls(prog, linked, linking_shaders, num_shaders + 1); @@ -4105,15 +4104,34 @@ disable_varying_optimizations_for_sso(struct gl_shader_program *prog) void link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) { + prog->LinkStatus = true; /* All error paths will set this to false */ + prog->Validated = false; + prog->_Used = false; + + /* Section 7.3 (Program Objects) of the OpenGL 4.5 Core Profile spec says: + * + * "Linking can fail for a variety of reasons as specified in the + * OpenGL Shading Language Specification, as well as any of the + * following reasons: + * + * - No shader objects are attached to program." + * + * The Compatibility Profile specification does not list the error. 
In + * Compatibility Profile missing shader stages are replaced by + * fixed-function. This applies to the case where all stages are + * missing. + */ + if (prog->NumShaders == 0) { + if (ctx->API != API_OPENGL_COMPAT) + linker_error(prog, "no shaders attached to the program\n"); + return; + } + tfeedback_decl *tfeedback_decls = NULL; unsigned num_tfeedback_decls = prog->TransformFeedback.NumVarying; void *mem_ctx = ralloc_context(NULL); // temporary linker context - prog->LinkStatus = true; /* All error paths will set this to false */ - prog->Validated = false; - prog->_Used = false; - prog->ARB_fragment_coord_conventions_enable = false; /* Separate the shaders into groups based on their type. @@ -4129,13 +4147,11 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) unsigned min_version = UINT_MAX; unsigned max_version = 0; - const bool is_es_prog = - (prog->NumShaders > 0 && prog->Shaders[0]->IsES) ? true : false; for (unsigned i = 0; i < prog->NumShaders; i++) { min_version = MIN2(min_version, prog->Shaders[i]->Version); max_version = MAX2(max_version, prog->Shaders[i]->Version); - if (prog->Shaders[i]->IsES != is_es_prog) { + if (prog->Shaders[i]->IsES != prog->Shaders[0]->IsES) { linker_error(prog, "all shaders must use same shading " "language version\n"); goto done; @@ -4153,80 +4169,59 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) /* In desktop GLSL, different shader versions may be linked together. In * GLSL ES, all shader versions must be the same. */ - if (is_es_prog && min_version != max_version) { + if (prog->Shaders[0]->IsES && min_version != max_version) { linker_error(prog, "all shaders must use same shading " "language version\n"); goto done; } prog->Version = max_version; - prog->IsES = is_es_prog; - - /* From OpenGL 4.5 Core specification (7.3 Program Objects): - * "Linking can fail for a variety of reasons as specified in the OpenGL - * Shading Language Specification, as well as any of the following - * reasons: - * - * * No shader objects are attached to program. - * - * ..." - * - * Same rule applies for OpenGL ES >= 3.1. - */ - - if (prog->NumShaders == 0 && - ((ctx->API == API_OPENGL_CORE && ctx->Version >= 45) || - (ctx->API == API_OPENGLES2 && ctx->Version >= 31))) { - linker_error(prog, "No shader objects are attached to program.\n"); - goto done; - } + prog->IsES = prog->Shaders[0]->IsES; /* Some shaders have to be linked with some other shaders present. 
*/ - if (num_shaders[MESA_SHADER_GEOMETRY] > 0 && - num_shaders[MESA_SHADER_VERTEX] == 0 && - !prog->SeparateShader) { - linker_error(prog, "Geometry shader must be linked with " - "vertex shader\n"); - goto done; - } - if (num_shaders[MESA_SHADER_TESS_EVAL] > 0 && - num_shaders[MESA_SHADER_VERTEX] == 0 && - !prog->SeparateShader) { - linker_error(prog, "Tessellation evaluation shader must be linked with " - "vertex shader\n"); - goto done; - } - if (num_shaders[MESA_SHADER_TESS_CTRL] > 0 && - num_shaders[MESA_SHADER_VERTEX] == 0 && - !prog->SeparateShader) { - linker_error(prog, "Tessellation control shader must be linked with " - "vertex shader\n"); - goto done; - } + if (!prog->SeparateShader) { + if (num_shaders[MESA_SHADER_GEOMETRY] > 0 && + num_shaders[MESA_SHADER_VERTEX] == 0) { + linker_error(prog, "Geometry shader must be linked with " + "vertex shader\n"); + goto done; + } + if (num_shaders[MESA_SHADER_TESS_EVAL] > 0 && + num_shaders[MESA_SHADER_VERTEX] == 0) { + linker_error(prog, "Tessellation evaluation shader must be linked " + "with vertex shader\n"); + goto done; + } + if (num_shaders[MESA_SHADER_TESS_CTRL] > 0 && + num_shaders[MESA_SHADER_VERTEX] == 0) { + linker_error(prog, "Tessellation control shader must be linked with " + "vertex shader\n"); + goto done; + } - /* The spec is self-contradictory here. It allows linking without a tess - * eval shader, but that can only be used with transform feedback and - * rasterization disabled. However, transform feedback isn't allowed - * with GL_PATCHES, so it can't be used. - * - * More investigation showed that the idea of transform feedback after - * a tess control shader was dropped, because some hw vendors couldn't - * support tessellation without a tess eval shader, but the linker section - * wasn't updated to reflect that. - * - * All specifications (ARB_tessellation_shader, GL 4.0-4.5) have this - * spec bug. - * - * Do what's reasonable and always require a tess eval shader if a tess - * control shader is present. - */ - if (num_shaders[MESA_SHADER_TESS_CTRL] > 0 && - num_shaders[MESA_SHADER_TESS_EVAL] == 0 && - !prog->SeparateShader) { - linker_error(prog, "Tessellation control shader must be linked with " - "tessellation evaluation shader\n"); - goto done; + /* The spec is self-contradictory here. It allows linking without a tess + * eval shader, but that can only be used with transform feedback and + * rasterization disabled. However, transform feedback isn't allowed + * with GL_PATCHES, so it can't be used. + * + * More investigation showed that the idea of transform feedback after + * a tess control shader was dropped, because some hw vendors couldn't + * support tessellation without a tess eval shader, but the linker + * section wasn't updated to reflect that. + * + * All specifications (ARB_tessellation_shader, GL 4.0-4.5) have this + * spec bug. + * + * Do what's reasonable and always require a tess eval shader if a tess + * control shader is present. + */ + if (num_shaders[MESA_SHADER_TESS_CTRL] > 0 && + num_shaders[MESA_SHADER_TESS_EVAL] == 0) { + linker_error(prog, "Tessellation control shader must be linked with " + "tessellation evaluation shader\n"); + goto done; + } } /* Compute shaders have additional restrictions. */ @@ -4362,7 +4357,7 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) * * This rule also applies to GLSL ES 3.00. */ - if (max_version >= (is_es_prog ? 300 : 130)) { + if (max_version >= (prog->IsES ? 
300 : 130)) { struct gl_shader *sh = prog->_LinkedShaders[MESA_SHADER_FRAGMENT]; if (sh) { lower_discard_flow(sh->ir); @@ -4451,9 +4446,10 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) * non-zero, but the program object has no vertex or geometry * shader; */ - if (first == MESA_SHADER_FRAGMENT) { + if (first >= MESA_SHADER_FRAGMENT) { linker_error(prog, "Transform feedback varyings specified, but " - "no vertex or geometry shader is present.\n"); + "no vertex, tessellation, or geometry shader is " + "present.\n"); goto done; } @@ -4465,91 +4461,80 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) goto done; } - /* Linking the stages in the opposite order (from fragment to vertex) - * ensures that inter-shader outputs written to in an earlier stage are - * eliminated if they are (transitively) not used in a later stage. + /* If there is no fragment shader we need to set transform feedback. + * + * For SSO we need also need to assign output locations, we assign them + * here because we need to do it for both single stage programs and multi + * stage programs. */ - int next; - - if (first < MESA_SHADER_FRAGMENT) { - gl_shader *const sh = prog->_LinkedShaders[last]; - - if (first != MESA_SHADER_VERTEX) { - /* There was no vertex shader, but we still have to assign varying - * locations for use by tessellation/geometry shader inputs in SSO. - * - * If the shader is not separable (i.e., prog->SeparateShader is - * false), linking will have already failed when first is not - * MESA_SHADER_VERTEX. - */ - if (!assign_varying_locations(ctx, mem_ctx, prog, - NULL, prog->_LinkedShaders[first], - num_tfeedback_decls, tfeedback_decls)) - goto done; - } - - if (last != MESA_SHADER_FRAGMENT && - (num_tfeedback_decls != 0 || prog->SeparateShader)) { - /* There was no fragment shader, but we still have to assign varying - * locations for use by transform feedback. - */ - if (!assign_varying_locations(ctx, mem_ctx, prog, - sh, NULL, - num_tfeedback_decls, tfeedback_decls)) - goto done; - } - - do_dead_builtin_varyings(ctx, sh, NULL, - num_tfeedback_decls, tfeedback_decls); + if (last < MESA_SHADER_FRAGMENT && + (num_tfeedback_decls != 0 || prog->SeparateShader)) { + if (!assign_varying_locations(ctx, mem_ctx, prog, + prog->_LinkedShaders[last], NULL, + num_tfeedback_decls, tfeedback_decls)) + goto done; + } - remove_unused_shader_inputs_and_outputs(prog->SeparateShader, sh, + if (last <= MESA_SHADER_FRAGMENT) { + /* Remove unused varyings from the first/last stage unless SSO */ + remove_unused_shader_inputs_and_outputs(prog->SeparateShader, + prog->_LinkedShaders[first], + ir_var_shader_in); + remove_unused_shader_inputs_and_outputs(prog->SeparateShader, + prog->_LinkedShaders[last], ir_var_shader_out); - } - else if (first == MESA_SHADER_FRAGMENT) { - /* If the program only contains a fragment shader... 
- */ - gl_shader *const sh = prog->_LinkedShaders[first]; - do_dead_builtin_varyings(ctx, NULL, sh, - num_tfeedback_decls, tfeedback_decls); + /* If the program is made up of only a single stage */ + if (first == last) { - if (prog->SeparateShader) { - if (!assign_varying_locations(ctx, mem_ctx, prog, - NULL /* producer */, - sh /* consumer */, - 0 /* num_tfeedback_decls */, - NULL /* tfeedback_decls */)) - goto done; - } else { - remove_unused_shader_inputs_and_outputs(false, sh, - ir_var_shader_in); - } - } + gl_shader *const sh = prog->_LinkedShaders[last]; + if (prog->SeparateShader) { + /* Assign input locations for SSO, output locations are already + * assigned. + */ + if (!assign_varying_locations(ctx, mem_ctx, prog, + NULL /* producer */, + sh /* consumer */, + 0 /* num_tfeedback_decls */, + NULL /* tfeedback_decls */)) + goto done; + } - next = last; - for (int i = next - 1; i >= 0; i--) { - if (prog->_LinkedShaders[i] == NULL) - continue; + do_dead_builtin_varyings(ctx, NULL, sh, 0, NULL); + do_dead_builtin_varyings(ctx, sh, NULL, num_tfeedback_decls, + tfeedback_decls); + } else { + /* Linking the stages in the opposite order (from fragment to vertex) + * ensures that inter-shader outputs written to in an earlier stage + * are eliminated if they are (transitively) not used in a later + * stage. + */ + int next = last; + for (int i = next - 1; i >= 0; i--) { + if (prog->_LinkedShaders[i] == NULL) + continue; - gl_shader *const sh_i = prog->_LinkedShaders[i]; - gl_shader *const sh_next = prog->_LinkedShaders[next]; + gl_shader *const sh_i = prog->_LinkedShaders[i]; + gl_shader *const sh_next = prog->_LinkedShaders[next]; - if (!assign_varying_locations(ctx, mem_ctx, prog, sh_i, sh_next, - next == MESA_SHADER_FRAGMENT ? num_tfeedback_decls : 0, - tfeedback_decls)) - goto done; + if (!assign_varying_locations(ctx, mem_ctx, prog, sh_i, sh_next, + next == MESA_SHADER_FRAGMENT ? num_tfeedback_decls : 0, + tfeedback_decls)) + goto done; - do_dead_builtin_varyings(ctx, sh_i, sh_next, - next == MESA_SHADER_FRAGMENT ? num_tfeedback_decls : 0, - tfeedback_decls); + do_dead_builtin_varyings(ctx, sh_i, sh_next, + next == MESA_SHADER_FRAGMENT ? num_tfeedback_decls : 0, + tfeedback_decls); - /* This must be done after all dead varyings are eliminated. */ - if (!check_against_output_limit(ctx, prog, sh_i)) - goto done; - if (!check_against_input_limit(ctx, prog, sh_next)) - goto done; + /* This must be done after all dead varyings are eliminated. */ + if (!check_against_output_limit(ctx, prog, sh_i)) + goto done; + if (!check_against_input_limit(ctx, prog, sh_next)) + goto done; - next = i; + next = i; + } + } } if (!store_tfeedback_info(ctx, prog, num_tfeedback_decls, tfeedback_decls)) @@ -4569,38 +4554,38 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) if (!prog->LinkStatus) goto done; - /* OpenGL ES requires that a vertex shader and a fragment shader both be - * present in a linked program. GL_ARB_ES2_compatibility doesn't say + /* OpenGL ES < 3.1 requires that a vertex shader and a fragment shader both + * be present in a linked program. GL_ARB_ES2_compatibility doesn't say * anything about shader linking when one of the shaders (vertex or * fragment shader) is absent. So, the extension shouldn't change the * behavior specified in GLSL specification. 
+ * + * From OpenGL ES 3.1 specification (7.3 Program Objects): + * "Linking can fail for a variety of reasons as specified in the + * OpenGL ES Shading Language Specification, as well as any of the + * following reasons: + * + * ... + * + * * program contains objects to form either a vertex shader or + * fragment shader, and program is not separable, and does not + * contain objects to form both a vertex shader and fragment + * shader." + * + * However, the only scenario in 3.1+ where we don't require them both is + * when we have a compute shader. For example: + * + * - No shaders is a link error. + * - Geom or Tess without a Vertex shader is a link error which means we + * always require a Vertex shader and hence a Fragment shader. + * - Finally a Compute shader linked with any other stage is a link error. */ - if (!prog->SeparateShader && ctx->API == API_OPENGLES2) { - /* With ES < 3.1 one needs to have always vertex + fragment shader. */ - if (ctx->Version < 31) { - if (prog->_LinkedShaders[MESA_SHADER_VERTEX] == NULL) { - linker_error(prog, "program lacks a vertex shader\n"); - } else if (prog->_LinkedShaders[MESA_SHADER_FRAGMENT] == NULL) { - linker_error(prog, "program lacks a fragment shader\n"); - } - } else { - /* From OpenGL ES 3.1 specification (7.3 Program Objects): - * "Linking can fail for a variety of reasons as specified in the - * OpenGL ES Shading Language Specification, as well as any of the - * following reasons: - * - * ... - * - * * program contains objects to form either a vertex shader or - * fragment shader, and program is not separable, and does not - * contain objects to form both a vertex shader and fragment - * shader." - */ - if (!!prog->_LinkedShaders[MESA_SHADER_VERTEX] ^ - !!prog->_LinkedShaders[MESA_SHADER_FRAGMENT]) { - linker_error(prog, "Program needs to contain both vertex and " - "fragment shaders.\n"); - } + if (!prog->SeparateShader && ctx->API == API_OPENGLES2 && + num_shaders[MESA_SHADER_COMPUTE] == 0) { + if (prog->_LinkedShaders[MESA_SHADER_VERTEX] == NULL) { + linker_error(prog, "program lacks a vertex shader\n"); + } else if (prog->_LinkedShaders[MESA_SHADER_FRAGMENT] == NULL) { + linker_error(prog, "program lacks a fragment shader\n"); } } diff --git a/src/compiler/glsl/tests/varyings_test.cpp b/src/compiler/glsl/tests/varyings_test.cpp index 0c4e0a471b8..9be5e8344b4 100644 --- a/src/compiler/glsl/tests/varyings_test.cpp +++ b/src/compiler/glsl/tests/varyings_test.cpp @@ -156,11 +156,11 @@ TEST_F(link_varyings, single_simple_input) ir.push_tail(v); - ASSERT_TRUE(linker::populate_consumer_input_sets(mem_ctx, - &ir, - consumer_inputs, - consumer_interface_inputs, - junk)); + linker::populate_consumer_input_sets(mem_ctx, + &ir, + consumer_inputs, + consumer_interface_inputs, + junk); EXPECT_EQ((void *) v, hash_table_find(consumer_inputs, "a")); EXPECT_EQ(1u, num_elements(consumer_inputs)); @@ -183,11 +183,11 @@ TEST_F(link_varyings, gl_ClipDistance) ir.push_tail(clipdistance); - ASSERT_TRUE(linker::populate_consumer_input_sets(mem_ctx, - &ir, - consumer_inputs, - consumer_interface_inputs, - junk)); + linker::populate_consumer_input_sets(mem_ctx, + &ir, + consumer_inputs, + consumer_interface_inputs, + junk); EXPECT_EQ(clipdistance, junk[VARYING_SLOT_CLIP_DIST0]); EXPECT_TRUE(is_empty(consumer_inputs)); @@ -205,11 +205,11 @@ TEST_F(link_varyings, single_interface_input) ir.push_tail(v); - ASSERT_TRUE(linker::populate_consumer_input_sets(mem_ctx, - &ir, - consumer_inputs, - consumer_interface_inputs, - junk)); + 
linker::populate_consumer_input_sets(mem_ctx, + &ir, + consumer_inputs, + consumer_interface_inputs, + junk); char *const full_name = interface_field_name(simple_interface); EXPECT_EQ((void *) v, hash_table_find(consumer_interface_inputs, full_name)); @@ -236,11 +236,11 @@ TEST_F(link_varyings, one_interface_and_one_simple_input) ir.push_tail(iface); - ASSERT_TRUE(linker::populate_consumer_input_sets(mem_ctx, - &ir, - consumer_inputs, - consumer_interface_inputs, - junk)); + linker::populate_consumer_input_sets(mem_ctx, + &ir, + consumer_inputs, + consumer_interface_inputs, + junk); char *const iface_field_name = interface_field_name(simple_interface); @@ -252,24 +252,6 @@ TEST_F(link_varyings, one_interface_and_one_simple_input) EXPECT_EQ(1u, num_elements(consumer_inputs)); } -TEST_F(link_varyings, invalid_interface_input) -{ - ir_variable *const v = - new(mem_ctx) ir_variable(simple_interface, - "named_interface", - ir_var_shader_in); - - ASSERT_EQ(simple_interface, v->get_interface_type()); - - ir.push_tail(v); - - EXPECT_FALSE(linker::populate_consumer_input_sets(mem_ctx, - &ir, - consumer_inputs, - consumer_interface_inputs, - junk)); -} - TEST_F(link_varyings, interface_field_doesnt_match_noninterface) { char *const iface_field_name = interface_field_name(simple_interface); @@ -283,11 +265,11 @@ TEST_F(link_varyings, interface_field_doesnt_match_noninterface) ir.push_tail(in_v); - ASSERT_TRUE(linker::populate_consumer_input_sets(mem_ctx, - &ir, - consumer_inputs, - consumer_interface_inputs, - junk)); + linker::populate_consumer_input_sets(mem_ctx, + &ir, + consumer_inputs, + consumer_interface_inputs, + junk); /* Create an output variable, "v", that is part of an interface block named * "a". They should not match. @@ -325,11 +307,11 @@ TEST_F(link_varyings, interface_field_doesnt_match_noninterface_vice_versa) ir.push_tail(in_v); - ASSERT_TRUE(linker::populate_consumer_input_sets(mem_ctx, - &ir, - consumer_inputs, - consumer_interface_inputs, - junk)); + linker::populate_consumer_input_sets(mem_ctx, + &ir, + consumer_inputs, + consumer_interface_inputs, + junk); /* Create an output variable "a.v". They should not match. 
*/ diff --git a/src/compiler/nir/glsl_to_nir.cpp b/src/compiler/nir/glsl_to_nir.cpp index 2a3047dd33c..6a30023bc53 100644 --- a/src/compiler/nir/glsl_to_nir.cpp +++ b/src/compiler/nir/glsl_to_nir.cpp @@ -376,8 +376,6 @@ nir_visitor::visit(ir_variable *ir) var->data.explicit_binding = ir->data.explicit_binding; var->data.has_initializer = ir->data.has_initializer; var->data.location_frac = ir->data.location_frac; - var->data.from_named_ifc_block_array = ir->data.from_named_ifc_block_array; - var->data.from_named_ifc_block_nonarray = ir->data.from_named_ifc_block_nonarray; switch (ir->data.depth_layout) { case ir_depth_layout_none: @@ -600,7 +598,7 @@ nir_visitor::visit(ir_emit_vertex *ir) { nir_intrinsic_instr *instr = nir_intrinsic_instr_create(this->shader, nir_intrinsic_emit_vertex); - instr->const_index[0] = ir->stream_id(); + nir_intrinsic_set_stream_id(instr, ir->stream_id()); nir_builder_instr_insert(&b, &instr->instr); } @@ -609,7 +607,7 @@ nir_visitor::visit(ir_end_primitive *ir) { nir_intrinsic_instr *instr = nir_intrinsic_instr_create(this->shader, nir_intrinsic_end_primitive); - instr->const_index[0] = ir->stream_id(); + nir_intrinsic_set_stream_id(instr, ir->stream_id()); nir_builder_instr_insert(&b, &instr->instr); } @@ -889,7 +887,7 @@ nir_visitor::visit(ir_call *ir) instr->src[0] = nir_src_for_ssa(evaluate_rvalue(val)); instr->src[1] = nir_src_for_ssa(evaluate_rvalue(block)); instr->src[2] = nir_src_for_ssa(evaluate_rvalue(offset)); - instr->const_index[0] = write_mask->value.u[0]; + nir_intrinsic_set_write_mask(instr, write_mask->value.u[0]); instr->num_components = val->type->vector_elements; nir_builder_instr_insert(&b, &instr->instr); @@ -987,7 +985,7 @@ nir_visitor::visit(ir_call *ir) exec_node *param = ir->actual_parameters.get_head(); ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue(); - instr->const_index[0] = 0; + nir_intrinsic_set_base(instr, 0); instr->src[0] = nir_src_for_ssa(evaluate_rvalue(offset)); const glsl_type *type = ir->return_deref->var->type; @@ -1011,10 +1009,10 @@ nir_visitor::visit(ir_call *ir) ir_constant *write_mask = ((ir_instruction *)param)->as_constant(); assert(write_mask); - instr->const_index[0] = 0; + nir_intrinsic_set_base(instr, 0); instr->src[1] = nir_src_for_ssa(evaluate_rvalue(offset)); - instr->const_index[1] = write_mask->value.u[0]; + nir_intrinsic_set_write_mask(instr, write_mask->value.u[0]); instr->src[0] = nir_src_for_ssa(evaluate_rvalue(val)); instr->num_components = val->type->vector_elements; @@ -1069,7 +1067,8 @@ nir_visitor::visit(ir_call *ir) nir_intrinsic_instr *store_instr = nir_intrinsic_instr_create(shader, nir_intrinsic_store_var); store_instr->num_components = ir->return_deref->type->vector_elements; - store_instr->const_index[0] = (1 << store_instr->num_components) - 1; + nir_intrinsic_set_write_mask(store_instr, + (1 << store_instr->num_components) - 1); store_instr->variables[0] = evaluate_deref(&store_instr->instr, ir->return_deref); @@ -1147,7 +1146,7 @@ nir_visitor::visit(ir_assignment *ir) nir_intrinsic_instr *store = nir_intrinsic_instr_create(this->shader, nir_intrinsic_store_var); store->num_components = ir->lhs->type->vector_elements; - store->const_index[0] = ir->write_mask; + nir_intrinsic_set_write_mask(store, ir->write_mask); nir_deref *store_deref = nir_copy_deref(store, &lhs_deref->deref); store->variables[0] = nir_deref_as_var(store_deref); store->src[0] = nir_src_for_ssa(src); diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 7aba195fa69..ca5e2f2b779 100644 --- 
a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -226,24 +226,6 @@ typedef struct nir_variable { unsigned location_frac:2; /** - * Non-zero if this variable was created by lowering a named interface - * block which was not an array. - * - * Note that this variable and \c from_named_ifc_block_array will never - * both be non-zero. - */ - unsigned from_named_ifc_block_nonarray:1; - - /** - * Non-zero if this variable was created by lowering a named interface - * block which was an array. - * - * Note that this variable and \c from_named_ifc_block_nonarray will never - * both be non-zero. - */ - unsigned from_named_ifc_block_array:1; - - /** * \brief Layout qualifier for gl_FragDepth. * * This is not equal to \c ir_depth_layout_none if and only if this @@ -835,7 +817,7 @@ typedef struct { } nir_call_instr; #define INTRINSIC(name, num_srcs, src_components, has_dest, dest_components, \ - num_variables, num_indices, flags) \ + num_variables, num_indices, idx0, idx1, idx2, flags) \ nir_intrinsic_##name, #define LAST_INTRINSIC(name) nir_last_intrinsic = nir_intrinsic_##name, @@ -848,6 +830,8 @@ typedef enum { #undef INTRINSIC #undef LAST_INTRINSIC +#define NIR_INTRINSIC_MAX_CONST_INDEX 3 + /** Represents an intrinsic * * An intrinsic is an instruction type for handling things that are @@ -891,7 +875,7 @@ typedef struct { */ uint8_t num_components; - int const_index[3]; + int const_index[NIR_INTRINSIC_MAX_CONST_INDEX]; nir_deref_var *variables[2]; @@ -920,6 +904,55 @@ typedef enum { NIR_INTRINSIC_CAN_REORDER = (1 << 1), } nir_intrinsic_semantic_flag; +/** + * \name NIR intrinsics const-index flag + * + * Indicates the usage of a const_index slot. + * + * \sa nir_intrinsic_info::index_map + */ +typedef enum { + /** + * Generally instructions that take a offset src argument, can encode + * a constant 'base' value which is added to the offset. + */ + NIR_INTRINSIC_BASE = 1, + + /** + * For store instructions, a writemask for the store. + */ + NIR_INTRINSIC_WRMASK = 2, + + /** + * The stream-id for GS emit_vertex/end_primitive intrinsics. + */ + NIR_INTRINSIC_STREAM_ID = 3, + + /** + * The clip-plane id for load_user_clip_plane intrinsic. + */ + NIR_INTRINSIC_UCP_ID = 4, + + /** + * The range of a load operation. This specifies the maximum amount of + * data starting at the base offset (if any) that can be accessed. + */ + NIR_INTRINSIC_RANGE = 5, + + /** + * The Vulkan descriptor set for vulkan_resource_index intrinsic. + */ + NIR_INTRINSIC_DESC_SET = 6, + + /** + * The Vulkan descriptor set binding for vulkan_resource_index intrinsic. 
+ */ + NIR_INTRINSIC_BINDING = 7, + + NIR_INTRINSIC_NUM_INDEX_FLAGS, + +} nir_intrinsic_index_flag; + #define NIR_INTRINSIC_MAX_INPUTS 4 typedef struct { @@ -949,12 +982,40 @@ typedef struct { /** the number of constant indices used by the intrinsic */ unsigned num_indices; + /** indicates the usage of intr->const_index[n] */ + unsigned index_map[NIR_INTRINSIC_NUM_INDEX_FLAGS]; + /** semantic flags for calls to this intrinsic */ nir_intrinsic_semantic_flag flags; } nir_intrinsic_info; extern const nir_intrinsic_info nir_intrinsic_infos[nir_num_intrinsics]; + +#define INTRINSIC_IDX_ACCESSORS(name, flag, type) \ +static inline type \ +nir_intrinsic_##name(nir_intrinsic_instr *instr) \ +{ \ + const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic]; \ + assert(info->index_map[NIR_INTRINSIC_##flag] > 0); \ + return instr->const_index[info->index_map[NIR_INTRINSIC_##flag] - 1]; \ +} \ +static inline void \ +nir_intrinsic_set_##name(nir_intrinsic_instr *instr, type val) \ +{ \ + const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic]; \ + assert(info->index_map[NIR_INTRINSIC_##flag] > 0); \ + instr->const_index[info->index_map[NIR_INTRINSIC_##flag] - 1] = val; \ +} + +INTRINSIC_IDX_ACCESSORS(write_mask, WRMASK, unsigned) +INTRINSIC_IDX_ACCESSORS(base, BASE, int) +INTRINSIC_IDX_ACCESSORS(stream_id, STREAM_ID, unsigned) +INTRINSIC_IDX_ACCESSORS(ucp_id, UCP_ID, unsigned) +INTRINSIC_IDX_ACCESSORS(range, RANGE, unsigned) +INTRINSIC_IDX_ACCESSORS(desc_set, DESC_SET, unsigned) +INTRINSIC_IDX_ACCESSORS(binding, BINDING, unsigned) + /** * \group texture information * diff --git a/src/compiler/nir/nir_algebraic.py b/src/compiler/nir/nir_algebraic.py index 14c0e822ad8..2357b57117a 100644 --- a/src/compiler/nir/nir_algebraic.py +++ b/src/compiler/nir/nir_algebraic.py @@ -102,13 +102,10 @@ class Constant(Value): self.value = val def __hex__(self): - # Even if it's an integer, we still need to unpack as an unsigned - # int. This is because, without C99, we can only assign to the first - # element of a union in an initializer. 
if isinstance(self.value, (bool)): return 'NIR_TRUE' if self.value else 'NIR_FALSE' if isinstance(self.value, (int, long)): - return hex(struct.unpack('I', struct.pack('i' if self.value < 0 else 'I', self.value))[0]) + return hex(self.value) elif isinstance(self.value, float): return hex(struct.unpack('I', struct.pack('f', self.value))[0]) else: @@ -216,7 +213,7 @@ ${pass_name}_block(nir_block *block, void *void_state) { struct opt_state *state = void_state; - nir_foreach_instr_safe(block, instr) { + nir_foreach_instr_reverse_safe(block, instr) { if (instr->type != nir_instr_type_alu) continue; @@ -255,7 +252,7 @@ ${pass_name}_impl(nir_function_impl *impl, const bool *condition_flags) state.progress = false; state.condition_flags = condition_flags; - nir_foreach_block(impl, ${pass_name}_block, &state); + nir_foreach_block_reverse(impl, ${pass_name}_block, &state); if (state.progress) nir_metadata_preserve(impl, nir_metadata_block_index | diff --git a/src/compiler/nir/nir_builder.h b/src/compiler/nir/nir_builder.h index 1c7c78acae8..b4dde54f7e7 100644 --- a/src/compiler/nir/nir_builder.h +++ b/src/compiler/nir/nir_builder.h @@ -374,7 +374,7 @@ nir_store_var(nir_builder *build, nir_variable *var, nir_ssa_def *value, nir_intrinsic_instr *store = nir_intrinsic_instr_create(build->shader, nir_intrinsic_store_var); store->num_components = num_components; - store->const_index[0] = writemask; + nir_intrinsic_set_write_mask(store, writemask); store->variables[0] = nir_deref_var_create(store, var); store->src[0] = nir_src_for_ssa(value); nir_builder_instr_insert(build, &store->instr); diff --git a/src/compiler/nir/nir_intrinsics.c b/src/compiler/nir/nir_intrinsics.c index a7c868c39af..0257b19b348 100644 --- a/src/compiler/nir/nir_intrinsics.c +++ b/src/compiler/nir/nir_intrinsics.c @@ -30,7 +30,8 @@ #define OPCODE(name) nir_intrinsic_##name #define INTRINSIC(_name, _num_srcs, _src_components, _has_dest, \ - _dest_components, _num_variables, _num_indices, _flags) \ + _dest_components, _num_variables, _num_indices, \ + idx0, idx1, idx2, _flags) \ { \ .name = #_name, \ .num_srcs = _num_srcs, \ @@ -39,9 +40,16 @@ .dest_components = _dest_components, \ .num_variables = _num_variables, \ .num_indices = _num_indices, \ + .index_map = { \ + [NIR_INTRINSIC_ ## idx0] = 1, \ + [NIR_INTRINSIC_ ## idx1] = 2, \ + [NIR_INTRINSIC_ ## idx2] = 3, \ + }, \ .flags = _flags \ }, +#define NIR_INTRINSIC_xx 0 + #define LAST_INTRINSIC(name) const nir_intrinsic_info nir_intrinsic_infos[nir_num_intrinsics] = { diff --git a/src/compiler/nir/nir_intrinsics.h b/src/compiler/nir/nir_intrinsics.h index 3e7cf735a1b..fa162f9d126 100644 --- a/src/compiler/nir/nir_intrinsics.h +++ b/src/compiler/nir/nir_intrinsics.h @@ -30,7 +30,7 @@ * expands to a list of macros of the form: * * INTRINSIC(name, num_srcs, src_components, has_dest, dest_components, - * num_variables, num_indices, flags) + * num_variables, num_indices, idx0, idx1, idx2, flags) * * Which should correspond one-to-one with the nir_intrinsic_info structure. It * is included in both ir.h to create the nir_intrinsic enum (with members of @@ -42,9 +42,9 @@ #define ARR(...) 
{ __VA_ARGS__ } -INTRINSIC(load_var, 0, ARR(), true, 0, 1, 0, NIR_INTRINSIC_CAN_ELIMINATE) -INTRINSIC(store_var, 1, ARR(0), false, 0, 1, 1, 0) -INTRINSIC(copy_var, 0, ARR(), false, 0, 2, 0, 0) +INTRINSIC(load_var, 0, ARR(), true, 0, 1, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE) +INTRINSIC(store_var, 1, ARR(0), false, 0, 1, 1, WRMASK, xx, xx, 0) +INTRINSIC(copy_var, 0, ARR(), false, 0, 2, 0, xx, xx, xx, 0) /* * Interpolation of input. The interp_var_at* intrinsics are similar to the @@ -54,25 +54,25 @@ INTRINSIC(copy_var, 0, ARR(), false, 0, 2, 0, 0) * respectively. */ -INTRINSIC(interp_var_at_centroid, 0, ARR(0), true, 0, 1, 0, +INTRINSIC(interp_var_at_centroid, 0, ARR(0), true, 0, 1, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) -INTRINSIC(interp_var_at_sample, 1, ARR(1), true, 0, 1, 0, +INTRINSIC(interp_var_at_sample, 1, ARR(1), true, 0, 1, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) -INTRINSIC(interp_var_at_offset, 1, ARR(2), true, 0, 1, 0, +INTRINSIC(interp_var_at_offset, 1, ARR(2), true, 0, 1, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) /* * Ask the driver for the size of a given buffer. It takes the buffer index * as source. */ -INTRINSIC(get_buffer_size, 1, ARR(1), true, 1, 0, 0, +INTRINSIC(get_buffer_size, 1, ARR(1), true, 1, 0, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) /* * a barrier is an intrinsic with no inputs/outputs but which can't be moved * around/optimized in general */ -#define BARRIER(name) INTRINSIC(name, 0, ARR(), false, 0, 0, 0, 0) +#define BARRIER(name) INTRINSIC(name, 0, ARR(), false, 0, 0, 0, xx, xx, xx, 0) BARRIER(barrier) BARRIER(discard) @@ -89,7 +89,7 @@ BARRIER(memory_barrier) * The latter can be used as code motion barrier, which is currently not * feasible with NIR. */ -INTRINSIC(shader_clock, 0, ARR(), true, 1, 0, 0, NIR_INTRINSIC_CAN_ELIMINATE) +INTRINSIC(shader_clock, 0, ARR(), true, 1, 0, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE) /* * Memory barrier with semantics analogous to the compute shader @@ -103,7 +103,7 @@ BARRIER(memory_barrier_image) BARRIER(memory_barrier_shared) /** A conditional discard, with a single boolean source. */ -INTRINSIC(discard_if, 1, ARR(1), false, 0, 0, 0, 0) +INTRINSIC(discard_if, 1, ARR(1), false, 0, 0, 0, xx, xx, xx, 0) /** * Basic Geometry Shader intrinsics. @@ -113,8 +113,8 @@ INTRINSIC(discard_if, 1, ARR(1), false, 0, 0, 0, 0) * * end_primitive implements GLSL's EndPrimitive() built-in. */ -INTRINSIC(emit_vertex, 0, ARR(), false, 0, 0, 1, 0) -INTRINSIC(end_primitive, 0, ARR(), false, 0, 0, 1, 0) +INTRINSIC(emit_vertex, 0, ARR(), false, 0, 0, 1, STREAM_ID, xx, xx, 0) +INTRINSIC(end_primitive, 0, ARR(), false, 0, 0, 1, STREAM_ID, xx, xx, 0) /** * Geometry Shader intrinsics with a vertex count. @@ -125,9 +125,9 @@ INTRINSIC(end_primitive, 0, ARR(), false, 0, 0, 1, 0) * These maintain a count of the number of vertices emitted, as an additional * unsigned integer source. 
*/ -INTRINSIC(emit_vertex_with_counter, 1, ARR(1), false, 0, 0, 1, 0) -INTRINSIC(end_primitive_with_counter, 1, ARR(1), false, 0, 0, 1, 0) -INTRINSIC(set_vertex_count, 1, ARR(1), false, 0, 0, 0, 0) +INTRINSIC(emit_vertex_with_counter, 1, ARR(1), false, 0, 0, 1, STREAM_ID, xx, xx, 0) +INTRINSIC(end_primitive_with_counter, 1, ARR(1), false, 0, 0, 1, STREAM_ID, xx, xx, 0) +INTRINSIC(set_vertex_count, 1, ARR(1), false, 0, 0, 0, xx, xx, xx, 0) /* * Atomic counters @@ -137,8 +137,8 @@ INTRINSIC(set_vertex_count, 1, ARR(1), false, 0, 0, 0, 0) */ #define ATOMIC(name, flags) \ - INTRINSIC(atomic_counter_##name##_var, 0, ARR(), true, 1, 1, 0, flags) \ - INTRINSIC(atomic_counter_##name, 1, ARR(1), true, 1, 0, 1, flags) + INTRINSIC(atomic_counter_##name##_var, 0, ARR(), true, 1, 1, 0, xx, xx, xx, flags) \ + INTRINSIC(atomic_counter_##name, 1, ARR(1), true, 1, 0, 1, BASE, xx, xx, flags) ATOMIC(inc, 0) ATOMIC(dec, 0) @@ -159,20 +159,20 @@ ATOMIC(read, NIR_INTRINSIC_CAN_ELIMINATE) * either one or two additional scalar arguments with the same meaning as in * the ARB_shader_image_load_store specification. */ -INTRINSIC(image_load, 2, ARR(4, 1), true, 4, 1, 0, +INTRINSIC(image_load, 2, ARR(4, 1), true, 4, 1, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE) -INTRINSIC(image_store, 3, ARR(4, 1, 4), false, 0, 1, 0, 0) -INTRINSIC(image_atomic_add, 3, ARR(4, 1, 1), true, 1, 1, 0, 0) -INTRINSIC(image_atomic_min, 3, ARR(4, 1, 1), true, 1, 1, 0, 0) -INTRINSIC(image_atomic_max, 3, ARR(4, 1, 1), true, 1, 1, 0, 0) -INTRINSIC(image_atomic_and, 3, ARR(4, 1, 1), true, 1, 1, 0, 0) -INTRINSIC(image_atomic_or, 3, ARR(4, 1, 1), true, 1, 1, 0, 0) -INTRINSIC(image_atomic_xor, 3, ARR(4, 1, 1), true, 1, 1, 0, 0) -INTRINSIC(image_atomic_exchange, 3, ARR(4, 1, 1), true, 1, 1, 0, 0) -INTRINSIC(image_atomic_comp_swap, 4, ARR(4, 1, 1, 1), true, 1, 1, 0, 0) -INTRINSIC(image_size, 0, ARR(), true, 4, 1, 0, +INTRINSIC(image_store, 3, ARR(4, 1, 4), false, 0, 1, 0, xx, xx, xx, 0) +INTRINSIC(image_atomic_add, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx, xx, 0) +INTRINSIC(image_atomic_min, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx, xx, 0) +INTRINSIC(image_atomic_max, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx, xx, 0) +INTRINSIC(image_atomic_and, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx, xx, 0) +INTRINSIC(image_atomic_or, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx, xx, 0) +INTRINSIC(image_atomic_xor, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx, xx, 0) +INTRINSIC(image_atomic_exchange, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx, xx, 0) +INTRINSIC(image_atomic_comp_swap, 4, ARR(4, 1, 1, 1), true, 1, 1, 0, xx, xx, xx, 0) +INTRINSIC(image_size, 0, ARR(), true, 4, 1, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) -INTRINSIC(image_samples, 0, ARR(), true, 1, 1, 0, +INTRINSIC(image_samples, 0, ARR(), true, 1, 1, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) /* @@ -191,7 +191,8 @@ INTRINSIC(image_samples, 0, ARR(), true, 1, 1, 0, * The intended usage is that the shader will call vulkan_surface_index to * get an index and then pass that as the buffer index ubo/ssbo calls. */ -INTRINSIC(vulkan_resource_index, 1, ARR(1), true, 1, 0, 3, +INTRINSIC(vulkan_resource_index, 1, ARR(1), true, 1, 0, 2, + DESC_SET, BINDING, xx, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) /* @@ -210,16 +211,16 @@ INTRINSIC(vulkan_resource_index, 1, ARR(1), true, 1, 0, 3, * * All operations take 1 variable deref. 
*/ -INTRINSIC(var_atomic_add, 1, ARR(1), true, 1, 1, 0, 0) -INTRINSIC(var_atomic_imin, 1, ARR(1), true, 1, 1, 0, 0) -INTRINSIC(var_atomic_umin, 1, ARR(1), true, 1, 1, 0, 0) -INTRINSIC(var_atomic_imax, 1, ARR(1), true, 1, 1, 0, 0) -INTRINSIC(var_atomic_umax, 1, ARR(1), true, 1, 1, 0, 0) -INTRINSIC(var_atomic_and, 1, ARR(1), true, 1, 1, 0, 0) -INTRINSIC(var_atomic_or, 1, ARR(1), true, 1, 1, 0, 0) -INTRINSIC(var_atomic_xor, 1, ARR(1), true, 1, 1, 0, 0) -INTRINSIC(var_atomic_exchange, 1, ARR(1), true, 1, 1, 0, 0) -INTRINSIC(var_atomic_comp_swap, 2, ARR(1, 1), true, 1, 1, 0, 0) +INTRINSIC(var_atomic_add, 1, ARR(1), true, 1, 1, 0, xx, xx, xx, 0) +INTRINSIC(var_atomic_imin, 1, ARR(1), true, 1, 1, 0, xx, xx, xx, 0) +INTRINSIC(var_atomic_umin, 1, ARR(1), true, 1, 1, 0, xx, xx, xx, 0) +INTRINSIC(var_atomic_imax, 1, ARR(1), true, 1, 1, 0, xx, xx, xx, 0) +INTRINSIC(var_atomic_umax, 1, ARR(1), true, 1, 1, 0, xx, xx, xx, 0) +INTRINSIC(var_atomic_and, 1, ARR(1), true, 1, 1, 0, xx, xx, xx, 0) +INTRINSIC(var_atomic_or, 1, ARR(1), true, 1, 1, 0, xx, xx, xx, 0) +INTRINSIC(var_atomic_xor, 1, ARR(1), true, 1, 1, 0, xx, xx, xx, 0) +INTRINSIC(var_atomic_exchange, 1, ARR(1), true, 1, 1, 0, xx, xx, xx, 0) +INTRINSIC(var_atomic_comp_swap, 2, ARR(1, 1), true, 1, 1, 0, xx, xx, xx, 0) /* * SSBO atomic intrinsics @@ -238,16 +239,16 @@ INTRINSIC(var_atomic_comp_swap, 2, ARR(1, 1), true, 1, 1, 0, 0) * in ssbo_atomic_add, etc). * 3: For CompSwap only: the second data parameter. */ -INTRINSIC(ssbo_atomic_add, 3, ARR(1, 1, 1), true, 1, 0, 0, 0) -INTRINSIC(ssbo_atomic_imin, 3, ARR(1, 1, 1), true, 1, 0, 0, 0) -INTRINSIC(ssbo_atomic_umin, 3, ARR(1, 1, 1), true, 1, 0, 0, 0) -INTRINSIC(ssbo_atomic_imax, 3, ARR(1, 1, 1), true, 1, 0, 0, 0) -INTRINSIC(ssbo_atomic_umax, 3, ARR(1, 1, 1), true, 1, 0, 0, 0) -INTRINSIC(ssbo_atomic_and, 3, ARR(1, 1, 1), true, 1, 0, 0, 0) -INTRINSIC(ssbo_atomic_or, 3, ARR(1, 1, 1), true, 1, 0, 0, 0) -INTRINSIC(ssbo_atomic_xor, 3, ARR(1, 1, 1), true, 1, 0, 0, 0) -INTRINSIC(ssbo_atomic_exchange, 3, ARR(1, 1, 1), true, 1, 0, 0, 0) -INTRINSIC(ssbo_atomic_comp_swap, 4, ARR(1, 1, 1, 1), true, 1, 0, 0, 0) +INTRINSIC(ssbo_atomic_add, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0) +INTRINSIC(ssbo_atomic_imin, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0) +INTRINSIC(ssbo_atomic_umin, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0) +INTRINSIC(ssbo_atomic_imax, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0) +INTRINSIC(ssbo_atomic_umax, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0) +INTRINSIC(ssbo_atomic_and, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0) +INTRINSIC(ssbo_atomic_or, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0) +INTRINSIC(ssbo_atomic_xor, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0) +INTRINSIC(ssbo_atomic_exchange, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0) +INTRINSIC(ssbo_atomic_comp_swap, 4, ARR(1, 1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0) /* * CS shared variable atomic intrinsics @@ -265,42 +266,43 @@ INTRINSIC(ssbo_atomic_comp_swap, 4, ARR(1, 1, 1, 1), true, 1, 0, 0, 0) * in shared_atomic_add, etc). * 2: For CompSwap only: the second data parameter. 
*/ -INTRINSIC(shared_atomic_add, 2, ARR(1, 1), true, 1, 0, 0, 0) -INTRINSIC(shared_atomic_imin, 2, ARR(1, 1), true, 1, 0, 0, 0) -INTRINSIC(shared_atomic_umin, 2, ARR(1, 1), true, 1, 0, 0, 0) -INTRINSIC(shared_atomic_imax, 2, ARR(1, 1), true, 1, 0, 0, 0) -INTRINSIC(shared_atomic_umax, 2, ARR(1, 1), true, 1, 0, 0, 0) -INTRINSIC(shared_atomic_and, 2, ARR(1, 1), true, 1, 0, 0, 0) -INTRINSIC(shared_atomic_or, 2, ARR(1, 1), true, 1, 0, 0, 0) -INTRINSIC(shared_atomic_xor, 2, ARR(1, 1), true, 1, 0, 0, 0) -INTRINSIC(shared_atomic_exchange, 2, ARR(1, 1), true, 1, 0, 0, 0) -INTRINSIC(shared_atomic_comp_swap, 3, ARR(1, 1, 1), true, 1, 0, 0, 0) - -#define SYSTEM_VALUE(name, components, num_indices) \ +INTRINSIC(shared_atomic_add, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0) +INTRINSIC(shared_atomic_imin, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0) +INTRINSIC(shared_atomic_umin, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0) +INTRINSIC(shared_atomic_imax, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0) +INTRINSIC(shared_atomic_umax, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0) +INTRINSIC(shared_atomic_and, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0) +INTRINSIC(shared_atomic_or, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0) +INTRINSIC(shared_atomic_xor, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0) +INTRINSIC(shared_atomic_exchange, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0) +INTRINSIC(shared_atomic_comp_swap, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0) + +#define SYSTEM_VALUE(name, components, num_indices, idx0, idx1, idx2) \ INTRINSIC(load_##name, 0, ARR(), true, components, 0, num_indices, \ + idx0, idx1, idx2, \ NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) -SYSTEM_VALUE(front_face, 1, 0) -SYSTEM_VALUE(vertex_id, 1, 0) -SYSTEM_VALUE(vertex_id_zero_base, 1, 0) -SYSTEM_VALUE(base_vertex, 1, 0) -SYSTEM_VALUE(instance_id, 1, 0) -SYSTEM_VALUE(base_instance, 1, 0) -SYSTEM_VALUE(draw_id, 1, 0) -SYSTEM_VALUE(sample_id, 1, 0) -SYSTEM_VALUE(sample_pos, 2, 0) -SYSTEM_VALUE(sample_mask_in, 1, 0) -SYSTEM_VALUE(primitive_id, 1, 0) -SYSTEM_VALUE(invocation_id, 1, 0) -SYSTEM_VALUE(tess_coord, 3, 0) -SYSTEM_VALUE(tess_level_outer, 4, 0) -SYSTEM_VALUE(tess_level_inner, 2, 0) -SYSTEM_VALUE(patch_vertices_in, 1, 0) -SYSTEM_VALUE(local_invocation_id, 3, 0) -SYSTEM_VALUE(work_group_id, 3, 0) -SYSTEM_VALUE(user_clip_plane, 4, 1) /* const_index[0] is user_clip_plane[idx] */ -SYSTEM_VALUE(num_work_groups, 3, 0) -SYSTEM_VALUE(helper_invocation, 1, 0) +SYSTEM_VALUE(front_face, 1, 0, xx, xx, xx) +SYSTEM_VALUE(vertex_id, 1, 0, xx, xx, xx) +SYSTEM_VALUE(vertex_id_zero_base, 1, 0, xx, xx, xx) +SYSTEM_VALUE(base_vertex, 1, 0, xx, xx, xx) +SYSTEM_VALUE(instance_id, 1, 0, xx, xx, xx) +SYSTEM_VALUE(base_instance, 1, 0, xx, xx, xx) +SYSTEM_VALUE(draw_id, 1, 0, xx, xx, xx) +SYSTEM_VALUE(sample_id, 1, 0, xx, xx, xx) +SYSTEM_VALUE(sample_pos, 2, 0, xx, xx, xx) +SYSTEM_VALUE(sample_mask_in, 1, 0, xx, xx, xx) +SYSTEM_VALUE(primitive_id, 1, 0, xx, xx, xx) +SYSTEM_VALUE(invocation_id, 1, 0, xx, xx, xx) +SYSTEM_VALUE(tess_coord, 3, 0, xx, xx, xx) +SYSTEM_VALUE(tess_level_outer, 4, 0, xx, xx, xx) +SYSTEM_VALUE(tess_level_inner, 2, 0, xx, xx, xx) +SYSTEM_VALUE(patch_vertices_in, 1, 0, xx, xx, xx) +SYSTEM_VALUE(local_invocation_id, 3, 0, xx, xx, xx) +SYSTEM_VALUE(work_group_id, 3, 0, xx, xx, xx) +SYSTEM_VALUE(user_clip_plane, 4, 1, UCP_ID, xx, xx) +SYSTEM_VALUE(num_work_groups, 3, 0, xx, xx, xx) +SYSTEM_VALUE(helper_invocation, 1, 0, xx, xx, xx) /* * Load operations pull data from some piece of GPU memory. 
All load @@ -323,27 +325,29 @@ SYSTEM_VALUE(helper_invocation, 1, 0) * offsets are always in bytes. */ -#define LOAD(name, srcs, indices, flags) \ - INTRINSIC(load_##name, srcs, ARR(1, 1, 1, 1), true, 0, 0, indices, flags) +#define LOAD(name, srcs, num_indices, idx0, idx1, idx2, flags) \ + INTRINSIC(load_##name, srcs, ARR(1, 1, 1, 1), true, 0, 0, num_indices, idx0, idx1, idx2, flags) -/* src[] = { offset }. const_index[] = { base, size } */ -LOAD(uniform, 1, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) +/* src[] = { offset }. const_index[] = { base, range } */ +LOAD(uniform, 1, 2, BASE, RANGE, xx, + NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) /* src[] = { buffer_index, offset }. No const_index */ -LOAD(ubo, 2, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) +LOAD(ubo, 2, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) /* src[] = { offset }. const_index[] = { base } */ -LOAD(input, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) +LOAD(input, 1, 1, BASE, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) /* src[] = { vertex, offset }. const_index[] = { base } */ -LOAD(per_vertex_input, 2, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) +LOAD(per_vertex_input, 2, 1, BASE, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) /* src[] = { buffer_index, offset }. No const_index */ -LOAD(ssbo, 2, 0, NIR_INTRINSIC_CAN_ELIMINATE) +LOAD(ssbo, 2, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE) /* src[] = { offset }. const_index[] = { base } */ -LOAD(output, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE) +LOAD(output, 1, 1, BASE, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE) /* src[] = { vertex, offset }. const_index[] = { base } */ -LOAD(per_vertex_output, 2, 1, NIR_INTRINSIC_CAN_ELIMINATE) +LOAD(per_vertex_output, 2, 1, BASE, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE) /* src[] = { offset }. const_index[] = { base } */ -LOAD(shared, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE) -/* src[] = { offset }. const_index[] = { base, size } */ -LOAD(push_constant, 1, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) +LOAD(shared, 1, 1, BASE, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE) +/* src[] = { offset }. const_index[] = { base, range } */ +LOAD(push_constant, 1, 2, BASE, RANGE, xx, + NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) /* * Stores work the same way as loads, except now the first source is the value @@ -352,16 +356,16 @@ LOAD(push_constant, 1, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDE * const_index[0]. */ -#define STORE(name, srcs, indices, flags) \ - INTRINSIC(store_##name, srcs, ARR(0, 1, 1, 1), false, 0, 0, indices, flags) +#define STORE(name, srcs, num_indices, idx0, idx1, idx2, flags) \ + INTRINSIC(store_##name, srcs, ARR(0, 1, 1, 1), false, 0, 0, num_indices, idx0, idx1, idx2, flags) /* src[] = { value, offset }. const_index[] = { base, write_mask } */ -STORE(output, 2, 2, 0) +STORE(output, 2, 2, BASE, WRMASK, xx, 0) /* src[] = { value, vertex, offset }. const_index[] = { base, write_mask } */ -STORE(per_vertex_output, 3, 2, 0) +STORE(per_vertex_output, 3, 2, BASE, WRMASK, xx, 0) /* src[] = { value, block_index, offset }. const_index[] = { write_mask } */ -STORE(ssbo, 3, 1, 0) +STORE(ssbo, 3, 1, WRMASK, xx, xx, 0) /* src[] = { value, offset }. 
const_index[] = { base, write_mask } */ -STORE(shared, 2, 2, 0) +STORE(shared, 2, 2, BASE, WRMASK, xx, 0) LAST_INTRINSIC(store_shared) diff --git a/src/compiler/nir/nir_lower_atomics.c b/src/compiler/nir/nir_lower_atomics.c index b07e199d71b..eefcb55a0a6 100644 --- a/src/compiler/nir/nir_lower_atomics.c +++ b/src/compiler/nir/nir_lower_atomics.c @@ -71,8 +71,8 @@ lower_instr(nir_intrinsic_instr *instr, unsigned uniform_loc = instr->variables[0]->var->data.location; nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(mem_ctx, op); - new_instr->const_index[0] = - state->shader_program->UniformStorage[uniform_loc].opaque[state->shader->stage].index; + nir_intrinsic_set_base(new_instr, + state->shader_program->UniformStorage[uniform_loc].opaque[state->shader->stage].index); nir_load_const_instr *offset_const = nir_load_const_instr_create(mem_ctx, 1); offset_const->value.u[0] = instr->variables[0]->var->data.offset; diff --git a/src/compiler/nir/nir_lower_clip.c b/src/compiler/nir/nir_lower_clip.c index 0ca6a289396..bcbad536874 100644 --- a/src/compiler/nir/nir_lower_clip.c +++ b/src/compiler/nir/nir_lower_clip.c @@ -71,8 +71,8 @@ store_clipdist_output(nir_builder *b, nir_variable *out, nir_ssa_def **val) store = nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_output); store->num_components = 4; - store->const_index[0] = out->data.driver_location; - store->const_index[1] = 0xf; /* wrmask */ + nir_intrinsic_set_base(store, out->data.driver_location); + nir_intrinsic_set_write_mask(store, 0xf); store->src[0].ssa = nir_vec4(b, val[0], val[1], val[2], val[3]); store->src[0].is_ssa = true; store->src[1] = nir_src_for_ssa(nir_imm_int(b, 0)); @@ -86,7 +86,7 @@ load_clipdist_input(nir_builder *b, nir_variable *in, nir_ssa_def **val) load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_input); load->num_components = 4; - load->const_index[0] = in->data.driver_location; + nir_intrinsic_set_base(load, in->data.driver_location); load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0)); nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL); nir_builder_instr_insert(b, &load->instr); @@ -112,7 +112,7 @@ find_output_in_block(nir_block *block, void *void_state) if (instr->type == nir_instr_type_intrinsic) { nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); if ((intr->intrinsic == nir_intrinsic_store_output) && - intr->const_index[0] == state->drvloc) { + nir_intrinsic_base(intr) == state->drvloc) { assert(state->def == NULL); assert(intr->src[0].is_ssa); assert(nir_src_as_const_value(intr->src[1])); diff --git a/src/compiler/nir/nir_lower_gs_intrinsics.c b/src/compiler/nir/nir_lower_gs_intrinsics.c index fdff1656b4d..14abfe3f509 100644 --- a/src/compiler/nir/nir_lower_gs_intrinsics.c +++ b/src/compiler/nir/nir_lower_gs_intrinsics.c @@ -93,7 +93,7 @@ rewrite_emit_vertex(nir_intrinsic_instr *intrin, struct state *state) nir_intrinsic_instr *lowered = nir_intrinsic_instr_create(b->shader, nir_intrinsic_emit_vertex_with_counter); - lowered->const_index[0] = intrin->const_index[0]; + nir_intrinsic_set_stream_id(lowered, nir_intrinsic_stream_id(intrin)); lowered->src[0] = nir_src_for_ssa(count); nir_builder_instr_insert(b, &lowered->instr); @@ -121,7 +121,7 @@ rewrite_end_primitive(nir_intrinsic_instr *intrin, struct state *state) nir_intrinsic_instr *lowered = nir_intrinsic_instr_create(b->shader, nir_intrinsic_end_primitive_with_counter); - lowered->const_index[0] = intrin->const_index[0]; + nir_intrinsic_set_stream_id(lowered, nir_intrinsic_stream_id(intrin)); lowered->src[0] = 
diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c
index 2c5fa16af5e..84e353775cf 100644
--- a/src/compiler/nir/nir_lower_io.c
+++ b/src/compiler/nir/nir_lower_io.c
@@ -274,8 +274,8 @@ nir_lower_io_block(nir_block *block, void *void_state)
                                        load_op(state, mode, per_vertex));
          load->num_components = intrin->num_components;
 
-         load->const_index[0] =
-            intrin->variables[0]->var->data.driver_location;
+         nir_intrinsic_set_base(load,
+            intrin->variables[0]->var->data.driver_location);
 
          if (load->intrinsic == nir_intrinsic_load_uniform) {
             load->const_index[1] =
@@ -321,11 +321,9 @@ nir_lower_io_block(nir_block *block, void *void_state)
 
          nir_src_copy(&store->src[0], &intrin->src[0], store);
 
-         store->const_index[0] =
-            intrin->variables[0]->var->data.driver_location;
-
-         /* Copy the writemask */
-         store->const_index[1] = intrin->const_index[0];
+         nir_intrinsic_set_base(store,
+            intrin->variables[0]->var->data.driver_location);
+         nir_intrinsic_set_write_mask(store, nir_intrinsic_write_mask(intrin));
 
          if (per_vertex)
             store->src[1] = nir_src_for_ssa(vertex_index);
diff --git a/src/compiler/nir/nir_lower_locals_to_regs.c b/src/compiler/nir/nir_lower_locals_to_regs.c
index 51b0fa733f2..45036fa7787 100644
--- a/src/compiler/nir/nir_lower_locals_to_regs.c
+++ b/src/compiler/nir/nir_lower_locals_to_regs.c
@@ -243,7 +243,7 @@ lower_locals_to_regs_block(nir_block *block, void *void_state)
 
          nir_alu_instr *mov = nir_alu_instr_create(state->shader, nir_op_imov);
          nir_src_copy(&mov->src[0].src, &intrin->src[0], mov);
-         mov->dest.write_mask = intrin->const_index[0];
+         mov->dest.write_mask = nir_intrinsic_write_mask(intrin);
          mov->dest.dest.is_ssa = false;
          mov->dest.dest.reg.reg = reg_src.reg.reg;
          mov->dest.dest.reg.base_offset = reg_src.reg.base_offset;
diff --git a/src/compiler/nir/nir_lower_two_sided_color.c b/src/compiler/nir/nir_lower_two_sided_color.c
index 1294cb89004..fe3507cb7a3 100644
--- a/src/compiler/nir/nir_lower_two_sided_color.c
+++ b/src/compiler/nir/nir_lower_two_sided_color.c
@@ -72,7 +72,7 @@ load_input(nir_builder *b, nir_variable *in)
 
    load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_input);
    load->num_components = 4;
-   load->const_index[0] = in->data.driver_location;
+   nir_intrinsic_set_base(load, in->data.driver_location);
    load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
    nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
    nir_builder_instr_insert(b, &load->instr);
@@ -151,7 +151,7 @@ nir_lower_two_sided_color_block(nir_block *block, void *void_state)
          for (idx = 0; idx < state->colors_count; idx++) {
            unsigned drvloc =
               state->colors[idx].front->data.driver_location;
-            if (intr->const_index[0] == drvloc) {
+            if (nir_intrinsic_base(intr) == drvloc) {
               assert(nir_src_as_const_value(intr->src[0]));
               break;
            }
diff --git a/src/compiler/nir/nir_lower_var_copies.c b/src/compiler/nir/nir_lower_var_copies.c
index 8cb3edd0a84..7db9839c369 100644
--- a/src/compiler/nir/nir_lower_var_copies.c
+++ b/src/compiler/nir/nir_lower_var_copies.c
@@ -128,7 +128,7 @@ emit_copy_load_store(nir_intrinsic_instr *copy_instr,
    nir_intrinsic_instr *store =
       nir_intrinsic_instr_create(mem_ctx, nir_intrinsic_store_var);
    store->num_components = num_components;
-   store->const_index[0] = (1 << num_components) - 1;
+   nir_intrinsic_set_write_mask(store, (1 << num_components) - 1);
    store->variables[0] = nir_deref_as_var(nir_copy_deref(store, &dest_head->deref));
 
    store->src[0].is_ssa = true;
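nir_lower_var_copies.c above emits whole-variable stores with a full writemask; the vars_to_ssa hunk below handles the partial case by merging channels. As a self-contained reference for the writemask semantics these passes rely on (plain C, not NIR):

    #include <stdio.h>

    /* Per-channel semantics of a masked store: channel i takes the new
     * value only when bit i of wrmask is set, otherwise it keeps its
     * old contents. wrmask == (1 << n) - 1 is the whole-variable case. */
    static void
    masked_store(float dst[4], const float src[4], unsigned wrmask,
                 unsigned num_components)
    {
       for (unsigned i = 0; i < num_components; i++)
          if (wrmask & (1u << i))
             dst[i] = src[i];
    }

    int main(void)
    {
       float dst[4] = { 0, 1, 2, 3 };
       const float src[4] = { 9, 9, 9, 9 };
       masked_store(dst, src, 0x5, 4); /* write .x and .z only */
       printf("%g %g %g %g\n", dst[0], dst[1], dst[2], dst[3]); /* 9 1 9 3 */
       return 0;
    }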
diff --git a/src/compiler/nir/nir_lower_vars_to_ssa.c b/src/compiler/nir/nir_lower_vars_to_ssa.c
index e1f368d2f2b..a3f3fcfd9b4 100644
--- a/src/compiler/nir/nir_lower_vars_to_ssa.c
+++ b/src/compiler/nir/nir_lower_vars_to_ssa.c
@@ -560,7 +560,8 @@ rename_variables_block(nir_block *block, struct lower_variables_state *state)
             nir_ssa_def *new_def;
             b.cursor = nir_before_instr(&intrin->instr);
 
-            if (intrin->const_index[0] == (1 << intrin->num_components) - 1) {
+            unsigned wrmask = nir_intrinsic_write_mask(intrin);
+            if (wrmask == (1 << intrin->num_components) - 1) {
                /* Whole variable store - just copy the source.  Note that
                 * intrin->num_components and intrin->src[0].ssa->num_components
                 * may differ.
@@ -580,7 +581,7 @@ rename_variables_block(nir_block *block, struct lower_variables_state *state)
                 */
                nir_ssa_def *srcs[4];
                for (unsigned i = 0; i < intrin->num_components; i++) {
-                  if (intrin->const_index[0] & (1 << i)) {
+                  if (wrmask & (1 << i)) {
                      srcs[i] = nir_channel(&b, intrin->src[0].ssa, i);
                   } else {
                      srcs[i] = nir_channel(&b, old_def, i);
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index d4f4a3d903c..c9c917b77a5 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -151,6 +151,8 @@ optimizations = [
    (('ior', a, 0), a),
    (('fxor', a, a), 0.0),
    (('ixor', a, a), 0),
+   (('fxor', a, 0.0), a),
+   (('ixor', a, 0), a),
    (('inot', ('inot', a)), a),
 
    # DeMorgan's Laws
    (('iand', ('inot', a), ('inot', b)), ('inot', ('ior', a, b))),
@@ -167,6 +169,8 @@ optimizations = [
    (('flog2', ('fexp2', a)), a), # lg2(2^a) = a
    (('fpow', a, b), ('fexp2', ('fmul', ('flog2', a), b)), 'options->lower_fpow'), # a^b = 2^(lg2(a)*b)
    (('fexp2', ('fmul', ('flog2', a), b)), ('fpow', a, b), '!options->lower_fpow'), # 2^(lg2(a)*b) = a^b
+   (('fexp2', ('fadd', ('fmul', ('flog2', a), b), ('fmul', ('flog2', c), d))),
+    ('fmul', ('fpow', a, b), ('fpow', c, d)), '!options->lower_fpow'), # 2^(lg2(a)*b + lg2(c)*d) = a^b * c^d
    (('fpow', a, 1.0), a),
    (('fpow', a, 2.0), ('fmul', a, a)),
    (('fpow', a, 4.0), ('fmul', ('fmul', a, a), ('fmul', a, a))),
@@ -313,6 +317,19 @@ optimizations = [
     'options->lower_unpack_snorm_4x8'),
 ]
 
+# Unreal Engine 4 demo applications open-code bitfieldReverse().
+def bitfield_reverse(u):
+    step1 = ('ior', ('ishl', u, 16), ('ushr', u, 16))
+    step2 = ('ior', ('ishl', ('iand', step1, 0x00ff00ff), 8), ('ushr', ('iand', step1, 0xff00ff00), 8))
+    step3 = ('ior', ('ishl', ('iand', step2, 0x0f0f0f0f), 4), ('ushr', ('iand', step2, 0xf0f0f0f0), 4))
+    step4 = ('ior', ('ishl', ('iand', step3, 0x33333333), 2), ('ushr', ('iand', step3, 0xcccccccc), 2))
+    step5 = ('ior', ('ishl', ('iand', step4, 0x55555555), 1), ('ushr', ('iand', step4, 0xaaaaaaaa), 1))
+
+    return step5
+
+optimizations += [(bitfield_reverse('x'), ('bitfield_reverse', 'x'))]
+
+
 # Add optimizations to handle the case where the result of a ternary is
 # compared to a constant.  This way we can take things like
 #
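The bitfield_reverse pattern added above matches the textbook 32-bit bit reversal (swap halves, then bytes, nibbles, pairs, single bits). A standalone check in C that the five mask/shift steps really compute bitfieldReverse():

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* The same five mask/shift steps the pattern matches. */
    static uint32_t open_coded_reverse(uint32_t u)
    {
       u = (u << 16) | (u >> 16);
       u = ((u & 0x00ff00ff) << 8) | ((u & 0xff00ff00) >> 8);
       u = ((u & 0x0f0f0f0f) << 4) | ((u & 0xf0f0f0f0) >> 4);
       u = ((u & 0x33333333) << 2) | ((u & 0xcccccccc) >> 2);
       u = ((u & 0x55555555) << 1) | ((u & 0xaaaaaaaa) >> 1);
       return u;
    }

    /* Bit-at-a-time reference implementation. */
    static uint32_t reference_reverse(uint32_t u)
    {
       uint32_t r = 0;
       for (int i = 0; i < 32; i++)
          r |= ((u >> i) & 1u) << (31 - i);
       return r;
    }

    int main(void)
    {
       const uint32_t tests[] = { 0, 1, 0xdeadbeef, 0x80000000, 0xffffffff };
       for (unsigned i = 0; i < sizeof(tests) / sizeof(tests[0]); i++)
          assert(open_coded_reverse(tests[i]) == reference_reverse(tests[i]));
       printf("ok\n");
       return 0;
    }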
diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c
index 276a948460c..f0ac0f21dd0 100644
--- a/src/compiler/nir/nir_print.c
+++ b/src/compiler/nir/nir_print.c
@@ -451,15 +451,16 @@ print_deref(nir_deref_var *deref, print_state *state)
 static void
 print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
 {
-   unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
+   const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic];
+   unsigned num_srcs = info->num_srcs;
    FILE *fp = state->fp;
 
-   if (nir_intrinsic_infos[instr->intrinsic].has_dest) {
+   if (info->has_dest) {
       print_dest(&instr->dest, state);
       fprintf(fp, " = ");
    }
 
-   fprintf(fp, "intrinsic %s (", nir_intrinsic_infos[instr->intrinsic].name);
+   fprintf(fp, "intrinsic %s (", info->name);
 
    for (unsigned i = 0; i < num_srcs; i++) {
       if (i != 0)
@@ -470,9 +471,7 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
 
    fprintf(fp, ") (");
 
-   unsigned num_vars = nir_intrinsic_infos[instr->intrinsic].num_variables;
-
-   for (unsigned i = 0; i < num_vars; i++) {
+   for (unsigned i = 0; i < info->num_variables; i++) {
       if (i != 0)
          fprintf(fp, ", ");
 
@@ -481,9 +480,7 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
 
    fprintf(fp, ") (");
 
-   unsigned num_indices = nir_intrinsic_infos[instr->intrinsic].num_indices;
-
-   for (unsigned i = 0; i < num_indices; i++) {
+   for (unsigned i = 0; i < info->num_indices; i++) {
       if (i != 0)
          fprintf(fp, ", ");
 
@@ -492,6 +489,34 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
 
    fprintf(fp, ")");
 
+   static const char *index_name[NIR_INTRINSIC_NUM_INDEX_FLAGS] = {
+      [NIR_INTRINSIC_BASE] = "base",
+      [NIR_INTRINSIC_WRMASK] = "wrmask",
+      [NIR_INTRINSIC_STREAM_ID] = "stream-id",
+      [NIR_INTRINSIC_UCP_ID] = "ucp-id",
+      [NIR_INTRINSIC_RANGE] = "range",
+      [NIR_INTRINSIC_DESC_SET] = "desc-set",
+      [NIR_INTRINSIC_BINDING] = "binding",
+   };
+   for (unsigned idx = 1; idx < NIR_INTRINSIC_NUM_INDEX_FLAGS; idx++) {
+      if (!info->index_map[idx])
+         continue;
+      fprintf(fp, " /*");
+      if (idx == NIR_INTRINSIC_WRMASK) {
+         /* special case wrmask to show it as a writemask.. */
+         unsigned wrmask = nir_intrinsic_write_mask(instr);
+         fprintf(fp, " wrmask=");
+         for (unsigned i = 0; i < 4; i++)
+            if ((wrmask >> i) & 1)
+               fprintf(fp, "%c", "xyzw"[i]);
+      } else {
+         unsigned off = info->index_map[idx] - 1;
+         assert(index_name[idx]);  /* forgot to update index_name table? */
+         fprintf(fp, " %s=%d", index_name[idx], instr->const_index[off]);
+      }
+      fprintf(fp, " */");
+   }
+
    if (!state->shader)
       return;
 
@@ -515,7 +540,7 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
    }
 
    nir_foreach_variable(var, var_list) {
-      if ((var->data.driver_location == instr->const_index[0]) &&
+      if ((var->data.driver_location == nir_intrinsic_base(instr)) &&
          var->name) {
         fprintf(fp, "\t/* %s */", var->name);
         break;
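With the index_map annotations in place, the printer emits the named indices after the raw const_index values. For a store_output with base 1 and writemask 0x5, the printed line would look roughly like the following (illustrative only; ssa numbering and exact spacing depend on the shader):

    intrinsic store_output (ssa_9, ssa_10) () (1, 5) /* base=1 */ /* wrmask=xz */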
diff --git a/src/compiler/nir/nir_validate.c b/src/compiler/nir/nir_validate.c
index 00184cabe20..0509d482f0b 100644
--- a/src/compiler/nir/nir_validate.c
+++ b/src/compiler/nir/nir_validate.c
@@ -417,7 +417,7 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state)
       assert(instr->variables[0]->var->data.mode != nir_var_shader_in &&
              instr->variables[0]->var->data.mode != nir_var_uniform &&
             instr->variables[0]->var->data.mode != nir_var_shader_storage);
-      assert((instr->const_index[0] & ~((1 << instr->num_components) - 1)) == 0);
+      assert((nir_intrinsic_write_mask(instr) & ~((1 << instr->num_components) - 1)) == 0);
       break;
    }
    case nir_intrinsic_copy_var:
diff --git a/src/compiler/nir/spirv/vtn_variables.c b/src/compiler/nir/spirv/vtn_variables.c
index 3ad98aa5310..5ca24201498 100644
--- a/src/compiler/nir/spirv/vtn_variables.c
+++ b/src/compiler/nir/spirv/vtn_variables.c
@@ -319,8 +319,8 @@ get_vulkan_resource_index(struct vtn_builder *b, struct vtn_access_chain *chain,
       nir_intrinsic_instr_create(b->nb.shader,
                                  nir_intrinsic_vulkan_resource_index);
    instr->src[0] = nir_src_for_ssa(array_index);
-   instr->const_index[0] = chain->var->descriptor_set;
-   instr->const_index[1] = chain->var->binding;
+   nir_intrinsic_set_desc_set(instr, chain->var->descriptor_set);
+   nir_intrinsic_set_binding(instr, chain->var->binding);
 
    nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL);
    nir_builder_instr_insert(&b->nb, &instr->instr);
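On the consumer side, a backend lowering vulkan_resource_index can now pull the descriptor set and binding out by name instead of by slot position. A hedged sketch; struct pipeline_layout and its members are hypothetical stand-ins for whatever the driver actually stores, only the nir_intrinsic_* accessors come from this patch:

    /* Hypothetical driver-side lookup of the (set, binding) pair. */
    static uint32_t
    resolve_resource_index(const struct pipeline_layout *layout,
                           const nir_intrinsic_instr *instr)
    {
       assert(instr->intrinsic == nir_intrinsic_vulkan_resource_index);
       unsigned set     = nir_intrinsic_desc_set(instr);
       unsigned binding = nir_intrinsic_binding(instr);
       return layout->set[set].binding_offset[binding]; /* hypothetical field */
    }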