/* * Copyright © 2016 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ #include "vtn_private.h" static void vtn_build_subgroup_instr(struct vtn_builder *b, nir_intrinsic_op nir_op, struct vtn_ssa_value *dst, struct vtn_ssa_value *src0, nir_ssa_def *index, unsigned const_idx0, unsigned const_idx1) { /* Some of the subgroup operations take an index. SPIR-V allows this to be * any integer type. To make things simpler for drivers, we only support * 32-bit indices. */ if (index && index->bit_size != 32) index = nir_u2u32(&b->nb, index); vtn_assert(dst->type == src0->type); if (!glsl_type_is_vector_or_scalar(dst->type)) { for (unsigned i = 0; i < glsl_get_length(dst->type); i++) { vtn_build_subgroup_instr(b, nir_op, dst->elems[i], src0->elems[i], index, const_idx0, const_idx1); } return; } nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(b->nb.shader, nir_op); nir_ssa_dest_init_for_type(&intrin->instr, &intrin->dest, dst->type, NULL); intrin->num_components = intrin->dest.ssa.num_components; intrin->src[0] = nir_src_for_ssa(src0->def); if (index) intrin->src[1] = nir_src_for_ssa(index); intrin->const_index[0] = const_idx0; intrin->const_index[1] = const_idx1; nir_builder_instr_insert(&b->nb, &intrin->instr); dst->def = &intrin->dest.ssa; } void vtn_handle_subgroup(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) { struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); val->ssa = vtn_create_ssa_value(b, val->type->type); switch (opcode) { case SpvOpGroupNonUniformElect: { vtn_fail_if(val->type->type != glsl_bool_type(), "OpGroupNonUniformElect must return a Bool"); nir_intrinsic_instr *elect = nir_intrinsic_instr_create(b->nb.shader, nir_intrinsic_elect); nir_ssa_dest_init(&elect->instr, &elect->dest, 1, 32, NULL); nir_builder_instr_insert(&b->nb, &elect->instr); val->ssa->def = &elect->dest.ssa; break; } case SpvOpGroupNonUniformBallot: { vtn_fail_if(val->type->type != glsl_vector_type(GLSL_TYPE_UINT, 4), "OpGroupNonUniformBallot must return a uvec4"); nir_intrinsic_instr *ballot = nir_intrinsic_instr_create(b->nb.shader, nir_intrinsic_ballot); ballot->src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[4])->def); nir_ssa_dest_init(&ballot->instr, &ballot->dest, 4, 32, NULL); ballot->num_components = 4; nir_builder_instr_insert(&b->nb, &ballot->instr); val->ssa->def = &ballot->dest.ssa; break; } case SpvOpGroupNonUniformInverseBallot: { /* This one is just a BallotBitfieldExtract with subgroup invocation. * We could add a NIR intrinsic but it's easier to just lower it on the * spot. */ nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(b->nb.shader, nir_intrinsic_ballot_bitfield_extract); intrin->src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[4])->def); intrin->src[1] = nir_src_for_ssa(nir_load_subgroup_invocation(&b->nb)); nir_ssa_dest_init(&intrin->instr, &intrin->dest, 1, 32, NULL); nir_builder_instr_insert(&b->nb, &intrin->instr); val->ssa->def = &intrin->dest.ssa; break; } case SpvOpGroupNonUniformBallotBitExtract: case SpvOpGroupNonUniformBallotBitCount: case SpvOpGroupNonUniformBallotFindLSB: case SpvOpGroupNonUniformBallotFindMSB: { nir_ssa_def *src0, *src1 = NULL; nir_intrinsic_op op; switch (opcode) { case SpvOpGroupNonUniformBallotBitExtract: op = nir_intrinsic_ballot_bitfield_extract; src0 = vtn_ssa_value(b, w[4])->def; src1 = vtn_ssa_value(b, w[5])->def; break; case SpvOpGroupNonUniformBallotBitCount: switch ((SpvGroupOperation)w[4]) { case SpvGroupOperationReduce: op = nir_intrinsic_ballot_bit_count_reduce; break; case SpvGroupOperationInclusiveScan: op = nir_intrinsic_ballot_bit_count_inclusive; break; case SpvGroupOperationExclusiveScan: op = nir_intrinsic_ballot_bit_count_exclusive; break; default: unreachable("Invalid group operation"); } src0 = vtn_ssa_value(b, w[5])->def; break; case SpvOpGroupNonUniformBallotFindLSB: op = nir_intrinsic_ballot_find_lsb; src0 = vtn_ssa_value(b, w[4])->def; break; case SpvOpGroupNonUniformBallotFindMSB: op = nir_intrinsic_ballot_find_msb; src0 = vtn_ssa_value(b, w[4])->def; break; default: unreachable("Unhandled opcode"); } nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(b->nb.shader, op); intrin->src[0] = nir_src_for_ssa(src0); if (src1) intrin->src[1] = nir_src_for_ssa(src1); nir_ssa_dest_init(&intrin->instr, &intrin->dest, 1, 32, NULL); nir_builder_instr_insert(&b->nb, &intrin->instr); val->ssa->def = &intrin->dest.ssa; break; } case SpvOpGroupNonUniformBroadcastFirst: vtn_build_subgroup_instr(b, nir_intrinsic_read_first_invocation, val->ssa, vtn_ssa_value(b, w[4]), NULL, 0, 0); break; case SpvOpGroupNonUniformBroadcast: vtn_build_subgroup_instr(b, nir_intrinsic_read_invocation, val->ssa, vtn_ssa_value(b, w[4]), vtn_ssa_value(b, w[5])->def, 0, 0); break; case SpvOpGroupNonUniformAll: case SpvOpGroupNonUniformAny: case SpvOpGroupNonUniformAllEqual: { vtn_fail_if(val->type->type != glsl_bool_type(), "OpGroupNonUniform(All|Any|AllEqual) must return a bool"); nir_intrinsic_op op; switch (opcode) { case SpvOpGroupNonUniformAll: op = nir_intrinsic_vote_all; break; case SpvOpGroupNonUniformAny: op = nir_intrinsic_vote_any; break; case SpvOpGroupNonUniformAllEqual: { switch (glsl_get_base_type(val->type->type)) { case GLSL_TYPE_FLOAT: case GLSL_TYPE_DOUBLE: op = nir_intrinsic_vote_feq; break; case GLSL_TYPE_UINT: case GLSL_TYPE_INT: case GLSL_TYPE_UINT64: case GLSL_TYPE_INT64: case GLSL_TYPE_BOOL: op = nir_intrinsic_vote_ieq; break; default: unreachable("Unhandled type"); } break; } default: unreachable("Unhandled opcode"); } nir_ssa_def *src0 = vtn_ssa_value(b, w[4])->def; nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(b->nb.shader, op); intrin->num_components = src0->num_components; intrin->src[0] = nir_src_for_ssa(src0); nir_ssa_dest_init(&intrin->instr, &intrin->dest, 1, 32, NULL); nir_builder_instr_insert(&b->nb, &intrin->instr); val->ssa->def = &intrin->dest.ssa; break; } case SpvOpGroupNonUniformShuffle: case SpvOpGroupNonUniformShuffleXor: case SpvOpGroupNonUniformShuffleUp: case SpvOpGroupNonUniformShuffleDown: { nir_intrinsic_op op; switch (opcode) { case SpvOpGroupNonUniformShuffle: op = nir_intrinsic_shuffle; break; case SpvOpGroupNonUniformShuffleXor: op = nir_intrinsic_shuffle_xor; break; case SpvOpGroupNonUniformShuffleUp: op = nir_intrinsic_shuffle_up; break; case SpvOpGroupNonUniformShuffleDown: op = nir_intrinsic_shuffle_down; break; default: unreachable("Invalid opcode"); } vtn_build_subgroup_instr(b, op, val->ssa, vtn_ssa_value(b, w[4]), vtn_ssa_value(b, w[5])->def, 0, 0); break; } case SpvOpGroupNonUniformQuadBroadcast: vtn_build_subgroup_instr(b, nir_intrinsic_quad_broadcast, val->ssa, vtn_ssa_value(b, w[4]), vtn_ssa_value(b, w[5])->def, 0, 0); break; case SpvOpGroupNonUniformQuadSwap: { unsigned direction = vtn_constant_value(b, w[5])->values[0].u32[0]; nir_intrinsic_op op; switch (direction) { case 0: op = nir_intrinsic_quad_swap_horizontal; break; case 1: op = nir_intrinsic_quad_swap_vertical; break; case 2: op = nir_intrinsic_quad_swap_diagonal; break; default: vtn_fail("Invalid constant value in OpGroupNonUniformQuadSwap"); } vtn_build_subgroup_instr(b, op, val->ssa, vtn_ssa_value(b, w[4]), NULL, 0, 0); break; } case SpvOpGroupNonUniformIAdd: case SpvOpGroupNonUniformFAdd: case SpvOpGroupNonUniformIMul: case SpvOpGroupNonUniformFMul: case SpvOpGroupNonUniformSMin: case SpvOpGroupNonUniformUMin: case SpvOpGroupNonUniformFMin: case SpvOpGroupNonUniformSMax: case SpvOpGroupNonUniformUMax: case SpvOpGroupNonUniformFMax: case SpvOpGroupNonUniformBitwiseAnd: case SpvOpGroupNonUniformBitwiseOr: case SpvOpGroupNonUniformBitwiseXor: case SpvOpGroupNonUniformLogicalAnd: case SpvOpGroupNonUniformLogicalOr: case SpvOpGroupNonUniformLogicalXor: { nir_op reduction_op; switch (opcode) { case SpvOpGroupNonUniformIAdd: reduction_op = nir_op_iadd; break; case SpvOpGroupNonUniformFAdd: reduction_op = nir_op_fadd; break; case SpvOpGroupNonUniformIMul: reduction_op = nir_op_imul; break; case SpvOpGroupNonUniformFMul: reduction_op = nir_op_fmul; break; case SpvOpGroupNonUniformSMin: reduction_op = nir_op_imin; break; case SpvOpGroupNonUniformUMin: reduction_op = nir_op_umin; break; case SpvOpGroupNonUniformFMin: reduction_op = nir_op_fmin; break; case SpvOpGroupNonUniformSMax: reduction_op = nir_op_imax; break; case SpvOpGroupNonUniformUMax: reduction_op = nir_op_umax; break; case SpvOpGroupNonUniformFMax: reduction_op = nir_op_fmax; break; case SpvOpGroupNonUniformBitwiseAnd: case SpvOpGroupNonUniformLogicalAnd: reduction_op = nir_op_iand; break; case SpvOpGroupNonUniformBitwiseOr: case SpvOpGroupNonUniformLogicalOr: reduction_op = nir_op_ior; break; case SpvOpGroupNonUniformBitwiseXor: case SpvOpGroupNonUniformLogicalXor: reduction_op = nir_op_ixor; break; default: unreachable("Invalid reduction operation"); } nir_intrinsic_op op; unsigned cluster_size = 0; switch ((SpvGroupOperation)w[4]) { case SpvGroupOperationReduce: op = nir_intrinsic_reduce; break; case SpvGroupOperationInclusiveScan: op = nir_intrinsic_inclusive_scan; break; case SpvGroupOperationExclusiveScan: op = nir_intrinsic_exclusive_scan; break; case SpvGroupOperationClusteredReduce: op = nir_intrinsic_reduce; assert(count == 7); cluster_size = vtn_constant_value(b, w[6])->values[0].u32[0]; break; default: unreachable("Invalid group operation"); } vtn_build_subgroup_instr(b, op, val->ssa, vtn_ssa_value(b, w[5]), NULL, reduction_op, cluster_size); break; } default: unreachable("Invalid SPIR-V opcode"); } }