diff options
author | Eric Anholt <[email protected]> | 2018-05-08 11:24:40 -0700 |
---|---|---|
committer | Eric Anholt <[email protected]> | 2018-06-06 13:44:28 -0700 |
commit | 6e1597c2d9f5e14ffaf1c326985ee3203f995044 (patch) | |
tree | 5e01c761ac0144b054680d3f529b93a06de999ad /src/compiler/nir | |
parent | 6a0db5f08ffac7d43a5b937982262f357a21f95b (diff) |
nir: Add an ALU lowering pass for mul_high.
This is based on the glsl/lower_instructions.cpp implementation, but
should be much more readable.
Reviewed-by: Matt Turner <[email protected]>
Reviewed-by: Ian Romanick <[email protected]>
Diffstat (limited to 'src/compiler/nir')
-rw-r--r-- | src/compiler/nir/meson.build | 1
-rw-r--r-- | src/compiler/nir/nir.h | 3
-rw-r--r-- | src/compiler/nir/nir_lower_alu.c | 165
3 files changed, 169 insertions, 0 deletions
diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build index 3fec363691d..598c68aff9f 100644 --- a/src/compiler/nir/meson.build +++ b/src/compiler/nir/meson.build @@ -104,6 +104,7 @@ files_libnir = files( 'nir_liveness.c', 'nir_loop_analyze.c', 'nir_loop_analyze.h', + 'nir_lower_alu.c', 'nir_lower_alu_to_scalar.c', 'nir_lower_alpha_test.c', 'nir_lower_atomics_to_ssbo.c', diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index b9426f8eb4e..7d01eb23bc4 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -1919,6 +1919,8 @@ typedef struct nir_shader_compiler_options { bool lower_find_lsb; bool lower_uadd_carry; bool lower_usub_borrow; + /** Lowers imul_high/umul_high to 16-bit multiplies and carry operations. */ + bool lower_mul_high; /** lowers fneg and ineg to fsub and isub. */ bool lower_negate; /** lowers fsub and isub to fadd+fneg and iadd+ineg. */ @@ -2628,6 +2630,7 @@ bool nir_move_vec_src_uses_to_dest(nir_shader *shader); bool nir_lower_vec_to_movs(nir_shader *shader); void nir_lower_alpha_test(nir_shader *shader, enum compare_func func, bool alpha_to_one); +bool nir_lower_alu(nir_shader *shader); bool nir_lower_alu_to_scalar(nir_shader *shader); bool nir_lower_load_const_to_scalar(nir_shader *shader); bool nir_lower_read_invocation_to_scalar(nir_shader *shader); diff --git a/src/compiler/nir/nir_lower_alu.c b/src/compiler/nir/nir_lower_alu.c new file mode 100644 index 00000000000..28ecaf6badc --- /dev/null +++ b/src/compiler/nir/nir_lower_alu.c @@ -0,0 +1,165 @@ +/* + * Copyright © 2010 Intel Corporation + * Copyright © 2018 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + 
* Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "nir.h" +#include "nir_builder.h" + +/** nir_lower_alu.c + * + * NIR's home for miscellaneous ALU operation lowering implementations. + * + * Most NIR ALU lowering occurs in nir_opt_algebraic.py, since it's generally + * easy to write them there. However, if terms appear multiple times in the + * lowered code, it can get very verbose and cause a lot of work for CSE, so + * it may end up being easier to write out in C code. + * + * The shader must be in SSA for this pass. 
 */

/* Flag bit presumably reserved for per-operation control of this pass.
 * NOTE(review): unused by the code below — lowering is keyed off
 * shader->options->lower_mul_high instead; confirm whether this is leftover.
 */
#define LOWER_MUL_HIGH (1 << 0)

/* Lowers a single ALU instruction if it is one this pass handles.
 *
 * Currently implements imul_high/umul_high as four 16x16 multiplies plus
 * carry propagation, for targets setting
 * nir_shader_compiler_options::lower_mul_high.
 *
 * Returns true if the instruction was replaced, false otherwise.
 */
static bool
lower_alu_instr(nir_alu_instr *instr, nir_builder *b)
{
   nir_ssa_def *lowered = NULL;

   /* The pass requires SSA: we rewrite all uses of the old def below. */
   assert(instr->dest.dest.is_ssa);

   /* Emit replacement code immediately before the instruction being
    * lowered, and propagate its "exact" flag onto the new instructions.
    */
   b->cursor = nir_before_instr(&instr->instr);
   b->exact = instr->exact;

   switch (instr->op) {
   case nir_op_imul_high:
   case nir_op_umul_high:
      if (b->shader->options->lower_mul_high) {
         nir_ssa_def *c1 = nir_imm_int(b, 1);
         nir_ssa_def *c16 = nir_imm_int(b, 16);

         nir_ssa_def *src0 = nir_ssa_for_alu_src(b, instr, 0);
         nir_ssa_def *src1 = nir_ssa_for_alu_src(b, instr, 1);
         nir_ssa_def *different_signs = NULL;
         if (instr->op == nir_op_imul_high) {
            /* Signed case: multiply absolute values, then fix up the sign
             * at the end (see the 64-bit negation comment below).
             */
            nir_ssa_def *c0 = nir_imm_int(b, 0);
            different_signs = nir_ixor(b,
                                       nir_ilt(b, src0, c0),
                                       nir_ilt(b, src1, c0));
            src0 = nir_iabs(b, src0);
            src1 = nir_iabs(b, src1);
         }

         /*   ABCD
          * * EFGH
          * ======
          * (GH * CD) + (GH * AB) << 16 + (EF * CD) << 16 + (EF * AB) << 32
          *
          * Start by splitting into the 4 multiplies.
          */
         nir_ssa_def *src0l = nir_iand(b, src0, nir_imm_int(b, 0xffff));
         nir_ssa_def *src1l = nir_iand(b, src1, nir_imm_int(b, 0xffff));
         nir_ssa_def *src0h = nir_ushr(b, src0, c16);
         nir_ssa_def *src1h = nir_ushr(b, src1, c16);

         nir_ssa_def *lo = nir_imul(b, src0l, src1l);
         nir_ssa_def *m1 = nir_imul(b, src0l, src1h);
         nir_ssa_def *m2 = nir_imul(b, src0h, src1l);
         nir_ssa_def *hi = nir_imul(b, src0h, src1h);

         nir_ssa_def *tmp;

         /* Accumulate each middle partial product into the lo/hi halves.
          * The uadd_carry result is masked with c1 so that only bit 0
          * (the carry out of lo) is added into hi.
          */
         tmp = nir_ishl(b, m1, c16);
         hi = nir_iadd(b, hi, nir_iand(b, nir_uadd_carry(b, lo, tmp), c1));
         lo = nir_iadd(b, lo, tmp);
         hi = nir_iadd(b, hi, nir_ushr(b, m1, c16));

         tmp = nir_ishl(b, m2, c16);
         hi = nir_iadd(b, hi, nir_iand(b, nir_uadd_carry(b, lo, tmp), c1));
         lo = nir_iadd(b, lo, tmp);
         hi = nir_iadd(b, hi, nir_ushr(b, m2, c16));

         if (instr->op == nir_op_imul_high) {
            /* For channels where different_signs is set we have to perform a
             * 64-bit negation.  This is *not* the same as just negating the
             * high 32-bits.  Consider -3 * 2.  The high 32-bits is 0, but the
             * desired result is -1, not -0!  Recall -x == ~x + 1.
             */
            hi = nir_bcsel(b, different_signs,
                           nir_iadd(b,
                                    nir_inot(b, hi),
                                    nir_iand(b,
                                             nir_uadd_carry(b,
                                                            nir_inot(b, lo),
                                                            c1),
                                             nir_imm_int(b, 1))),
                           hi);
         }

         /* [iu]mul_high only returns the high half of the product. */
         lowered = hi;
      }
      break;

   default:
      break;
   }

   if (lowered) {
      nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(lowered));
      nir_instr_remove(&instr->instr);
      return true;
   } else {
      return false;
   }
}

/* Runs the miscellaneous-ALU lowering pass over every function in the
 * shader.  Returns true if any instruction was lowered so callers can
 * schedule follow-up optimization passes.
 */
bool
nir_lower_alu(nir_shader *shader)
{
   bool progress = false;

   /* Early out: nothing this pass currently handles is enabled. */
   if (!shader->options->lower_mul_high)
      return false;

   nir_foreach_function(function, shader) {
      if (function->impl) {
         nir_builder builder;
         nir_builder_init(&builder, function->impl);

         /* _safe iteration: lower_alu_instr removes instructions as we go. */
         nir_foreach_block(block, function->impl) {
            nir_foreach_instr_safe(instr, block) {
               if (instr->type == nir_instr_type_alu) {
                  progress = lower_alu_instr(nir_instr_as_alu(instr),
                                             &builder) || progress;
               }
            }
         }

         if (progress) {
            /* The lowering is purely instruction-local, so block indices
             * and dominance information remain valid.
             */
            nir_metadata_preserve(function->impl,
                                  nir_metadata_block_index |
                                  nir_metadata_dominance);
         }
      }
   }

   return progress;
}