From 0ac5ff9ecb26ebc07a48e4f15539f975cef9b82a Mon Sep 17 00:00:00 2001
From: Ian Romanick
Date: Thu, 13 Jun 2019 14:19:11 -0700
Subject: nir: Use nir_src_bit_size instead of alu1->dest.dest.ssa.bit_size
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is important because, for example, nir_op_fne has
dest.dest.ssa.bit_size == 1, but the source operands can be 16, 32, or
64 bits.  Fixing this helps partial redundancy elimination for compares
in a few more shaders.

v2: Add unit tests for nir_opt_comparison_pre covering cases fixed by
this commit.

All Intel platforms had similar results.

total instructions in shared programs: 17179408 -> 17179081 (<.01%)
instructions in affected programs: 43958 -> 43631 (-0.74%)
helped: 118
HURT: 2
helped stats (abs) min: 1 max: 5 x̄: 2.87 x̃: 2
helped stats (rel) min: 0.06% max: 4.12% x̄: 1.19% x̃: 0.81%
HURT stats (abs)   min: 6 max: 6 x̄: 6.00 x̃: 6
HURT stats (rel)   min: 5.83% max: 6.06% x̄: 5.94% x̃: 5.94%
95% mean confidence interval for instructions value: -3.08 -2.37
95% mean confidence interval for instructions %-change: -1.30% -0.85%
Instructions are helped.

total cycles in shared programs: 360959066 -> 360942386 (<.01%)
cycles in affected programs: 774274 -> 757594 (-2.15%)
helped: 111
HURT: 4
helped stats (abs) min: 1 max: 1591 x̄: 169.49 x̃: 36
helped stats (rel) min: <.01% max: 24.43% x̄: 8.86% x̃: 2.24%
HURT stats (abs)   min: 1 max: 2068 x̄: 533.25 x̃: 32
HURT stats (rel)   min: 0.02% max: 5.10% x̄: 3.06% x̃: 3.56%
95% mean confidence interval for cycles value: -200.61 -89.47
95% mean confidence interval for cycles %-change: -10.32% -6.58%
Cycles are helped.

Reviewed-by: Jason Ekstrand [v1]
Suggested-by: Jason Ekstrand
Reviewed-by: Matt Turner
Fixes: be1cc3552bc ("nir: Add nir_const_value_negative_equal")
---
 src/compiler/nir/nir_instr_set.c                |   6 +-
 src/compiler/nir/tests/comparison_pre_tests.cpp | 213 ++++++++++++++++++++++++
 2 files changed, 218 insertions(+), 1 deletion(-)

diff --git a/src/compiler/nir/nir_instr_set.c b/src/compiler/nir/nir_instr_set.c
index eb721ae4fc1..a19e8467b0a 100644
--- a/src/compiler/nir/nir_instr_set.c
+++ b/src/compiler/nir/nir_instr_set.c
@@ -441,12 +441,16 @@ nir_alu_srcs_negative_equal(const nir_alu_instr *alu1,
       if (const2 == NULL)
          return false;
 
+      if (nir_src_bit_size(alu1->src[src1].src) !=
+          nir_src_bit_size(alu2->src[src2].src))
+         return false;
+
       /* FINISHME: Apply the swizzle? */
       return nir_const_value_negative_equal(const1,
                                             const2,
                                             nir_ssa_alu_instr_src_components(alu1, src1),
                                             nir_op_infos[alu1->op].input_types[src1],
-                                            alu1->dest.dest.ssa.bit_size);
+                                            nir_src_bit_size(alu1->src[src1].src));
    }
 
    uint8_t alu1_swizzle[4] = {0};
diff --git a/src/compiler/nir/tests/comparison_pre_tests.cpp b/src/compiler/nir/tests/comparison_pre_tests.cpp
index f31879be6c4..fe1cc23fb3b 100644
--- a/src/compiler/nir/tests/comparison_pre_tests.cpp
+++ b/src/compiler/nir/tests/comparison_pre_tests.cpp
@@ -260,6 +260,219 @@ TEST_F(comparison_pre_test, a_lt_neg_b_vs_a_plus_b)
    EXPECT_TRUE(nir_opt_comparison_pre_impl(bld.impl));
 }
 
+TEST_F(comparison_pre_test, imm_lt_b_vs_neg_imm_plus_b)
+{
+   /* Before:
+    *
+    * vec4 32 ssa_0 = load_const (-2.0, -1.0, 1.0, 2.0)
+    * vec4 32 ssa_1 = load_const ( 2.0, 1.0, -1.0, -2.0)
+    * vec4 32 ssa_2 = load_const ( 3.0, 4.0, 5.0, 6.0)
+    * vec1 32 ssa_3 = load_const ( 1.0)
+    * vec1 32 ssa_4 = load_const (-1.0)
+    * vec4 32 ssa_5 = fadd ssa_0, ssa_2
+    * vec1 32 ssa_6 = mov ssa_5.x
+    * vec1 1 ssa_7 = flt ssa_3, ssa_6
+    *
+    * if ssa_7 {
+    *    vec1 32 ssa_8 = fadd ssa_4, ssa_6
+    * } else {
+    * }
+    *
+    * After:
+    *
+    * vec4 32 ssa_0 = load_const (-2.0, -1.0, 1.0, 2.0)
+    * vec4 32 ssa_1 = load_const ( 2.0, 1.0, -1.0, -2.0)
+    * vec4 32 ssa_2 = load_const ( 3.0, 4.0, 5.0, 6.0)
+    * vec1 32 ssa_3 = load_const ( 1.0)
+    * vec1 32 ssa_4 = load_const (-1.0)
+    * vec4 32 ssa_5 = fadd ssa_0, ssa_2
+    * vec1 32 ssa_6 = mov ssa_5.x
+    * vec1 32 ssa_9 = fneg ssa_3
+    * vec1 32 ssa_10 = fadd ssa_6, ssa_9
+    * vec1 32 ssa_11 = load_const ( 0.0)
+    * vec1 1 ssa_12 = flt ssa_11, ssa_10
+    * vec1 32 ssa_13 = mov ssa_10
+    * vec1 1 ssa_14 = mov ssa_12
+    *
+    * if ssa_14 {
+    * } else {
+    * }
+    */
+   nir_ssa_def *one = nir_imm_float(&bld, 1.0f);
+   nir_ssa_def *neg_one = nir_imm_float(&bld, -1.0f);
+   nir_ssa_def *a = nir_channel(&bld, nir_fadd(&bld, v1, v3), 0);
+
+   nir_ssa_def *flt = nir_flt(&bld, one, a);
+
+   nir_if *nif = nir_push_if(&bld, flt);
+
+   nir_fadd(&bld, neg_one, a);
+
+   nir_pop_if(&bld, nif);
+
+   EXPECT_TRUE(nir_opt_comparison_pre_impl(bld.impl));
+}
+
+TEST_F(comparison_pre_test, a_lt_imm_vs_a_minus_imm)
+{
+   /* Before:
+    *
+    * vec4 32 ssa_0 = load_const (-2.0, -1.0, 1.0, 2.0)
+    * vec4 32 ssa_1 = load_const ( 2.0, 1.0, -1.0, -2.0)
+    * vec4 32 ssa_2 = load_const ( 3.0, 4.0, 5.0, 6.0)
+    * vec1 32 ssa_3 = load_const ( 1.0)
+    * vec1 32 ssa_4 = load_const (-1.0)
+    * vec4 32 ssa_5 = fadd ssa_0, ssa_2
+    * vec1 32 ssa_6 = mov ssa_5.x
+    * vec1 1 ssa_7 = flt ssa_6, ssa_3
+    *
+    * if ssa_7 {
+    *    vec1 32 ssa_8 = fadd ssa_6, ssa_4
+    * } else {
+    * }
+    *
+    * After:
+    *
+    * vec4 32 ssa_0 = load_const (-2.0, -1.0, 1.0, 2.0)
+    * vec4 32 ssa_1 = load_const ( 2.0, 1.0, -1.0, -2.0)
+    * vec4 32 ssa_2 = load_const ( 3.0, 4.0, 5.0, 6.0)
+    * vec1 32 ssa_3 = load_const ( 1.0)
+    * vec1 32 ssa_4 = load_const (-1.0)
+    * vec4 32 ssa_5 = fadd ssa_0, ssa_2
+    * vec1 32 ssa_6 = mov ssa_5.x
+    * vec1 32 ssa_9 = fneg ssa_3
+    * vec1 32 ssa_10 = fadd ssa_6, ssa_9
+    * vec1 32 ssa_11 = load_const ( 0.0)
+    * vec1 1 ssa_12 = flt ssa_10, ssa_11
+    * vec1 32 ssa_13 = mov ssa_10
+    * vec1 1 ssa_14 = mov ssa_12
+    *
+    * if ssa_14 {
+    * } else {
+    * }
+    */
+   nir_ssa_def *one = nir_imm_float(&bld, 1.0f);
+   nir_ssa_def *neg_one = nir_imm_float(&bld, -1.0f);
+   nir_ssa_def *a = nir_channel(&bld, nir_fadd(&bld, v1, v3), 0);
+
+   nir_ssa_def *flt = nir_flt(&bld, a, one);
+
+   nir_if *nif = nir_push_if(&bld, flt);
+
+   nir_fadd(&bld, a, neg_one);
+
+   nir_pop_if(&bld, nif);
+
+   EXPECT_TRUE(nir_opt_comparison_pre_impl(bld.impl));
+}
+
+TEST_F(comparison_pre_test, neg_imm_lt_a_vs_a_plus_imm)
+{
+   /* Before:
+    *
+    * vec4 32 ssa_0 = load_const (-2.0, -1.0, 1.0, 2.0)
+    * vec4 32 ssa_1 = load_const ( 2.0, 1.0, -1.0, -2.0)
+    * vec4 32 ssa_2 = load_const ( 3.0, 4.0, 5.0, 6.0)
+    * vec1 32 ssa_3 = load_const ( 1.0)
+    * vec1 32 ssa_4 = load_const (-1.0)
+    * vec4 32 ssa_5 = fadd ssa_0, ssa_2
+    * vec1 32 ssa_6 = mov ssa_5.x
+    * vec1 1 ssa_7 = flt ssa_4, ssa_6
+    *
+    * if ssa_7 {
+    *    vec1 32 ssa_8 = fadd ssa_6, ssa_3
+    * } else {
+    * }
+    *
+    * After:
+    *
+    * vec4 32 ssa_0 = load_const (-2.0, -1.0, 1.0, 2.0)
+    * vec4 32 ssa_1 = load_const ( 2.0, 1.0, -1.0, -2.0)
+    * vec4 32 ssa_2 = load_const ( 3.0, 4.0, 5.0, 6.0)
+    * vec1 32 ssa_3 = load_const ( 1.0)
+    * vec1 32 ssa_4 = load_const (-1.0)
+    * vec4 32 ssa_5 = fadd ssa_0, ssa_2
+    * vec1 32 ssa_6 = mov ssa_5.x
+    * vec1 32 ssa_9 = fneg ssa_4
+    * vec1 32 ssa_10 = fadd ssa_6, ssa_9
+    * vec1 32 ssa_11 = load_const ( 0.0)
+    * vec1 1 ssa_12 = flt ssa_11, ssa_10
+    * vec1 32 ssa_13 = mov ssa_10
+    * vec1 1 ssa_14 = mov ssa_12
+    *
+    * if ssa_14 {
+    * } else {
+    * }
+    */
+
+   nir_ssa_def *one = nir_imm_float(&bld, 1.0f);
+   nir_ssa_def *neg_one = nir_imm_float(&bld, -1.0f);
+   nir_ssa_def *a = nir_channel(&bld, nir_fadd(&bld, v1, v3), 0);
+
+   nir_ssa_def *flt = nir_flt(&bld, neg_one, a);
+
+   nir_if *nif = nir_push_if(&bld, flt);
+
+   nir_fadd(&bld, a, one);
+
+   nir_pop_if(&bld, nif);
+
+   EXPECT_TRUE(nir_opt_comparison_pre_impl(bld.impl));
+}
+
+TEST_F(comparison_pre_test, a_lt_neg_imm_vs_a_plus_imm)
+{
+   /* Before:
+    *
+    * vec4 32 ssa_0 = load_const (-2.0, -1.0, 1.0, 2.0)
+    * vec4 32 ssa_1 = load_const ( 2.0, 1.0, -1.0, -2.0)
+    * vec4 32 ssa_2 = load_const ( 3.0, 4.0, 5.0, 6.0)
+    * vec1 32 ssa_3 = load_const ( 1.0)
+    * vec1 32 ssa_4 = load_const (-1.0)
+    * vec4 32 ssa_5 = fadd ssa_0, ssa_2
+    * vec1 32 ssa_6 = mov ssa_5.x
+    * vec1 1 ssa_7 = flt ssa_6, ssa_4
+    *
+    * if ssa_7 {
+    *    vec1 32 ssa_8 = fadd ssa_6, ssa_3
+    * } else {
+    * }
+    *
+    * After:
+    *
+    * vec4 32 ssa_0 = load_const (-2.0, -1.0, 1.0, 2.0)
+    * vec4 32 ssa_1 = load_const ( 2.0, 1.0, -1.0, -2.0)
+    * vec4 32 ssa_2 = load_const ( 3.0, 4.0, 5.0, 6.0)
+    * vec1 32 ssa_3 = load_const ( 1.0)
+    * vec1 32 ssa_4 = load_const (-1.0)
+    * vec4 32 ssa_5 = fadd ssa_0, ssa_2
+    * vec1 32 ssa_6 = mov ssa_5.x
+    * vec1 32 ssa_9 = fneg ssa_4
+    * vec1 32 ssa_10 = fadd ssa_6, ssa_9
+    * vec1 32 ssa_11 = load_const ( 0.0)
+    * vec1 1 ssa_12 = flt ssa_10, ssa_11
+    * vec1 32 ssa_13 = mov ssa_10
+    * vec1 1 ssa_14 = mov ssa_12
+    *
+    * if ssa_14 {
+    * } else {
+    * }
+    */
+   nir_ssa_def *one = nir_imm_float(&bld, 1.0f);
+   nir_ssa_def *neg_one = nir_imm_float(&bld, -1.0f);
+   nir_ssa_def *a = nir_channel(&bld, nir_fadd(&bld, v1, v3), 0);
+
+   nir_ssa_def *flt = nir_flt(&bld, a, neg_one);
+
+   nir_if *nif = nir_push_if(&bld, flt);
+
+   nir_fadd(&bld, a, one);
+
+   nir_pop_if(&bld, nif);
+
+   EXPECT_TRUE(nir_opt_comparison_pre_impl(bld.impl));
+}
+
 TEST_F(comparison_pre_test, non_scalar_add_result)
 {
    /* The optimization pass should not do anything because the result of the
-- 
cgit v1.2.3
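
As background for the change above: for NIR comparison opcodes the
destination is always a 1-bit boolean, so the width of the values being
compared can only be recovered from the sources.  The following is a
minimal sketch of that distinction, not part of the patch; it assumes
the 2019-era NIR API used here, and comparison_operand_bit_size is a
hypothetical name introduced purely for illustration.

#include "nir.h"

/* Hypothetical helper, for illustration only.  For an instruction like
 * "vec1 1 ssa_2 = fne ssa_0, ssa_1" with 64-bit sources,
 * alu->dest.dest.ssa.bit_size == 1 while
 * nir_src_bit_size(alu->src[0].src) == 64, so sizing the compared
 * constants by the destination would misread them as 1-bit values.
 */
static unsigned
comparison_operand_bit_size(const nir_alu_instr *alu)
{
   /* Both sources of a comparison have the same bit size, so querying
    * the first source suffices; the 1-bit destination says nothing
    * about operand width.
    */
   return nir_src_bit_size(alu->src[0].src);
}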