diff options
author | Jason Ekstrand <[email protected]> | 2019-03-29 22:51:20 -0500 |
---|---|---|
committer | Jason Ekstrand <[email protected]> | 2019-04-05 18:39:00 -0500 |
commit | 03a72d96d8dacc32e817089b94bec08ac70b898b (patch) | |
tree | bd904833541c1fd7ec92a8ab0c3e3a2627433c19 | |
parent | cade9001b1758ee9b76f365b02822c97a414006a (diff) |
nir/algebraic: Drop some @bool specifiers
Now that we have one-bit booleans, we mostly no longer need to look at
parent instructions to figure out whether a value is a Boolean. We can
drop these specifiers so that the optimizations apply more generally.
Shader-DB results on Kaby Lake:
total instructions in shared programs: 15321168 -> 15321227 (<.01%)
instructions in affected programs: 8836 -> 8895 (0.67%)
helped: 1
HURT: 31
total cycles in shared programs: 357481781 -> 357481321 (<.01%)
cycles in affected programs: 146524 -> 146064 (-0.31%)
helped: 22
HURT: 10
total spills in shared programs: 23675 -> 23673 (<.01%)
spills in affected programs: 11 -> 9 (-18.18%)
helped: 1
HURT: 0
total fills in shared programs: 32040 -> 32036 (-0.01%)
fills in affected programs: 27 -> 23 (-14.81%)
helped: 1
HURT: 0
No change in VkPipeline-DB
Looking at the instructions hurt, a bunch of them seem to be a case
where doing exactly the right thing in NIR ends up doing the wrong-ish
thing in the back-end because flags are dumb. In particular, there's a
case where we have a MUL followed by a CMP followed by a SEL and when we
turn that SEL into an OR, it uses the GRF result of the CMP rather than
the flag result so the CMP can't be merged with the MUL. Those shaders
appear to schedule better according to the cycle estimates so I guess
it's a win? Also it helps spilling in one Car Chase compute shader.
Reviewed-by: Timothy Arceri <[email protected]>
-rw-r--r-- | src/compiler/nir/nir_opt_algebraic.py | 14 |
1 files changed, 7 insertions, 7 deletions
```diff
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index bdf70787937..597d479bd63 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -312,7 +312,7 @@ optimizations = [
    (('bcsel', a, ('bcsel(is_used_once)', b, c, d), ('bcsel', b, c, 'e')), ('bcsel', b, c, ('bcsel', a, d, 'e'))),
    (('bcsel', a, ('bcsel', b, c, d), ('bcsel(is_used_once)', b, 'e', d)), ('bcsel', b, ('bcsel', a, c, 'e'), d)),
    (('bcsel', a, ('bcsel(is_used_once)', b, c, d), ('bcsel', b, 'e', d)), ('bcsel', b, ('bcsel', a, c, 'e'), d)),
-   (('bcsel', a, True, 'b@bool'), ('ior', a, b)),
+   (('bcsel', a, True, b), ('ior', a, b)),
    (('fmin', a, a), a),
    (('fmax', a, a), a),
    (('imin', a, a), a),
@@ -390,7 +390,7 @@ optimizations = [
    (('ior', ('uge', 1, a), ('ieq', a, 2)), ('uge', 2, a)),
    (('ior', ('uge', 2, a), ('ieq', a, 3)), ('uge', 3, a)),
 
-   (('ior', 'a@bool', ('ieq', a, False)), True),
+   (('ior', a, ('ieq', a, False)), True),
    (('ior', a, ('inot', a)), -1),
 
    (('iand', ('ieq', 'a@32', 0), ('ieq', 'b@32', 0)), ('ieq', ('ior', 'a@32', 'b@32'), 0)),
@@ -535,10 +535,10 @@ optimizations = [
    # Boolean simplifications
    (('i2b32(is_used_by_if)', a), ('ine32', a, 0)),
    (('i2b1(is_used_by_if)', a), ('ine', a, 0)),
-   (('ieq', 'a@bool', True), a),
-   (('ine(is_not_used_by_if)', 'a@bool', True), ('inot', a)),
-   (('ine', 'a@bool', False), a),
-   (('ieq(is_not_used_by_if)', 'a@bool', False), ('inot', 'a')),
+   (('ieq', a, True), a),
+   (('ine(is_not_used_by_if)', a, True), ('inot', a)),
+   (('ine', a, False), a),
+   (('ieq(is_not_used_by_if)', a, False), ('inot', 'a')),
    (('bcsel', a, True, False), a),
    (('bcsel', a, False, True), ('inot', a)),
    (('bcsel@32', a, 1.0, 0.0), ('b2f', a)),
@@ -1018,7 +1018,7 @@ late_optimizations = [
    (('fmin', ('fadd(is_used_once)', '#c', a), ('fadd(is_used_once)', '#c', b)), ('fadd', c, ('fmin', a, b))),
    (('fmax', ('fadd(is_used_once)', '#c', a), ('fadd(is_used_once)', '#c', b)), ('fadd', c, ('fmax', a, b))),
 
-   (('bcsel', 'a@bool', 0, ('b2f32', ('inot', 'b@bool'))), ('b2f32', ('inot', ('ior', a, b)))),
+   (('bcsel', a, 0, ('b2f32', ('inot', 'b@bool'))), ('b2f32', ('inot', ('ior', a, b)))),
 ]
 
 print(nir_algebraic.AlgebraicPass("nir_opt_algebraic", optimizations).render())
```