diff options
author | Ian Romanick <[email protected]> | 2019-05-22 10:18:06 -0700 |
---|---|---|
committer | Ian Romanick <[email protected]> | 2019-06-05 17:03:40 -0700 |
commit | 8030cb75c18c129febd4ef0704e8c9b6142a629a (patch) | |
tree | 2cf8f5597b26688a40df03b9bf26db2e2541304c /src/intel/compiler/brw_fs_cmod_propagation.cpp | |
parent | 2dd60139331fa67d674fd53155a930140c60ee0c (diff) |
intel/fs: Fix flag_subreg handling in cmod propagation
There were two errors. First, the pass could propagate conditional
modifiers from an instruction that writes one flag register to an
instruction that writes a different flag register. For example,
cmp.nz.f0.0(16) null:F, vgrf6:F, vgrf5:F
cmp.nz.f0.1(16) null:F, vgrf6:F, vgrf5:F
could become
cmp.nz.f0.0(16) null:F, vgrf6:F, vgrf5:F
Second, if an instruction that writes f0.1 has its condition propagated, the
modified instruction will incorrectly write flag f0.0. For example,
linterp(16) vgrf6:F, g2:F, attr0:F
cmp.z.f0.1(16) null:F, vgrf6:F, vgrf5:F
(-f0.1) discard_jump(16) (null):UD
could become
linterp.z.f0.0(16) vgrf6:F, g2:F, attr0:F
(-f0.1) discard_jump(16) (null):UD
None of these cases will occur currently. The only time we use f0.1 is
for generating discard intrinsics. In all those cases, we generate a
sequence like:
cmp.nz.f0.0(16) vgrf7:F, vgrf6:F, vgrf5:F
(+f0.1) cmp.z(16) null:D, vgrf7:D, 0d
(-f0.1) discard_jump(16) (null):UD
Due to the mixed types and incompatible conditions, this sequence would
never see any cmod propagation. The next patch will change this.
No shader-db changes on any Intel platform.
v2: Fix typo in comment in test case subtract_delete_compare_other_flag.
Noticed by Caio.
Reviewed-by: Caio Marcelo de Oliveira Filho <[email protected]>
Reviewed-by: Matt Turner <[email protected]>
Diffstat (limited to 'src/intel/compiler/brw_fs_cmod_propagation.cpp')
-rw-r--r-- | src/intel/compiler/brw_fs_cmod_propagation.cpp | 37 |
1 files changed, 37 insertions, 0 deletions
diff --git a/src/intel/compiler/brw_fs_cmod_propagation.cpp b/src/intel/compiler/brw_fs_cmod_propagation.cpp index b430d4b2446..ba4df592424 100644 --- a/src/intel/compiler/brw_fs_cmod_propagation.cpp +++ b/src/intel/compiler/brw_fs_cmod_propagation.cpp @@ -53,6 +53,7 @@ cmod_propagate_cmp_to_add(const gen_device_info *devinfo, bblock_t *block, fs_inst *inst) { bool read_flag = false; + const unsigned flags_written = inst->flags_written(); foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) { if (scan_inst->opcode == BRW_OPCODE_ADD && @@ -79,6 +80,17 @@ cmod_propagate_cmp_to_add(const gen_device_info *devinfo, bblock_t *block, goto not_match; } + /* If the scan instruction writes a different flag register than the + * instruction we're trying to propagate from, bail. + * + * FINISHME: The second part of the condition may be too strong. + * Perhaps (scan_inst->flags_written() & flags_written) != + * flags_written? + */ + if (scan_inst->flags_written() != 0 && + scan_inst->flags_written() != flags_written) + goto not_match; + /* From the Sky Lake PRM Vol. 7 "Assigning Conditional Mods": * * * Note that the [post condition signal] bits generated at @@ -130,6 +142,7 @@ cmod_propagate_not(const gen_device_info *devinfo, bblock_t *block, { const enum brw_conditional_mod cond = brw_negate_cmod(inst->conditional_mod); bool read_flag = false; + const unsigned flags_written = inst->flags_written(); if (cond != BRW_CONDITIONAL_Z && cond != BRW_CONDITIONAL_NZ) return false; @@ -146,6 +159,17 @@ cmod_propagate_not(const gen_device_info *devinfo, bblock_t *block, scan_inst->exec_size != inst->exec_size) break; + /* If the scan instruction writes a different flag register than the + * instruction we're trying to propagate from, bail. + * + * FINISHME: The second part of the condition may be too strong. + * Perhaps (scan_inst->flags_written() & flags_written) != + * flags_written? 
+ */ + if (scan_inst->flags_written() != 0 && + scan_inst->flags_written() != flags_written) + break; + if (scan_inst->can_do_cmod() && ((!read_flag && scan_inst->conditional_mod == BRW_CONDITIONAL_NONE) || scan_inst->conditional_mod == cond)) { @@ -231,9 +255,21 @@ opt_cmod_propagation_local(const gen_device_info *devinfo, bblock_t *block) } bool read_flag = false; + const unsigned flags_written = inst->flags_written(); foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) { if (regions_overlap(scan_inst->dst, scan_inst->size_written, inst->src[0], inst->size_read(0))) { + /* If the scan instruction writes a different flag register than + * the instruction we're trying to propagate from, bail. + * + * FINISHME: The second part of the condition may be too strong. + * Perhaps (scan_inst->flags_written() & flags_written) != + * flags_written? + */ + if (scan_inst->flags_written() != 0 && + scan_inst->flags_written() != flags_written) + break; + if (scan_inst->is_partial_write() || scan_inst->dst.offset != inst->src[0].offset || scan_inst->exec_size != inst->exec_size) @@ -380,6 +416,7 @@ opt_cmod_propagation_local(const gen_device_info *devinfo, bblock_t *block) ((!read_flag && scan_inst->conditional_mod == BRW_CONDITIONAL_NONE) || scan_inst->conditional_mod == cond)) { scan_inst->conditional_mod = cond; + scan_inst->flag_subreg = inst->flag_subreg; inst->remove(block); progress = true; } |