diff options
author | Francisco Jerez <currojerez@riseup.net> | 2016-05-18 22:13:52 -0700 |
---|---|---|
committer | Francisco Jerez <currojerez@riseup.net> | 2016-05-27 23:29:04 -0700 |
commit | ecd7a7255aa1d6c313ead14e1b472c073c7111ac (patch) | |
tree | bb4b0b92bad47f89218304954818ae63f5da6c3e | |
parent | 0fec265373f269d116f6d4de900b208fffabe2a1 (diff) |
i965/fs: Keep track of flag dependencies with byte granularity during scheduling.
This prevents false dependencies from being created between
instructions that write disjoint 8-bit portions of the flag register.
On the other hand, it should also make sure that the scheduler considers
dependencies between instructions that write or read multiple flag
subregisters at once (e.g. 32-wide predication or conditional mods).
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp | 41 |
1 file changed, 31 insertions, 10 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp index 6d6a19d0288..8afdc25c2c5 100644 --- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp +++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp @@ -914,7 +914,7 @@ fs_instruction_scheduler::calculate_deps() */ schedule_node *last_grf_write[grf_count * 16]; schedule_node *last_mrf_write[BRW_MAX_MRF(v->devinfo->gen)]; - schedule_node *last_conditional_mod[2] = { NULL, NULL }; + schedule_node *last_conditional_mod[4] = {}; schedule_node *last_accumulator_write = NULL; /* Fixed HW registers are assumed to be separate from the virtual * GRFs, so they can be tracked separately. We don't really write @@ -968,8 +968,13 @@ fs_instruction_scheduler::calculate_deps() } } - if (inst->reads_flag()) { - add_dep(last_conditional_mod[inst->flag_subreg], n); + if (const unsigned mask = inst->flags_read(v->devinfo)) { + assert(mask < (1 << ARRAY_SIZE(last_conditional_mod))); + + for (unsigned i = 0; i < ARRAY_SIZE(last_conditional_mod); i++) { + if (mask & (1 << i)) + add_dep(last_conditional_mod[i], n); + } } if (inst->reads_accumulator_implicitly()) { @@ -1023,9 +1028,15 @@ fs_instruction_scheduler::calculate_deps() } } - if (inst->writes_flag()) { - add_dep(last_conditional_mod[inst->flag_subreg], n, 0); - last_conditional_mod[inst->flag_subreg] = n; + if (const unsigned mask = inst->flags_written()) { + assert(mask < (1 << ARRAY_SIZE(last_conditional_mod))); + + for (unsigned i = 0; i < ARRAY_SIZE(last_conditional_mod); i++) { + if (mask & (1 << i)) { + add_dep(last_conditional_mod[i], n, 0); + last_conditional_mod[i] = n; + } + } } if (inst->writes_accumulator_implicitly(v->devinfo) && @@ -1080,8 +1091,13 @@ fs_instruction_scheduler::calculate_deps() } } - if (inst->reads_flag()) { - add_dep(n, last_conditional_mod[inst->flag_subreg]); + if (const unsigned mask = inst->flags_read(v->devinfo)) { + assert(mask < (1 << 
ARRAY_SIZE(last_conditional_mod))); + + for (unsigned i = 0; i < ARRAY_SIZE(last_conditional_mod); i++) { + if (mask & (1 << i)) + add_dep(n, last_conditional_mod[i]); + } } if (inst->reads_accumulator_implicitly()) { @@ -1132,8 +1148,13 @@ fs_instruction_scheduler::calculate_deps() } } - if (inst->writes_flag()) { - last_conditional_mod[inst->flag_subreg] = n; + if (const unsigned mask = inst->flags_written()) { + assert(mask < (1 << ARRAY_SIZE(last_conditional_mod))); + + for (unsigned i = 0; i < ARRAY_SIZE(last_conditional_mod); i++) { + if (mask & (1 << i)) + last_conditional_mod[i] = n; + } } if (inst->writes_accumulator_implicitly(v->devinfo)) { |