author     Rhys Perry <[email protected]>    2020-02-11 16:52:20 +0000
committer  Marge Bot <[email protected]>    2020-03-03 15:38:13 +0000
commit     9fea90ad5170dd64376d22a14ac88c392813c96c (patch)
tree       5a99a6a35515c973ecbf468a3a760d74d5a13187 /src
parent     3f31c54842d4d2e1e78dad6cab57e45cb616b344 (diff)
aco: keep track of which events are used in a barrier
And properly handle unordered events so that they always wait for 0.

Signed-off-by: Rhys Perry <[email protected]>
Fixes: 93c8ebfa780 ('aco: Initial commit of independent AMD compiler')
Reviewed-by: Daniel Schürmann <[email protected]>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/3774>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/3774>
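For context, the idea behind the patch can be sketched as follows (a minimal, hypothetical illustration, not the actual aco_insert_waitcnt.cpp code): each barrier slot additionally records which wait_event bits contributed to its pending counter. A later event belonging to a different barrier may only bump the counter when every event recorded so far is ordered and of the same kind, because only then is the new event guaranteed to retire after the barrier's own events. Unordered events (such as flat memory accesses) give no such guarantee, so the barrier keeps waiting for the counter to drain to 0. The enum values, the slot struct and note_event() below are illustrative stand-ins, not names from the source tree.

/* Illustrative sketch only: simplified to a single lgkm counter and three
 * hypothetical event kinds. */
#include <cstdint>
#include <cstdio>

enum wait_event : uint16_t {
   event_smem = 1 << 0,   /* ordered with other SMEM on lgkm_cnt */
   event_lds  = 1 << 1,   /* ordered with other LDS on lgkm_cnt */
   event_flat = 1 << 2,   /* unordered: may retire in any order */
};

static constexpr uint16_t lgkm_events      = event_smem | event_lds | event_flat;
static constexpr uint16_t unordered_events = event_flat;
static constexpr uint8_t  unset_counter    = 0xff;

struct slot {
   uint8_t  lgkm   = unset_counter; /* lgkm_cnt value this barrier must wait for */
   uint16_t events = 0;             /* wait_event bits still pending for this barrier */
};

/* Called for every new memory event; `mine` says whether the event belongs to
 * this barrier's own interaction set. */
void note_event(slot &bar, wait_event ev, bool mine, unsigned max_lgkm_cnt)
{
   if (mine) {
      bar.events |= ev;
      bar.lgkm = 0; /* the barrier must wait for this event to complete */
   } else if (!(bar.events & unordered_events) && !(unordered_events & ev)) {
      /* The counter may only grow when every pending event is ordered and of
       * the same kind as the new one: then the new event retires last, and
       * waiting for a larger lgkm_cnt still covers the barrier's events. */
      if ((bar.events & lgkm_events) == ev &&
          bar.lgkm != unset_counter && bar.lgkm < max_lgkm_cnt)
         bar.lgkm++;
   }
   /* Otherwise the counter is left untouched: with unordered or mixed event
    * kinds, the only safe choice is to keep waiting for it to reach 0. */
}

int main()
{
   slot bar;
   note_event(bar, event_smem, true, 15);  /* barrier's own SMEM load: wait for lgkm_cnt == 0 */
   note_event(bar, event_smem, false, 15); /* later ordered SMEM load: wait for lgkm_cnt <= 1 */
   note_event(bar, event_flat, false, 15); /* unordered FLAT access: counter must not grow */
   std::printf("barrier waits for lgkm_cnt <= %u\n", bar.lgkm); /* prints 1 */
   return 0;
}

The actual change below applies the same rule per counter (lgkm, vm, exp, vs) and per barrier slot, with the event masks defined by the existing wait_event enum.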
Diffstat (limited to 'src')
-rw-r--r--   src/amd/compiler/aco_insert_waitcnt.cpp | 60
1 file changed, 41 insertions(+), 19 deletions(-)
diff --git a/src/amd/compiler/aco_insert_waitcnt.cpp b/src/amd/compiler/aco_insert_waitcnt.cpp
index 8d8024f5aa2..e0184993c6b 100644
--- a/src/amd/compiler/aco_insert_waitcnt.cpp
+++ b/src/amd/compiler/aco_insert_waitcnt.cpp
@@ -247,6 +247,7 @@ struct wait_ctx {
bool pending_s_buffer_store = false; /* GFX10 workaround */
wait_imm barrier_imm[barrier_count];
+ uint16_t barrier_events[barrier_count]; /* use wait_event notion */
std::map<PhysReg,wait_entry> gpr_map;
@@ -291,8 +292,11 @@ struct wait_ctx {
}
}
- for (unsigned i = 0; i < barrier_count; i++)
+ for (unsigned i = 0; i < barrier_count; i++) {
changed |= barrier_imm[i].combine(other->barrier_imm[i]);
+ changed |= other->barrier_events[i] & ~barrier_events[i];
+ barrier_events[i] |= other->barrier_events[i];
+ }
return changed;
}
@@ -452,14 +456,25 @@ wait_imm kill(Instruction* instr, wait_ctx& ctx)
/* update barrier wait imms */
for (unsigned i = 0; i < barrier_count; i++) {
wait_imm& bar = ctx.barrier_imm[i];
- if (bar.exp != wait_imm::unset_counter && imm.exp <= bar.exp)
+ uint16_t& bar_ev = ctx.barrier_events[i];
+ if (bar.exp != wait_imm::unset_counter && imm.exp <= bar.exp) {
bar.exp = wait_imm::unset_counter;
- if (bar.vm != wait_imm::unset_counter && imm.vm <= bar.vm)
+ bar_ev &= ~exp_events;
+ }
+ if (bar.vm != wait_imm::unset_counter && imm.vm <= bar.vm) {
bar.vm = wait_imm::unset_counter;
- if (bar.lgkm != wait_imm::unset_counter && imm.lgkm <= bar.lgkm)
+ bar_ev &= ~(vm_events & ~event_flat);
+ }
+ if (bar.lgkm != wait_imm::unset_counter && imm.lgkm <= bar.lgkm) {
bar.lgkm = wait_imm::unset_counter;
- if (bar.vs != wait_imm::unset_counter && imm.vs <= bar.vs)
+ bar_ev &= ~(lgkm_events & ~event_flat);
+ }
+ if (bar.vs != wait_imm::unset_counter && imm.vs <= bar.vs) {
bar.vs = wait_imm::unset_counter;
+ bar_ev &= ~vs_events;
+ }
+ if (bar.vm == wait_imm::unset_counter && bar.lgkm == wait_imm::unset_counter)
+ bar_ev &= ~event_flat;
}
/* remove all gprs with higher counter from map */
@@ -491,12 +506,19 @@ wait_imm kill(Instruction* instr, wait_ctx& ctx)
return imm;
}
-void update_barrier_imm(wait_ctx& ctx, uint8_t counters, barrier_interaction barrier)
+void update_barrier_counter(uint8_t *ctr, unsigned max)
+{
+ if (*ctr != wait_imm::unset_counter && *ctr < max)
+ (*ctr)++;
+}
+
+void update_barrier_imm(wait_ctx& ctx, uint8_t counters, wait_event event, barrier_interaction barrier)
{
- unsigned barrier_index = ffs(barrier) - 1;
for (unsigned i = 0; i < barrier_count; i++) {
wait_imm& bar = ctx.barrier_imm[i];
- if (i == barrier_index) {
+ uint16_t& bar_ev = ctx.barrier_events[i];
+ if (barrier & (1 << i)) {
+ bar_ev |= event;
if (counters & counter_lgkm)
bar.lgkm = 0;
if (counters & counter_vm)
@@ -505,15 +527,15 @@ void update_barrier_imm(wait_ctx& ctx, uint8_t counters, barrier_interaction bar
bar.exp = 0;
if (counters & counter_vs)
bar.vs = 0;
- } else {
- if (counters & counter_lgkm && bar.lgkm != wait_imm::unset_counter && bar.lgkm < ctx.max_lgkm_cnt)
- bar.lgkm++;
- if (counters & counter_vm && bar.vm != wait_imm::unset_counter && bar.vm < ctx.max_vm_cnt)
- bar.vm++;
- if (counters & counter_exp && bar.exp != wait_imm::unset_counter && bar.exp < ctx.max_exp_cnt)
- bar.exp++;
- if (counters & counter_vs && bar.vs != wait_imm::unset_counter && bar.vs < ctx.max_vs_cnt)
- bar.vs++;
+ } else if (!(bar_ev & ctx.unordered_events) && !(ctx.unordered_events & event)) {
+ if (counters & counter_lgkm && (bar_ev & lgkm_events) == event)
+ update_barrier_counter(&bar.lgkm, ctx.max_lgkm_cnt);
+ if (counters & counter_vm && (bar_ev & vm_events) == event)
+ update_barrier_counter(&bar.vm, ctx.max_vm_cnt);
+ if (counters & counter_exp && (bar_ev & exp_events) == event)
+ update_barrier_counter(&bar.exp, ctx.max_exp_cnt);
+ if (counters & counter_vs && (bar_ev & vs_events) == event)
+ update_barrier_counter(&bar.vs, ctx.max_vs_cnt);
}
}
}
@@ -531,7 +553,7 @@ void update_counters(wait_ctx& ctx, wait_event event, barrier_interaction barrie
if (counters & counter_vs && ctx.vs_cnt <= ctx.max_vs_cnt)
ctx.vs_cnt++;
- update_barrier_imm(ctx, counters, barrier);
+ update_barrier_imm(ctx, counters, event, barrier);
if (ctx.unordered_events & event)
return;
@@ -569,7 +591,7 @@ void update_counters_for_flat_load(wait_ctx& ctx, barrier_interaction barrier=ba
if (ctx.vm_cnt <= ctx.max_vm_cnt)
ctx.vm_cnt++;
- update_barrier_imm(ctx, counter_vm | counter_lgkm, barrier);
+ update_barrier_imm(ctx, counter_vm | counter_lgkm, event_flat, barrier);
for (std::pair<PhysReg,wait_entry> e : ctx.gpr_map)
{