author     Rhys Perry <[email protected]>     2019-11-27 17:27:36 +0000
committer  Rhys Perry <[email protected]>     2019-11-29 17:46:02 +0000
commit     389ee819c04f3375358d0253bdb1f6094f2423c6 (patch)
tree       72eb92e4b9b687a9fab2cd14ad966756707fcc1e
parent     cc742562c133672c989b155d58ddc6794f9b67b8 (diff)
aco: improve FLAT/GLOBAL scheduling
Signed-off-by: Rhys Perry <[email protected]>
Reviewed-by: Daniel Schürmann <[email protected]>
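In short: FLAT_instruction gains its own barrier and can_reorder fields, visit_load_global()/visit_store_global() mark the FLAT/GLOBAL accesses they emit with barrier_buffer, get_barrier_interaction() reads the per-instruction barrier for FLAT/GLOBAL/SCRATCH instead of hard-coding barrier_buffer, and the scheduler stops treating the FLAT family as never-reorderable: schedule_VMEM() now also handles FLAT/GLOBAL and lets them join clauses. The sketch below is a standalone, simplified illustration of the resulting barrier lookup (invented type names, not the actual ACO sources):

/* Standalone sketch (simplified types, not the ACO sources) of the
 * per-instruction barrier lookup this patch introduces for the FLAT family. */
#include <cstdio>

enum barrier_interaction {
   barrier_none   = 0,
   barrier_buffer = 1 << 0,
   barrier_shared = 1 << 1,
};

enum class Format { SMEM, MUBUF, MIMG, FLAT, GLOBAL, SCRATCH, DS };

struct MemInstr {
   Format format;
   bool can_reorder;            /* may the scheduler move other memory ops past this? */
   barrier_interaction barrier; /* what a barrier has to order against */
};

/* Mirrors the shape of get_barrier_interaction(): FLAT/GLOBAL/SCRATCH report
 * their own barrier field instead of a hard-coded barrier_buffer. */
barrier_interaction get_barrier(const MemInstr& instr)
{
   switch (instr.format) {
   case Format::FLAT:
   case Format::GLOBAL:
   case Format::SCRATCH:
      return instr.barrier;
   case Format::DS:
      return barrier_shared;
   default:
      return barrier_buffer;
   }
}

int main()
{
   /* A global load as emitted by visit_load_global(): reorderable, but still
    * ordered against buffer barriers. */
   MemInstr global_load  = { Format::GLOBAL,  true, barrier_buffer };
   /* A scratch access that interacts with no barrier at all. */
   MemInstr scratch_load = { Format::SCRATCH, true, barrier_none };

   printf("global  -> barrier %d, can_reorder %d\n", get_barrier(global_load),  global_load.can_reorder);
   printf("scratch -> barrier %d, can_reorder %d\n", get_barrier(scratch_load), scratch_load.can_reorder);
   return 0;
}

With the barrier stored per instruction, the scheduler's can_reorder() check in aco_scheduler.cpp only has to read the flag, and schedule_VMEM() can move FLAT/GLOBAL accesses around like any other VMEM access.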
-rw-r--r--   src/amd/compiler/aco_instruction_selection.cpp    2
-rw-r--r--   src/amd/compiler/aco_ir.h                          7
-rw-r--r--   src/amd/compiler/aco_opcodes.py                    1
-rw-r--r--   src/amd/compiler/aco_print_ir.cpp                  1
-rw-r--r--   src/amd/compiler/aco_scheduler.cpp                33
5 files changed, 30 insertions, 14 deletions
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 60963060dea..2bced09cf97 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -4644,6 +4644,7 @@ void visit_load_global(isel_context *ctx, nir_intrinsic_instr *instr)
flat->operands[1] = Operand(s1);
flat->glc = glc;
flat->dlc = dlc;
+ flat->barrier = barrier_buffer;
if (dst.type() == RegType::sgpr) {
Temp vec = bld.tmp(RegType::vgpr, dst.size());
@@ -4765,6 +4766,7 @@ void visit_store_global(isel_context *ctx, nir_intrinsic_instr *instr)
flat->dlc = false;
flat->offset = offset;
flat->disable_wqm = true;
+ flat->barrier = barrier_buffer;
ctx->program->needs_exact = true;
ctx->block->instructions.emplace_back(std::move(flat));
}
diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h
index 60f06393aa5..4073086662a 100644
--- a/src/amd/compiler/aco_ir.h
+++ b/src/amd/compiler/aco_ir.h
@@ -850,7 +850,9 @@ struct FLAT_instruction : public Instruction {
bool dlc; /* NAVI: device level coherent */
bool lds;
bool nv;
- bool disable_wqm;
+ bool disable_wqm; /* Require an exec mask without helper invocations */
+ bool can_reorder;
+ barrier_interaction barrier;
};
struct Export_instruction : public Instruction {
@@ -972,7 +974,8 @@ constexpr barrier_interaction get_barrier_interaction(Instruction* instr)
return static_cast<MIMG_instruction*>(instr)->barrier;
case Format::FLAT:
case Format::GLOBAL:
- return barrier_buffer;
+ case Format::SCRATCH:
+ return static_cast<FLAT_instruction*>(instr)->barrier;
case Format::DS:
return barrier_shared;
default:
diff --git a/src/amd/compiler/aco_opcodes.py b/src/amd/compiler/aco_opcodes.py
index 5f74998a421..a4b02507eda 100644
--- a/src/amd/compiler/aco_opcodes.py
+++ b/src/amd/compiler/aco_opcodes.py
@@ -133,6 +133,7 @@ class Format(Enum):
('bool', 'bound_ctrl', 'false')]
elif self in [Format.FLAT, Format.GLOBAL, Format.SCRATCH]:
return [('uint16_t', 'offset', 0),
+ ('bool', 'can_reorder', 'true'),
('bool', 'glc', 'false'),
('bool', 'slc', 'false'),
('bool', 'lds', 'false'),
diff --git a/src/amd/compiler/aco_print_ir.cpp b/src/amd/compiler/aco_print_ir.cpp
index 5ced1d2d7bb..780980a8c69 100644
--- a/src/amd/compiler/aco_print_ir.cpp
+++ b/src/amd/compiler/aco_print_ir.cpp
@@ -373,6 +373,7 @@ static void print_instr_format_specific(struct Instruction *instr, FILE *output)
fprintf(output, " nv");
if (flat->disable_wqm)
fprintf(output, " disable_wqm");
+ print_barrier_reorder(flat->can_reorder, flat->barrier, output);
break;
}
case Format::MTBUF: {
diff --git a/src/amd/compiler/aco_scheduler.cpp b/src/amd/compiler/aco_scheduler.cpp
index eb0bb0d93e9..5c164703ebf 100644
--- a/src/amd/compiler/aco_scheduler.cpp
+++ b/src/amd/compiler/aco_scheduler.cpp
@@ -138,6 +138,11 @@ bool can_move_instr(aco_ptr<Instruction>& instr, Instruction* current, int movin
case Format::MIMG:
can_reorder = static_cast<MIMG_instruction*>(current)->can_reorder;
break;
+ case Format::FLAT:
+ case Format::GLOBAL:
+ case Format::SCRATCH:
+ can_reorder = static_cast<FLAT_instruction*>(current)->can_reorder;
+ break;
default:
break;
}
@@ -186,7 +191,7 @@ bool can_reorder(Instruction* candidate)
case Format::FLAT:
case Format::GLOBAL:
case Format::SCRATCH:
- return false;
+ return static_cast<FLAT_instruction*>(candidate)->can_reorder;
default:
return true;
}
@@ -483,6 +488,7 @@ void schedule_VMEM(sched_ctx& ctx, Block* block,
assert(candidate_idx >= 0);
aco_ptr<Instruction>& candidate = block->instructions[candidate_idx];
bool can_reorder_candidate = can_reorder(candidate.get());
+ bool is_vmem = candidate->isVMEM() || candidate->isFlatOrGlobal();
/* break when encountering another VMEM instruction, logical_start or barriers */
if (!can_reorder_smem && candidate->format == Format::SMEM && !can_reorder_candidate)
@@ -501,8 +507,10 @@ void schedule_VMEM(sched_ctx& ctx, Block* block,
register_pressure_indep.update(register_demand[candidate_idx]);
bool part_of_clause = false;
- if (candidate->isVMEM()) {
- bool same_resource = candidate->operands[1].tempId() == current->operands[1].tempId();
+ if (current->isVMEM() == candidate->isVMEM()) {
+ bool same_resource = true;
+ if (current->isVMEM())
+ same_resource = candidate->operands[1].tempId() == current->operands[1].tempId();
bool can_reorder = can_reorder_vmem || can_reorder_candidate;
int grab_dist = clause_insert_idx - candidate_idx;
/* We can't easily tell how much this will decrease the def-to-use
@@ -511,7 +519,7 @@ void schedule_VMEM(sched_ctx& ctx, Block* block,
}
/* if current depends on candidate, add additional dependencies and continue */
- bool can_move_down = !candidate->isVMEM() || part_of_clause;
+ bool can_move_down = !is_vmem || part_of_clause;
bool writes_exec = false;
for (const Definition& def : candidate->definitions) {
if (def.isTemp() && ctx.depends_on[def.tempId()])
@@ -540,7 +548,7 @@ void schedule_VMEM(sched_ctx& ctx, Block* block,
}
register_pressure_clause.update(register_demand[candidate_idx]);
can_reorder_smem &= candidate->format != Format::SMEM || can_reorder_candidate;
- can_reorder_vmem &= !candidate->isVMEM() || can_reorder_candidate;
+ can_reorder_vmem &= !is_vmem || can_reorder_candidate;
continue;
}
@@ -575,7 +583,7 @@ void schedule_VMEM(sched_ctx& ctx, Block* block,
}
register_pressure_clause.update(register_demand[candidate_idx]);
can_reorder_smem &= candidate->format != Format::SMEM || can_reorder_candidate;
- can_reorder_vmem &= !candidate->isVMEM() || can_reorder_candidate;
+ can_reorder_vmem &= !is_vmem || can_reorder_candidate;
continue;
}
@@ -636,6 +644,7 @@ void schedule_VMEM(sched_ctx& ctx, Block* block,
assert(candidate_idx < (int) block->instructions.size());
aco_ptr<Instruction>& candidate = block->instructions[candidate_idx];
bool can_reorder_candidate = can_reorder(candidate.get());
+ bool is_vmem = candidate->isVMEM() || candidate->isFlatOrGlobal();
if (candidate->opcode == aco_opcode::p_logical_end)
break;
@@ -651,7 +660,7 @@ void schedule_VMEM(sched_ctx& ctx, Block* block,
bool is_dependency = false;
if (candidate->format == Format::SMEM)
is_dependency = !can_reorder_smem && !can_reorder_candidate;
- if (candidate->isVMEM())
+ if (is_vmem)
is_dependency = !can_reorder_vmem && !can_reorder_candidate;
for (const Operand& op : candidate->operands) {
if (op.isTemp() && ctx.depends_on[op.tempId()]) {
@@ -676,7 +685,7 @@ void schedule_VMEM(sched_ctx& ctx, Block* block,
}
/* update flag whether we can reorder other memory instructions */
can_reorder_smem &= candidate->format != Format::SMEM || can_reorder_candidate;
- can_reorder_vmem &= !candidate->isVMEM() || can_reorder_candidate;
+ can_reorder_vmem &= !is_vmem || can_reorder_candidate;
if (!found_dependency) {
insert_idx = candidate_idx;
@@ -686,7 +695,7 @@ void schedule_VMEM(sched_ctx& ctx, Block* block,
continue;
}
- } else if (candidate->isVMEM()) {
+ } else if (is_vmem) {
/* don't move up dependencies of other VMEM instructions */
for (const Definition& def : candidate->definitions) {
if (def.isTemp())
@@ -717,7 +726,7 @@ void schedule_VMEM(sched_ctx& ctx, Block* block,
ctx.RAR_dependencies[op.tempId()] = true;
}
can_reorder_smem &= candidate->format != Format::SMEM || can_reorder_candidate;
- can_reorder_vmem &= !candidate->isVMEM() || can_reorder_candidate;
+ can_reorder_vmem &= !is_vmem || can_reorder_candidate;
continue;
}
@@ -783,7 +792,7 @@ void schedule_position_export(sched_ctx& ctx, Block* block,
break;
if (candidate->opcode == aco_opcode::p_exit_early_if)
break;
- if (candidate->isVMEM() || candidate->format == Format::SMEM)
+ if (candidate->isVMEM() || candidate->format == Format::SMEM || candidate->isFlatOrGlobal())
break;
if (!can_move_instr(candidate, current, moving_interaction))
break;
@@ -876,7 +885,7 @@ void schedule_block(sched_ctx& ctx, Program *program, Block* block, live& live_v
if (current->definitions.empty())
continue;
- if (current->isVMEM())
+ if (current->isVMEM() || current->isFlatOrGlobal())
schedule_VMEM(ctx, block, live_vars.register_demand[block->index], current, idx);
if (current->format == Format::SMEM)
schedule_SMEM(ctx, block, live_vars.register_demand[block->index], current, idx);
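For reference, a small standalone sketch (invented names, not the ACO scheduler itself) of the bookkeeping the schedule_VMEM() changes above rely on: FLAT and GLOBAL now count as VMEM for scheduling, and passing a non-reorderable VMEM candidate blocks further VMEM reordering.

#include <vector>

enum class Format { SALU, SMEM, MUBUF, MTBUF, MIMG, FLAT, GLOBAL, SCRATCH, DS };

struct Instr {
   Format format;
   bool can_reorder = true;
};

/* FLAT and GLOBAL are scheduled like the other VMEM formats,
 * matching "candidate->isVMEM() || candidate->isFlatOrGlobal()". */
bool is_vmem(const Instr& i)
{
   return i.format == Format::MUBUF || i.format == Format::MTBUF ||
          i.format == Format::MIMG  || i.format == Format::FLAT  ||
          i.format == Format::GLOBAL;
}

/* After scanning past the given candidates, may a VMEM instruction still be
 * moved across them?  Mirrors the repeated
 * "can_reorder_vmem &= !is_vmem || can_reorder_candidate" update above. */
bool vmem_still_reorderable(const std::vector<Instr>& passed)
{
   bool can_reorder_vmem = true;
   for (const Instr& candidate : passed)
      can_reorder_vmem &= !is_vmem(candidate) || candidate.can_reorder;
   return can_reorder_vmem;
}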