aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Rhys Perry <[email protected]> 2019-12-16 13:30:10 +0000
committer Dylan Baker <[email protected]> 2020-01-14 14:03:25 -0800
commit 4a7e014b4dd601e4c3445c026133373aec123986 (patch)
tree 842bcd79f25c18d732cb8fadb0117665741cdd77
parent 83821ebb25bb9ed844ff8e5e21cb214b1f96e90f (diff)
aco: don't DCE atomics with return values
We don't create atomics with definitions if they are not used in NIR, but
our own DCE can remove the uses if an export turns out to be null.

Signed-off-by: Rhys Perry <[email protected]>
Fixes: 93c8ebfa780 ('aco: Initial commit of independent AMD compiler')
Reviewed-by: Daniel Schürmann <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3081>
(cherry picked from commit 69bed1c9186c3e24ad54089218d58c5f7b83befe)

Conflicts resolved by Dylan Baker

Conflicts:
    src/amd/compiler/aco_opcodes.py
-rw-r--r-- src/amd/compiler/aco_dead_code_analysis.cpp 16
-rw-r--r-- src/amd/compiler/aco_ir.h 3
-rw-r--r-- src/amd/compiler/aco_opcodes.py 17
-rw-r--r-- src/amd/compiler/aco_opcodes_cpp.py 2
-rw-r--r-- src/amd/compiler/aco_optimizer.cpp 7
5 files changed, 26 insertions, 19 deletions
diff --git a/src/amd/compiler/aco_dead_code_analysis.cpp b/src/amd/compiler/aco_dead_code_analysis.cpp
index f56718f0479..443ba188c18 100644
--- a/src/amd/compiler/aco_dead_code_analysis.cpp
+++ b/src/amd/compiler/aco_dead_code_analysis.cpp
@@ -57,11 +57,7 @@ void process_block(dce_ctx& ctx, Block& block)
continue;
aco_ptr<Instruction>& instr = block.instructions[idx];
- const bool is_live = instr->definitions.empty() ||
- std::any_of(instr->definitions.begin(), instr->definitions.end(),
- [&ctx] (const Definition& def) { return !def.isTemp() || ctx.uses[def.tempId()];});
-
- if (is_live) {
+ if (!is_dead(ctx.uses, instr.get())) {
for (const Operand& op : instr->operands) {
if (op.isTemp()) {
if (ctx.uses[op.tempId()] == 0)
@@ -81,6 +77,16 @@ void process_block(dce_ctx& ctx, Block& block)
} /* end namespace */
+bool is_dead(const std::vector<uint16_t>& uses, Instruction *instr) /* true if instr may be removed; uses[] is indexed by temp id */
+{
+   if (instr->definitions.empty()) /* defines nothing: always kept */
+      return false;
+   if (std::any_of(instr->definitions.begin(), instr->definitions.end(), /* kept if any definition is not a temp (as the old DCE check did) or is a temp with remaining uses */
+                   [&uses] (const Definition& def) { return !def.isTemp() || uses[def.tempId()];}))
+      return false;
+   return instr_info.is_atomic[(int)instr->opcode]; /* NOTE(review): aco_opcodes.py in this patch sets this bit for opcodes WITHOUT "atomic" in their name (and clears it for image atomics), so a set bit means "removable"; the field name reads inverted - do not negate here without also flipping the generator */
+}
+
std::vector<uint16_t> dead_code_analysis(Program *program) {
dce_ctx ctx(program);
diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h
index 5b01ca7addf..cb671e56b01 100644
--- a/src/amd/compiler/aco_ir.h
+++ b/src/amd/compiler/aco_ir.h
@@ -906,6 +906,8 @@ constexpr barrier_interaction get_barrier_interaction(Instruction* instr)
}
}
+bool is_dead(const std::vector<uint16_t>& uses, Instruction *instr);
+
enum block_kind {
/* uniform indicates that leaving this block,
* all actives lanes stay active */
@@ -1182,6 +1184,7 @@ typedef struct {
const int16_t opcode_gfx10[static_cast<int>(aco_opcode::num_opcodes)];
const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> can_use_input_modifiers;
const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> can_use_output_modifiers;
+ const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> is_atomic;
const char *name[static_cast<int>(aco_opcode::num_opcodes)];
const aco::Format format[static_cast<int>(aco_opcode::num_opcodes)];
} Info;
diff --git a/src/amd/compiler/aco_opcodes.py b/src/amd/compiler/aco_opcodes.py
index 08337a18d22..fb8617a8ec5 100644
--- a/src/amd/compiler/aco_opcodes.py
+++ b/src/amd/compiler/aco_opcodes.py
@@ -154,7 +154,7 @@ class Opcode(object):
"""Class that represents all the information we have about the opcode
NOTE: this must be kept in sync with aco_op_info
"""
- def __init__(self, name, opcode_gfx9, opcode_gfx10, format, input_mod, output_mod):
+ def __init__(self, name, opcode_gfx9, opcode_gfx10, format, input_mod, output_mod, is_atomic):
"""Parameters:
- name is the name of the opcode (prepend nir_op_ for the enum name)
@@ -177,6 +177,7 @@ class Opcode(object):
self.opcode_gfx10 = opcode_gfx10
self.input_mod = "1" if input_mod else "0"
self.output_mod = "1" if output_mod else "0"
+ self.is_atomic = "1" if is_atomic else "0"
self.format = format
@@ -186,9 +187,9 @@ opcodes = {}
# VOPC to GFX6 opcode translation map
VOPC_GFX6 = [0] * 256
-def opcode(name, opcode_gfx9 = -1, opcode_gfx10 = -1, format = Format.PSEUDO, input_mod = False, output_mod = False):
+def opcode(name, opcode_gfx9 = -1, opcode_gfx10 = -1, format = Format.PSEUDO, input_mod = False, output_mod = False, is_atomic = True):
assert name not in opcodes
- opcodes[name] = Opcode(name, opcode_gfx9, opcode_gfx10, format, input_mod, output_mod)
+ opcodes[name] = Opcode(name, opcode_gfx9, opcode_gfx10, format, input_mod, output_mod, is_atomic)
opcode("exp", 0, 0, format = Format.EXP)
opcode("p_parallelcopy")
@@ -584,7 +585,7 @@ SMEM = {
( -1, -1, -1, 0xac, 0xac, "s_atomic_dec_x2"),
}
for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in SMEM:
- opcode(name, gfx9, gfx10, Format.SMEM)
+ opcode(name, gfx9, gfx10, Format.SMEM, is_atomic = "atomic" not in name)
# VOP2 instructions: 2 inputs, 1 output (+ optional vcc)
@@ -1261,7 +1262,7 @@ MUBUF = {
( -1, -1, -1, -1, 0x72, "buffer_gl1_inv"),
}
for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in MUBUF:
- opcode(name, gfx9, gfx10, Format.MUBUF)
+ opcode(name, gfx9, gfx10, Format.MUBUF, is_atomic = "atomic" not in name)
MTBUF = {
(0x00, 0x00, 0x00, 0x00, 0x00, "tbuffer_load_format_x"),
@@ -1325,7 +1326,7 @@ IMAGE_ATOMIC = {
# (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (gfx6, gfx7, gfx89, gfx89, ???, name)
# gfx7 and gfx10 opcodes are the same here
for (gfx6, gfx7, gfx89, name) in IMAGE_ATOMIC:
- opcode(name, gfx89, gfx7, Format.MIMG)
+ opcode(name, gfx89, gfx7, Format.MIMG, is_atomic = False)
IMAGE_SAMPLE = {
(0x20, "image_sample"),
@@ -1465,7 +1466,7 @@ FLAT = {
(0x60, -1, 0x60, "flat_atomic_fmax_x2"),
}
for (gfx7, gfx8, gfx10, name) in FLAT:
- opcode(name, gfx8, gfx10, Format.FLAT)
+ opcode(name, gfx8, gfx10, Format.FLAT, is_atomic = "atomic" not in name)
GLOBAL = {
#GFX8_9, GFX10
@@ -1525,7 +1526,7 @@ GLOBAL = {
( -1, 0x60, "global_atomic_fmax_x2"),
}
for (gfx8, gfx10, name) in GLOBAL:
- opcode(name, gfx8, gfx10, Format.GLOBAL)
+ opcode(name, gfx8, gfx10, Format.GLOBAL, is_atomic = "atomic" not in name)
SCRATCH = {
#GFX8_9, GFX10
diff --git a/src/amd/compiler/aco_opcodes_cpp.py b/src/amd/compiler/aco_opcodes_cpp.py
index 83c24e0eb44..364b2309f36 100644
--- a/src/amd/compiler/aco_opcodes_cpp.py
+++ b/src/amd/compiler/aco_opcodes_cpp.py
@@ -38,6 +38,7 @@ const unsigned VOPC_to_GFX6[256] = {
opcode_names = sorted(opcodes.keys())
can_use_input_modifiers = "".join([opcodes[name].input_mod for name in reversed(opcode_names)])
can_use_output_modifiers = "".join([opcodes[name].output_mod for name in reversed(opcode_names)])
+is_atomic = "".join([opcodes[name].is_atomic for name in reversed(opcode_names)])
%>
extern const aco::Info instr_info = {
@@ -53,6 +54,7 @@ extern const aco::Info instr_info = {
},
.can_use_input_modifiers = std::bitset<${len(opcode_names)}>("${can_use_input_modifiers}"),
.can_use_output_modifiers = std::bitset<${len(opcode_names)}>("${can_use_output_modifiers}"),
+ .is_atomic = std::bitset<${len(opcode_names)}>("${is_atomic}"),
.name = {
% for name in opcode_names:
"${name}",
diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp
index b3c43dbeb8f..4417a382012 100644
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -2202,12 +2202,7 @@ void select_instruction(opt_ctx &ctx, aco_ptr<Instruction>& instr)
{
const uint32_t threshold = 4;
- /* Dead Code Elimination:
- * We remove instructions if they define temporaries which all are unused */
- const bool is_used = instr->definitions.empty() ||
- std::any_of(instr->definitions.begin(), instr->definitions.end(),
- [&ctx](const Definition& def) { return ctx.uses[def.tempId()]; });
- if (!is_used) {
+ if (is_dead(ctx.uses, instr.get())) {
instr.reset();
return;
}