aboutsummaryrefslogtreecommitdiffstats
path: root/src/freedreno
diff options
context:
space:
mode:
authorRob Clark <[email protected]>2020-05-14 09:31:32 -0700
committerMarge Bot <[email protected]>2020-06-16 20:56:15 +0000
commita60d48a863d8e9d459562b874c463cd798f47a87 (patch)
tree76161e3cb1885df8f36fdf3b340e0c00d1363b00 /src/freedreno
parent5f1f8f7b172166c95b51473e3af3d72777c21dfc (diff)
freedreno/ir3/cp: extract valid_flags
We'll also need this in the postsched-cp pass. Signed-off-by: Rob Clark <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5280>
Diffstat (limited to 'src/freedreno')
-rw-r--r--src/freedreno/ir3/ir3.c170
-rw-r--r--src/freedreno/ir3/ir3.h2
-rw-r--r--src/freedreno/ir3/ir3_cp.c180
3 files changed, 178 insertions, 174 deletions
diff --git a/src/freedreno/ir3/ir3.c b/src/freedreno/ir3/ir3.c
index 5f4b86d610d..463c7664332 100644
--- a/src/freedreno/ir3/ir3.c
+++ b/src/freedreno/ir3/ir3.c
@@ -1285,3 +1285,173 @@ ir3_fixup_src_type(struct ir3_instruction *instr)
break;
}
}
+
+static unsigned
+cp_flags(unsigned flags)
+{
+ /* only considering these flags (at least for now): */
+ flags &= (IR3_REG_CONST | IR3_REG_IMMED |
+ IR3_REG_FNEG | IR3_REG_FABS |
+ IR3_REG_SNEG | IR3_REG_SABS |
+ IR3_REG_BNOT | IR3_REG_RELATIV);
+ return flags;
+}
+
+bool
+ir3_valid_flags(struct ir3_instruction *instr, unsigned n,
+ unsigned flags)
+{
+ struct ir3_compiler *compiler = instr->block->shader->compiler;
+ unsigned valid_flags;
+
+ if ((flags & IR3_REG_HIGH) &&
+ (opc_cat(instr->opc) > 1) &&
+ (compiler->gpu_id >= 600))
+ return false;
+
+ flags = cp_flags(flags);
+
+ /* If destination is indirect, then source cannot be.. at least
+ * I don't think so..
+ */
+ if ((instr->regs[0]->flags & IR3_REG_RELATIV) &&
+ (flags & IR3_REG_RELATIV))
+ return false;
+
+ if (flags & IR3_REG_RELATIV) {
+ /* TODO need to test on earlier gens.. pretty sure the earlier
+ * problem was just that we didn't check that the src was from
+ * same block (since we can't propagate address register values
+ * across blocks currently)
+ */
+ if (compiler->gpu_id < 600)
+ return false;
+
+ /* NOTE in the special try_swap_mad_two_srcs() case we can be
+ * called on a src that has already had an indirect load folded
+ * in, in which case ssa() returns NULL
+ */
+ if (instr->regs[n+1]->flags & IR3_REG_SSA) {
+ struct ir3_instruction *src = ssa(instr->regs[n+1]);
+ if (src->address->block != instr->block)
+ return false;
+ }
+ }
+
+ switch (opc_cat(instr->opc)) {
+ case 1:
+ valid_flags = IR3_REG_IMMED | IR3_REG_CONST | IR3_REG_RELATIV;
+ if (flags & ~valid_flags)
+ return false;
+ break;
+ case 2:
+ valid_flags = ir3_cat2_absneg(instr->opc) |
+ IR3_REG_CONST | IR3_REG_RELATIV;
+
+ if (ir3_cat2_int(instr->opc))
+ valid_flags |= IR3_REG_IMMED;
+
+ if (flags & ~valid_flags)
+ return false;
+
+ if (flags & (IR3_REG_CONST | IR3_REG_IMMED)) {
+ unsigned m = (n ^ 1) + 1;
+ /* cannot deal w/ const in both srcs:
+ * (note that some cat2 actually only have a single src)
+ */
+ if (m < instr->regs_count) {
+ struct ir3_register *reg = instr->regs[m];
+ if ((flags & IR3_REG_CONST) && (reg->flags & IR3_REG_CONST))
+ return false;
+ if ((flags & IR3_REG_IMMED) && (reg->flags & IR3_REG_IMMED))
+ return false;
+ }
+ }
+ break;
+ case 3:
+ valid_flags = ir3_cat3_absneg(instr->opc) |
+ IR3_REG_CONST | IR3_REG_RELATIV;
+
+ if (flags & ~valid_flags)
+ return false;
+
+ if (flags & (IR3_REG_CONST | IR3_REG_RELATIV)) {
+ /* cannot deal w/ const/relativ in 2nd src: */
+ if (n == 1)
+ return false;
+ }
+
+ break;
+ case 4:
+ /* seems like blob compiler avoids const as src.. */
+ /* TODO double check if this is still the case on a4xx */
+ if (flags & (IR3_REG_CONST | IR3_REG_IMMED))
+ return false;
+ if (flags & (IR3_REG_SABS | IR3_REG_SNEG))
+ return false;
+ break;
+ case 5:
+ /* no flags allowed */
+ if (flags)
+ return false;
+ break;
+ case 6:
+ valid_flags = IR3_REG_IMMED;
+ if (flags & ~valid_flags)
+ return false;
+
+ if (flags & IR3_REG_IMMED) {
+ /* doesn't seem like we can have immediate src for store
+ * instructions:
+ *
+ * TODO this restriction could also apply to load instructions,
+ * but for load instructions this arg is the address (and not
+ * really sure any good way to test a hard-coded immed addr src)
+ */
+ if (is_store(instr) && (n == 1))
+ return false;
+
+ if ((instr->opc == OPC_LDL) && (n == 0))
+ return false;
+
+ if ((instr->opc == OPC_STL) && (n != 2))
+ return false;
+
+ if (instr->opc == OPC_STLW && n == 0)
+ return false;
+
+ if (instr->opc == OPC_LDLW && n == 0)
+ return false;
+
+ /* disallow immediates in anything but the SSBO slot argument for
+ * cat6 instructions:
+ */
+ if (is_atomic(instr->opc) && (n != 0))
+ return false;
+
+ if (is_atomic(instr->opc) && !(instr->flags & IR3_INSTR_G))
+ return false;
+
+ if (instr->opc == OPC_STG && (instr->flags & IR3_INSTR_G) && (n != 2))
+ return false;
+
+ /* as with atomics, these cat6 instrs can only have an immediate
+ * for SSBO/IBO slot argument
+ */
+ switch (instr->opc) {
+ case OPC_LDIB:
+ case OPC_LDC:
+ case OPC_RESINFO:
+ if (n != 0)
+ return false;
+ break;
+ default:
+ break;
+ }
+ }
+
+ break;
+ }
+
+ return true;
+}
diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h
index 90086294de8..1d37afe0ad6 100644
--- a/src/freedreno/ir3/ir3.h
+++ b/src/freedreno/ir3/ir3.h
@@ -620,6 +620,8 @@ void ir3_find_ssa_uses(struct ir3 *ir, void *mem_ctx, bool falsedeps);
void ir3_set_dst_type(struct ir3_instruction *instr, bool half);
void ir3_fixup_src_type(struct ir3_instruction *instr);
+bool ir3_valid_flags(struct ir3_instruction *instr, unsigned n, unsigned flags);
+
#include "util/set.h"
#define foreach_ssa_use(__use, __instr) \
for (struct ir3_instruction *__use = (void *)~0; \
diff --git a/src/freedreno/ir3/ir3_cp.c b/src/freedreno/ir3/ir3_cp.c
index b95827e97e9..ed116a9f3b7 100644
--- a/src/freedreno/ir3/ir3_cp.c
+++ b/src/freedreno/ir3/ir3_cp.c
@@ -100,174 +100,6 @@ static bool is_eligible_mov(struct ir3_instruction *instr,
return false;
}
-static unsigned cp_flags(unsigned flags)
-{
- /* only considering these flags (at least for now): */
- flags &= (IR3_REG_CONST | IR3_REG_IMMED |
- IR3_REG_FNEG | IR3_REG_FABS |
- IR3_REG_SNEG | IR3_REG_SABS |
- IR3_REG_BNOT | IR3_REG_RELATIV);
- return flags;
-}
-
-static bool valid_flags(struct ir3_instruction *instr, unsigned n,
- unsigned flags)
-{
- struct ir3_compiler *compiler = instr->block->shader->compiler;
- unsigned valid_flags;
-
- if ((flags & IR3_REG_HIGH) &&
- (opc_cat(instr->opc) > 1) &&
- (compiler->gpu_id >= 600))
- return false;
-
- flags = cp_flags(flags);
-
- /* If destination is indirect, then source cannot be.. at least
- * I don't think so..
- */
- if ((instr->regs[0]->flags & IR3_REG_RELATIV) &&
- (flags & IR3_REG_RELATIV))
- return false;
-
- if (flags & IR3_REG_RELATIV) {
- /* TODO need to test on earlier gens.. pretty sure the earlier
- * problem was just that we didn't check that the src was from
- * same block (since we can't propagate address register values
- * across blocks currently)
- */
- if (compiler->gpu_id < 600)
- return false;
-
- /* NOTE in the special try_swap_mad_two_srcs() case we can be
- * called on a src that has already had an indirect load folded
- * in, in which case ssa() returns NULL
- */
- if (instr->regs[n+1]->flags & IR3_REG_SSA) {
- struct ir3_instruction *src = ssa(instr->regs[n+1]);
- if (src->address->block != instr->block)
- return false;
- }
- }
-
- switch (opc_cat(instr->opc)) {
- case 1:
- valid_flags = IR3_REG_IMMED | IR3_REG_CONST | IR3_REG_RELATIV;
- if (flags & ~valid_flags)
- return false;
- break;
- case 2:
- valid_flags = ir3_cat2_absneg(instr->opc) |
- IR3_REG_CONST | IR3_REG_RELATIV;
-
- if (ir3_cat2_int(instr->opc))
- valid_flags |= IR3_REG_IMMED;
-
- if (flags & ~valid_flags)
- return false;
-
- if (flags & (IR3_REG_CONST | IR3_REG_IMMED)) {
- unsigned m = (n ^ 1) + 1;
- /* cannot deal w/ const in both srcs:
- * (note that some cat2 actually only have a single src)
- */
- if (m < instr->regs_count) {
- struct ir3_register *reg = instr->regs[m];
- if ((flags & IR3_REG_CONST) && (reg->flags & IR3_REG_CONST))
- return false;
- if ((flags & IR3_REG_IMMED) && (reg->flags & IR3_REG_IMMED))
- return false;
- }
- }
- break;
- case 3:
- valid_flags = ir3_cat3_absneg(instr->opc) |
- IR3_REG_CONST | IR3_REG_RELATIV;
-
- if (flags & ~valid_flags)
- return false;
-
- if (flags & (IR3_REG_CONST | IR3_REG_RELATIV)) {
- /* cannot deal w/ const/relativ in 2nd src: */
- if (n == 1)
- return false;
- }
-
- break;
- case 4:
- /* seems like blob compiler avoids const as src.. */
- /* TODO double check if this is still the case on a4xx */
- if (flags & (IR3_REG_CONST | IR3_REG_IMMED))
- return false;
- if (flags & (IR3_REG_SABS | IR3_REG_SNEG))
- return false;
- break;
- case 5:
- /* no flags allowed */
- if (flags)
- return false;
- break;
- case 6:
- valid_flags = IR3_REG_IMMED;
- if (flags & ~valid_flags)
- return false;
-
- if (flags & IR3_REG_IMMED) {
- /* doesn't seem like we can have immediate src for store
- * instructions:
- *
- * TODO this restriction could also apply to load instructions,
- * but for load instructions this arg is the address (and not
- * really sure any good way to test a hard-coded immed addr src)
- */
- if (is_store(instr) && (n == 1))
- return false;
-
- if ((instr->opc == OPC_LDL) && (n == 0))
- return false;
-
- if ((instr->opc == OPC_STL) && (n != 2))
- return false;
-
- if (instr->opc == OPC_STLW && n == 0)
- return false;
-
- if (instr->opc == OPC_LDLW && n == 0)
- return false;
-
- /* disallow immediates in anything but the SSBO slot argument for
- * cat6 instructions:
- */
- if (is_atomic(instr->opc) && (n != 0))
- return false;
-
- if (is_atomic(instr->opc) && !(instr->flags & IR3_INSTR_G))
- return false;
-
- if (instr->opc == OPC_STG && (instr->flags & IR3_INSTR_G) && (n != 2))
- return false;
-
- /* as with atomics, these cat6 instrs can only have an immediate
- * for SSBO/IBO slot argument
- */
- switch (instr->opc) {
- case OPC_LDIB:
- case OPC_LDC:
- case OPC_RESINFO:
- if (n != 0)
- return false;
- break;
- default:
- break;
- }
- }
-
- break;
- }
-
- return true;
-}
-
/* propagate register flags from src to dst.. negates need special
* handling to cancel each other out.
*/
@@ -326,7 +158,7 @@ lower_immed(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr, unsigned n,
new_flags &= ~IR3_REG_IMMED;
new_flags |= IR3_REG_CONST;
- if (!valid_flags(instr, n, new_flags))
+ if (!ir3_valid_flags(instr, n, new_flags))
return false;
unsigned swiz, idx, i;
@@ -455,9 +287,9 @@ try_swap_mad_two_srcs(struct ir3_instruction *instr, unsigned new_flags)
bool valid_swap =
/* can we propagate mov if we move 2nd src to first? */
- valid_flags(instr, 0, new_flags) &&
+ ir3_valid_flags(instr, 0, new_flags) &&
/* and does first src fit in second slot? */
- valid_flags(instr, 1, instr->regs[1 + 1]->flags);
+ ir3_valid_flags(instr, 1, instr->regs[1 + 1]->flags);
if (!valid_swap) {
/* put things back the way they were: */
@@ -487,7 +319,7 @@ reg_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr,
combine_flags(&new_flags, src);
- if (valid_flags(instr, n, new_flags)) {
+ if (ir3_valid_flags(instr, n, new_flags)) {
if (new_flags & IR3_REG_ARRAY) {
debug_assert(!(reg->flags & IR3_REG_ARRAY));
reg->array = src_reg->array;
@@ -512,7 +344,7 @@ reg_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr,
combine_flags(&new_flags, src);
- if (!valid_flags(instr, n, new_flags)) {
+ if (!ir3_valid_flags(instr, n, new_flags)) {
/* See if lowering an immediate to const would help. */
if (lower_immed(ctx, instr, n, src_reg, new_flags))
return true;
@@ -613,7 +445,7 @@ reg_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr,
iim_val = ~iim_val;
/* other than category 1 (mov) we can only encode up to 10 bits: */
- if (valid_flags(instr, n, new_flags) &&
+ if (ir3_valid_flags(instr, n, new_flags) &&
((instr->opc == OPC_MOV) ||
!((iim_val & ~0x3ff) && (-iim_val & ~0x3ff)))) {
new_flags &= ~(IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT);