diff options
author | Rob Clark <[email protected]> | 2015-04-06 10:48:11 -0400 |
---|---|---|
committer | Rob Clark <[email protected]> | 2015-04-11 11:39:46 -0400 |
commit | f0e9a632a12798bd727799e396cde665bd960665 (patch) | |
tree | 2c34e7378cd03c5424cec08873d76aaea4ae96bc /src/gallium | |
parent | f59613561694cc4a4b81db8a73f8afe893dbacac (diff) |
freedreno/ir3/cp: support to swap mad src's
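For a normal MAD (i.e. not MADSH), if the first source is a GPR and the
second source is a const, we can swap the first two sources to avoid needing
a mov instruction. The swap is safe because the multiply is commutative; MADSH
is excluded because it shifts its first source before the multiply.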
This gives back the biggest advantage the TGSI f/e had over the NIR f/e for
common shaders, since the TGSI f/e performed this swap itself. Note that
doing this in the copy-prop step has the advantage that it will also work
for cases like:
MOV TEMP[b], CONST[x]
MAD TEMP[d], TEMP[a], TEMP[b], TEMP[c]
Signed-off-by: Rob Clark <[email protected]>
Diffstat (limited to 'src/gallium')
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/instr-a3xx.h | 13 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3.h | 4 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_cp.c | 32 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_depth.c | 3 |
4 files changed, 43 insertions, 9 deletions
diff --git a/src/gallium/drivers/freedreno/ir3/instr-a3xx.h b/src/gallium/drivers/freedreno/ir3/instr-a3xx.h index 4d75d771435..98637c7874d 100644 --- a/src/gallium/drivers/freedreno/ir3/instr-a3xx.h +++ b/src/gallium/drivers/freedreno/ir3/instr-a3xx.h @@ -676,9 +676,7 @@ static inline bool is_mad(opc_t opc) { switch (opc) { case OPC_MAD_U16: - case OPC_MADSH_U16: case OPC_MAD_S16: - case OPC_MADSH_M16: case OPC_MAD_U24: case OPC_MAD_S24: case OPC_MAD_F16: @@ -689,4 +687,15 @@ static inline bool is_mad(opc_t opc) } } +static inline bool is_madsh(opc_t opc) +{ + switch (opc) { + case OPC_MADSH_U16: + case OPC_MADSH_M16: + return true; + default: + return false; + } +} + #endif /* INSTR_A3XX_H_ */ diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h index f424f73bec3..1a8beade25b 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3.h +++ b/src/gallium/drivers/freedreno/ir3/ir3.h @@ -540,10 +540,10 @@ static inline bool reg_gpr(struct ir3_register *r) return true; } -/* some cat2 instructions (ie. those which are not float can embed an +/* some cat2 instructions (ie. 
those which are not float) can embed an * immediate: */ -static inline bool ir3_cat2_immed(opc_t opc) +static inline bool ir3_cat2_int(opc_t opc) { switch (opc) { case OPC_ADD_U: diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cp.c b/src/gallium/drivers/freedreno/ir3/ir3_cp.c index 3eb85f660e2..77bfbc53e2a 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_cp.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_cp.c @@ -115,7 +115,7 @@ static bool valid_flags(struct ir3_instruction *instr, unsigned n, case 2: valid_flags = ir3_cat2_absneg(instr->opc) | IR3_REG_CONST; - if (ir3_cat2_immed(instr->opc)) + if (ir3_cat2_int(instr->opc)) valid_flags |= IR3_REG_IMMED; if (flags & ~valid_flags) @@ -199,6 +199,15 @@ static void combine_flags(unsigned *dstflags, unsigned srcflags) static struct ir3_instruction * instr_cp(struct ir3_instruction *instr, unsigned *flags); +/* the "plain" MAD's (ie. the ones that don't shift first src prior to + * multiply) can swap their first two srcs if src[0] is !CONST and + * src[1] is CONST: + */ +static bool is_valid_mad(struct ir3_instruction *instr) +{ + return (instr->category == 3) && is_mad(instr->opc); +} + /** * Handle cp for a given src register. This additionally handles * the cases of collapsing immedate/const (which replace the src @@ -255,8 +264,23 @@ reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n) combine_flags(&new_flags, reg->flags); - if (!valid_flags(instr, n, new_flags)) - return; + if (!valid_flags(instr, n, new_flags)) { + /* special case for "normal" mad instructions, we can + * try swapping the first two args if that fits better. 
+ */ + if ((n == 1) && is_valid_mad(instr) && + !(instr->regs[0 + 1]->flags & IR3_REG_CONST) && + valid_flags(instr, 0, new_flags)) { + /* swap src[0] and src[1]: */ + struct ir3_register *tmp; + tmp = instr->regs[0 + 1]; + instr->regs[0 + 1] = instr->regs[1 + 1]; + instr->regs[1 + 1] = tmp; + n = 0; + } else { + return; + } + } /* Here we handle the special case of mov from * CONST and/or RELATIV. These need to be handled @@ -305,7 +329,7 @@ reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n) debug_assert((instr->category == 6) || ((instr->category == 2) && - ir3_cat2_immed(instr->opc))); + ir3_cat2_int(instr->opc))); if (new_flags & IR3_REG_SABS) iim_val = abs(iim_val); diff --git a/src/gallium/drivers/freedreno/ir3/ir3_depth.c b/src/gallium/drivers/freedreno/ir3/ir3_depth.c index 0cda62bf102..9e1f45dabaf 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_depth.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_depth.c @@ -74,7 +74,8 @@ int ir3_delayslots(struct ir3_instruction *assigner, if (is_flow(consumer) || is_sfu(consumer) || is_tex(consumer)) { return 6; } else if ((consumer->category == 3) && - is_mad(consumer->opc) && (n == 2)) { + (is_mad(consumer->opc) || is_madsh(consumer->opc)) && + (n == 2)) { /* special case, 3rd src to cat3 not required on first cycle */ return 1; } else { |