From 9bbfae62657f44251562174ac1f80b2e4278a52d Mon Sep 17 00:00:00 2001
From: Rob Clark
Date: Fri, 21 Feb 2014 18:03:30 -0500
Subject: freedreno/a3xx/compiler: collapse nop's with repeat

Easier than making more extensive use of rpt, and the more compact
shaders seem to bring some bit of performance boost. (Perhaps repeat
flag benefits are more than just instruction cache, possibly it saves
on instruction decode as well?)

Signed-off-by: Rob Clark
---
 src/gallium/drivers/freedreno/a3xx/ir3.h    |  5 +++++
 src/gallium/drivers/freedreno/a3xx/ir3_ra.c | 10 ++++++++++
 2 files changed, 15 insertions(+)

(limited to 'src/gallium/drivers/freedreno/a3xx')

diff --git a/src/gallium/drivers/freedreno/a3xx/ir3.h b/src/gallium/drivers/freedreno/a3xx/ir3.h
index 9c57a653553..894db175076 100644
--- a/src/gallium/drivers/freedreno/a3xx/ir3.h
+++ b/src/gallium/drivers/freedreno/a3xx/ir3.h
@@ -312,6 +312,11 @@ static inline bool is_flow(struct ir3_instruction *instr)
 	return (instr->category == 0);
 }
 
+static inline bool is_nop(struct ir3_instruction *instr)
+{
+	return is_flow(instr) && (instr->opc == OPC_NOP);
+}
+
 static inline bool is_alu(struct ir3_instruction *instr)
 {
 	return (1 <= instr->category) && (instr->category <= 3);
diff --git a/src/gallium/drivers/freedreno/a3xx/ir3_ra.c b/src/gallium/drivers/freedreno/a3xx/ir3_ra.c
index 5df57e776f9..06a86ff3b2d 100644
--- a/src/gallium/drivers/freedreno/a3xx/ir3_ra.c
+++ b/src/gallium/drivers/freedreno/a3xx/ir3_ra.c
@@ -542,6 +542,16 @@ static void legalize(struct ir3_ra_ctx *ctx, struct ir3_block *block)
 		if ((shader->instrs_count == 0) && (n->category >= 5))
 			ir3_instr_create(block, 0, OPC_NOP);
 
+		if (is_nop(n) && shader->instrs_count) {
+			struct ir3_instruction *last =
+					shader->instrs[shader->instrs_count-1];
+			if (is_nop(last) && (last->repeat < 5)) {
+				last->repeat++;
+				last->flags |= n->flags;
+				continue;
+			}
+		}
+
 		shader->instrs[shader->instrs_count++] = n;
 
 		if (is_sfu(n))
-- 
cgit v1.2.3