diff options
author | Rob Clark <[email protected]> | 2014-02-21 18:03:30 -0500 |
---|---|---|
committer | Rob Clark <[email protected]> | 2014-02-23 14:58:23 -0500 |
commit | 9bbfae62657f44251562174ac1f80b2e4278a52d (patch) | |
tree | f2377c18c4ff63d54b3fac08973cb7f78d3ebc96 /src/gallium | |
parent | bb255fdf06caa7fcf9af7c65524beb28a3a4faf5 (diff) |
freedreno/a3xx/compiler: collapse nop's with repeat
Collapsing consecutive nops into a single nop with a repeat count is easier
than making more extensive use of rpt, and the more compact shaders seem to
bring a small performance boost. (Perhaps the benefits of the repeat flag go
beyond the instruction cache; possibly it saves on instruction decode as well?)
Signed-off-by: Rob Clark <[email protected]>
Diffstat (limited to 'src/gallium')
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/ir3.h | 5 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/ir3_ra.c | 10 |
2 files changed, 15 insertions, 0 deletions
```diff
diff --git a/src/gallium/drivers/freedreno/a3xx/ir3.h b/src/gallium/drivers/freedreno/a3xx/ir3.h
index 9c57a653553..894db175076 100644
--- a/src/gallium/drivers/freedreno/a3xx/ir3.h
+++ b/src/gallium/drivers/freedreno/a3xx/ir3.h
@@ -312,6 +312,11 @@ static inline bool is_flow(struct ir3_instruction *instr)
 	return (instr->category == 0);
 }
 
+static inline bool is_nop(struct ir3_instruction *instr)
+{
+	return is_flow(instr) && (instr->opc == OPC_NOP);
+}
+
 static inline bool is_alu(struct ir3_instruction *instr)
 {
 	return (1 <= instr->category) && (instr->category <= 3);
diff --git a/src/gallium/drivers/freedreno/a3xx/ir3_ra.c b/src/gallium/drivers/freedreno/a3xx/ir3_ra.c
index 5df57e776f9..06a86ff3b2d 100644
--- a/src/gallium/drivers/freedreno/a3xx/ir3_ra.c
+++ b/src/gallium/drivers/freedreno/a3xx/ir3_ra.c
@@ -542,6 +542,16 @@ static void legalize(struct ir3_ra_ctx *ctx, struct ir3_block *block)
 		if ((shader->instrs_count == 0) && (n->category >= 5))
 			ir3_instr_create(block, 0, OPC_NOP);
 
+		if (is_nop(n) && shader->instrs_count) {
+			struct ir3_instruction *last =
+					shader->instrs[shader->instrs_count-1];
+			if (is_nop(last) && (last->repeat < 5)) {
+				last->repeat++;
+				last->flags |= n->flags;
+				continue;
+			}
+		}
+
 		shader->instrs[shader->instrs_count++] = n;
 
 		if (is_sfu(n))
```