summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/freedreno/a3xx
diff options
context:
space:
mode:
authorRob Clark <[email protected]>2014-02-21 18:03:30 -0500
committerRob Clark <[email protected]>2014-02-23 14:58:23 -0500
commit9bbfae62657f44251562174ac1f80b2e4278a52d (patch)
treef2377c18c4ff63d54b3fac08973cb7f78d3ebc96 /src/gallium/drivers/freedreno/a3xx
parentbb255fdf06caa7fcf9af7c65524beb28a3a4faf5 (diff)
freedreno/a3xx/compiler: collapse nop's with repeat
Collapsing consecutive nops into a single nop with its repeat count bumped is easier than making more extensive use of rpt, and the more compact shaders seem to bring a small performance boost. (Perhaps the benefits of the repeat flag go beyond the instruction cache; possibly it saves on instruction decode as well?) Signed-off-by: Rob Clark <[email protected]>
Diffstat (limited to 'src/gallium/drivers/freedreno/a3xx')
-rw-r--r--src/gallium/drivers/freedreno/a3xx/ir3.h5
-rw-r--r--src/gallium/drivers/freedreno/a3xx/ir3_ra.c10
2 files changed, 15 insertions, 0 deletions
diff --git a/src/gallium/drivers/freedreno/a3xx/ir3.h b/src/gallium/drivers/freedreno/a3xx/ir3.h
index 9c57a653553..894db175076 100644
--- a/src/gallium/drivers/freedreno/a3xx/ir3.h
+++ b/src/gallium/drivers/freedreno/a3xx/ir3.h
@@ -312,6 +312,11 @@ static inline bool is_flow(struct ir3_instruction *instr)
return (instr->category == 0);
}
+static inline bool is_nop(struct ir3_instruction *instr) /* true iff instr is a flow (category 0) instruction whose opcode is OPC_NOP */
+{
+ return is_flow(instr) && (instr->opc == OPC_NOP); /* category is checked first; presumably opc values are only unique within a category -- TODO confirm */
+}
+
static inline bool is_alu(struct ir3_instruction *instr)
{
return (1 <= instr->category) && (instr->category <= 3);
diff --git a/src/gallium/drivers/freedreno/a3xx/ir3_ra.c b/src/gallium/drivers/freedreno/a3xx/ir3_ra.c
index 5df57e776f9..06a86ff3b2d 100644
--- a/src/gallium/drivers/freedreno/a3xx/ir3_ra.c
+++ b/src/gallium/drivers/freedreno/a3xx/ir3_ra.c
@@ -542,6 +542,16 @@ static void legalize(struct ir3_ra_ctx *ctx, struct ir3_block *block)
if ((shader->instrs_count == 0) && (n->category >= 5))
ir3_instr_create(block, 0, OPC_NOP);
+ if (is_nop(n) && shader->instrs_count) {
+ struct ir3_instruction *last =
+ shader->instrs[shader->instrs_count-1];
+ if (is_nop(last) && (last->repeat < 5)) {
+ last->repeat++;
+ last->flags |= n->flags;
+ continue;
+ }
+ }
+
shader->instrs[shader->instrs_count++] = n;
if (is_sfu(n))