i965/fs: Add LRP instruction latency.

Set its latency to what happens to be the default floating-point instruction latency. One day we may want to handle latency based on register bank information.

Reviewed-by: Eric Anholt <[email protected]>
author: Matt Turner <[email protected]> 2013-03-28 10:57:34 -0700
committer: Matt Turner <[email protected]> 2013-03-29 10:13:27 -0700
commit: 414ea2f5609cbe1d03cb8acdeee05eda03f9a1c5 (patch)
tree: cac67d3a825d99a32ef8db6d5a6f38c1a46f164a /src
parent: ad4507b355a1cf817a0d280625f485bb1856c65c (diff)
1 files changed, 26 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
index d2420cd08c6..ec558e385d6 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
@@ -140,6 +140,32 @@ schedule_node::set_latency_gen7(bool is_haswell)
       latency = is_haswell ? 16 : 17;
       break;
 
+   case BRW_OPCODE_LRP:
+      /* 2 cycles
+       *  (since the last two src operands are in different register banks):
+       * lrp(8) g4<1>F g2.2<4,1,1>F.x  g2<4,1,1>F.x g3.1<4,1,1>F.x { align16 WE_normal 1Q };
+       *
+       * 3 cycles on IVB, 4 on HSW
+       *  (since the last two src operands are in the same register bank):
+       * lrp(8) g4<1>F g2.2<4,1,1>F.x  g2<4,1,1>F.x g2.1<4,1,1>F.x { align16 WE_normal 1Q };
+       *
+       * 16 cycles on IVB, 14 on HSW
+       *  (since the last two src operands are in different register banks):
+       * lrp(8) g4<1>F g2.2<4,1,1>F.x  g2<4,1,1>F.x g3.1<4,1,1>F.x { align16 WE_normal 1Q };
+       * mov(8) null   g4<4,4,1>F                     { align16 WE_normal 1Q };
+       *
+       * 16 cycles
+       *  (since the last two src operands are in the same register bank):
+       * lrp(8) g4<1>F g2.2<4,1,1>F.x  g2<4,1,1>F.x g2.1<4,1,1>F.x { align16 WE_normal 1Q };
+       * mov(8) null   g4<4,4,1>F                     { align16 WE_normal 1Q };
+       */
+
+      /* Our register allocator doesn't know about register banks, so use the
+       * higher latency.
+       */
+      latency = 14;
+      break;
+
    case SHADER_OPCODE_RCP:
    case SHADER_OPCODE_RSQ:
    case SHADER_OPCODE_SQRT:
author	Matt Turner <[email protected]>	2013-03-28 10:57:34 -0700
committer	Matt Turner <[email protected]>	2013-03-29 10:13:27 -0700
commit	414ea2f5609cbe1d03cb8acdeee05eda03f9a1c5 (patch)
tree	cac67d3a825d99a32ef8db6d5a6f38c1a46f164a /src
parent	ad4507b355a1cf817a0d280625f485bb1856c65c (diff)