summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Matt Turner <[email protected]> 2013-03-28 11:15:20 -0700
committer Matt Turner <[email protected]> 2013-03-29 10:13:27 -0700
commit f085b21b25f76157dd91e57a022e5f5465dc86f9 (patch)
tree 625ee58248fcebd449ba0a9e8207bea5b06fd042
parent 414ea2f5609cbe1d03cb8acdeee05eda03f9a1c5 (diff)
i965/fs: Increase and document MAD latency on Gen7.
58% of mad(8) generated in shader-db are reading registers from the same bank.

Reviewed-by: Eric Anholt <[email protected]>
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp 22
1 files changed, 18 insertions, 4 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
index ec558e385d6..f0ef4701e31 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
@@ -129,15 +129,29 @@ schedule_node::set_latency_gen7(bool is_haswell)
{
switch (inst->opcode) {
case BRW_OPCODE_MAD:
- /* 3 cycles (this is said to be 4 cycles sometimes depending on the
- * register numbers in the sources):
+ /* 2 cycles
+ * (since the last two src operands are in different register banks):
+ * mad(8) g4<1>F g2.2<4,1,1>F.x g2<4,1,1>F.x g3.1<4,1,1>F.x { align16 WE_normal 1Q };
+ *
+ * 3 cycles on IVB, 4 on HSW
+ * (since the last two src operands are in the same register bank):
* mad(8) g4<1>F g2.2<4,1,1>F.x g2<4,1,1>F.x g2.1<4,1,1>F.x { align16 WE_normal 1Q };
*
- * 20 cycles:
+ * 18 cycles on IVB, 16 on HSW
+ * (since the last two src operands are in different register banks):
+ * mad(8) g4<1>F g2.2<4,1,1>F.x g2<4,1,1>F.x g3.1<4,1,1>F.x { align16 WE_normal 1Q };
+ * mov(8) null g4<4,5,1>F { align16 WE_normal 1Q };
+ *
+ * 20 cycles on IVB, 18 on HSW
+ * (since the last two src operands are in the same register bank):
* mad(8) g4<1>F g2.2<4,1,1>F.x g2<4,1,1>F.x g2.1<4,1,1>F.x { align16 WE_normal 1Q };
* mov(8) null g4<4,4,1>F { align16 WE_normal 1Q };
*/
- latency = is_haswell ? 16 : 17;
+
+ /* Our register allocator doesn't know about register banks, so use the
+ * higher latency.
+ */
+ latency = is_haswell ? 16 : 18;
break;
case BRW_OPCODE_LRP: