i965/fs: Enforce extended math exec size limits during SIMD lowering.

This teaches the SIMD lowering pass about the hardware limits on the execution size of math instructions, which will allow simplifying the generator code and at the same time get rid of a number of bugs in the manual SIMD unrolling done currently that prevent SIMD32 codegen from working. Reviewed-by: Jason Ekstrand <[email protected]>
author: Francisco Jerez <[email protected]> 2016-05-20 13:14:20 -0700
committer: Francisco Jerez <[email protected]> 2016-05-27 23:19:21 -0700
commit: 2b5adb942bad418058d266c85c396040d558f680 (patch)
tree: 5cde265ed7b2474dd9e9041dd7c1c9ab10d69c47 /src
parent: a8e7b4f1d9ec50d2214e7694da26af6a108e506f (diff)
1 files changed, 24 insertions, 10 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 5d3b9f70f45..4666fa5ea9a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -4661,16 +4661,6 @@ get_lowered_simd_width(const struct brw_device_info *devinfo,
    case BRW_OPCODE_SAD2:
    case BRW_OPCODE_MAD:
    case BRW_OPCODE_LRP:
-   case SHADER_OPCODE_RCP:
-   case SHADER_OPCODE_RSQ:
-   case SHADER_OPCODE_SQRT:
-   case SHADER_OPCODE_EXP2:
-   case SHADER_OPCODE_LOG2:
-   case SHADER_OPCODE_POW:
-   case SHADER_OPCODE_INT_QUOTIENT:
-   case SHADER_OPCODE_INT_REMAINDER:
-   case SHADER_OPCODE_SIN:
-   case SHADER_OPCODE_COS:
    case FS_OPCODE_PACK: {
       /* According to the PRMs:
        *  "A. In Direct Addressing mode, a source cannot span more than 2
@@ -4692,6 +4682,30 @@ get_lowered_simd_width(const struct brw_device_info *devinfo,
       return inst->exec_size / DIV_ROUND_UP(reg_count, 2);
    }
 
+   case SHADER_OPCODE_RCP:
+   case SHADER_OPCODE_RSQ:
+   case SHADER_OPCODE_SQRT:
+   case SHADER_OPCODE_EXP2:
+   case SHADER_OPCODE_LOG2:
+   case SHADER_OPCODE_SIN:
+   case SHADER_OPCODE_COS:
+      /* Unary extended math instructions are limited to SIMD8 on Gen4 and
+       * Gen6.
+       */
+      return (devinfo->gen >= 7 ? MIN2(16, inst->exec_size) :
+              devinfo->gen == 5 || devinfo->is_g4x ? MIN2(16, inst->exec_size) :
+              MIN2(8, inst->exec_size));
+
+   case SHADER_OPCODE_POW:
+      /* SIMD16 is only allowed on Gen7+. */
+      return (devinfo->gen >= 7 ? MIN2(16, inst->exec_size) :
+              MIN2(8, inst->exec_size));
+
+   case SHADER_OPCODE_INT_QUOTIENT:
+   case SHADER_OPCODE_INT_REMAINDER:
+      /* Integer division is limited to SIMD8 on all generations. */
+      return MIN2(8, inst->exec_size);
+
    case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL:
       /* Pre-ILK hardware doesn't have a SIMD8 variant of the texel fetch
        * message used to implement varying pull constant loads, so expand it
author	Francisco Jerez <[email protected]>	2016-05-20 13:14:20 -0700
committer	Francisco Jerez <[email protected]>	2016-05-27 23:19:21 -0700
commit	2b5adb942bad418058d266c85c396040d558f680 (patch)
tree	5cde265ed7b2474dd9e9041dd7c1c9ab10d69c47 /src
parent	a8e7b4f1d9ec50d2214e7694da26af6a108e506f (diff)