diff options
author | Kenneth Graunke <[email protected]> | 2013-03-30 00:15:54 -0700 |
---|---|---|
committer | Chris Forbes <[email protected]> | 2013-06-16 00:47:50 +1200 |
commit | 1b77d2133c41c4eb475b81967a3e4d39196c7fe1 (patch) | |
tree | de63539101eee87e3520abac242172eedf363041 /src/mesa | |
parent | fcaa48d9cc8937e0ceb59dfd22ef5b6e6fd1a273 (diff) |
i965: Implement 16-wide math on G45 and Ironlake.
[chrisf:]
Improves performance in CS:S video stress test by about 2%.
No piglit regressions on Ironlake.
Reviewed-by: Chris Forbes <[email protected]>
Diffstat (limited to 'src/mesa')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.h | 3 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 25 |
2 files changed, 28 insertions, 0 deletions
```diff
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 762e2508d22..bcda339f857 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -525,6 +525,9 @@ private:
    void generate_math_gen4(fs_inst *inst,
                            struct brw_reg dst,
                            struct brw_reg src);
+   void generate_math_g45(fs_inst *inst,
+                          struct brw_reg dst,
+                          struct brw_reg src);
    void generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
    void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src,
                      bool negate_value);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
index 9b7e68acb8f..9e1cf4a2b62 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -332,6 +332,29 @@ fs_generator::generate_math_gen4(fs_inst *inst,
 }
 
 void
+fs_generator::generate_math_g45(fs_inst *inst,
+                                struct brw_reg dst,
+                                struct brw_reg src)
+{
+   if (inst->opcode == SHADER_OPCODE_POW ||
+       inst->opcode == SHADER_OPCODE_INT_QUOTIENT ||
+       inst->opcode == SHADER_OPCODE_INT_REMAINDER) {
+      generate_math_gen4(inst, dst, src);
+      return;
+   }
+
+   int op = brw_math_function(inst->opcode);
+
+   assert(inst->mlen >= 1);
+
+   brw_math(p, dst,
+            op,
+            inst->base_mrf, src,
+            BRW_MATH_DATA_VECTOR,
+            BRW_MATH_PRECISION_FULL);
+}
+
+void
 fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
 {
    int msg_type = -1;
@@ -1307,6 +1330,8 @@ fs_generator::generate_code(exec_list *instructions)
          generate_math1_gen7(inst, dst, src[0]);
       } else if (intel->gen == 6) {
          generate_math1_gen6(inst, dst, src[0]);
+      } else if (intel->gen == 5 || intel->is_g4x) {
+         generate_math_g45(inst, dst, src[0]);
       } else {
          generate_math_gen4(inst, dst, src[0]);
       }
```