summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Kenneth Graunke <[email protected]> 2013-03-30 00:15:54 -0700
committer: Chris Forbes <[email protected]> 2013-06-16 00:47:50 +1200
commit: 1b77d2133c41c4eb475b81967a3e4d39196c7fe1 (patch)
tree: de63539101eee87e3520abac242172eedf363041
parent: fcaa48d9cc8937e0ceb59dfd22ef5b6e6fd1a273 (diff)
i965: Implement 16-wide math on G45 and Ironlake.
[chrisf:] Improves performance in CS:S video stress test by about 2%.
No piglit regressions on Ironlake.

Reviewed-by: Chris Forbes <[email protected]>
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs.h 3
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_emit.cpp 25
2 files changed, 28 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 762e2508d22..bcda339f857 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -525,6 +525,9 @@ private:
void generate_math_gen4(fs_inst *inst,
struct brw_reg dst,
struct brw_reg src);
+ void generate_math_g45(fs_inst *inst,
+ struct brw_reg dst,
+ struct brw_reg src);
void generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src,
bool negate_value);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
index 9b7e68acb8f..9e1cf4a2b62 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -332,6 +332,29 @@ fs_generator::generate_math_gen4(fs_inst *inst,
}
void
+fs_generator::generate_math_g45(fs_inst *inst,
+ struct brw_reg dst,
+ struct brw_reg src)
+{
+ if (inst->opcode == SHADER_OPCODE_POW ||
+ inst->opcode == SHADER_OPCODE_INT_QUOTIENT ||
+ inst->opcode == SHADER_OPCODE_INT_REMAINDER) {
+ generate_math_gen4(inst, dst, src);
+ return;
+ }
+
+ int op = brw_math_function(inst->opcode);
+
+ assert(inst->mlen >= 1);
+
+ brw_math(p, dst,
+ op,
+ inst->base_mrf, src,
+ BRW_MATH_DATA_VECTOR,
+ BRW_MATH_PRECISION_FULL);
+}
+
+void
fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
{
int msg_type = -1;
@@ -1307,6 +1330,8 @@ fs_generator::generate_code(exec_list *instructions)
generate_math1_gen7(inst, dst, src[0]);
} else if (intel->gen == 6) {
generate_math1_gen6(inst, dst, src[0]);
+ } else if (intel->gen == 5 || intel->is_g4x) {
+ generate_math_g45(inst, dst, src[0]);
} else {
generate_math_gen4(inst, dst, src[0]);
}