summary | refs | log | tree | commit | diff | stats
path: root/src/glsl/ir_optimization.h
diff options
context:
space:
mode:
author: Kenneth Graunke <[email protected]> 2013-04-17 17:30:25 -0700
committer: Kenneth Graunke <[email protected]> 2013-05-12 09:36:46 -0700
commit: e413d3f15ca72b82ca29e43f010597a83427d5e8 (patch)
tree: 89436a108f3e2f53ebab46c30d80c74ef6336277 /src/glsl/ir_optimization.h
parent: 72a0b7a43531eb5e5cc4355941957864f86dd719 (diff)
glsl: Add a pass to flip matrix/vector multiplies to use dot products.
This pass flips (matrix * vector) operations to (vector * matrixTranspose) for certain built-in matrices (currently gl_ModelViewProjectionMatrix and gl_TextureMatrix). This is equivalent, but results in dot products rather than multiplies and adds. On some hardware, this is more efficient.

This pass is conditionalized on ctx->mvp_with_dp4, the flag drivers set to indicate they prefer dot products.

Improves performance in Lightsmark by 1.01131% +/- 0.162069% (n = 10) on a Haswell GT2 system. Passes Piglit on Ivybridge.

v2: Use struct gl_shader_compiler_options instead of plumbing through another boolean flag for this purpose.

Signed-off-by: Kenneth Graunke <[email protected]>
Reviewed-by: Ian Romanick <[email protected]>
Reviewed-by: Eric Anholt <[email protected]>
Diffstat (limited to 'src/glsl/ir_optimization.h')
-rw-r--r-- src/glsl/ir_optimization.h 1
1 file changed, 1 insertion, 0 deletions
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index 9d28e9166e9..28093cfe1a1 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -80,6 +80,7 @@ bool do_dead_code(exec_list *instructions, bool uniform_locations_assigned);
bool do_dead_code_local(exec_list *instructions);
bool do_dead_code_unlinked(exec_list *instructions);
bool do_dead_functions(exec_list *instructions);
+bool opt_flip_matrices(exec_list *instructions);
bool do_function_inlining(exec_list *instructions);
bool do_lower_jumps(exec_list *instructions, bool pull_out_jumps = true, bool lower_sub_return = true, bool lower_main_return = false, bool lower_continue = false, bool lower_break = false);
bool do_lower_texture_projection(exec_list *instructions);