summaryrefslogtreecommitdiffstats
path: root/src/mesa/vbo/vbo_exec_array.c
diff options
context:
space:
mode:
authorTimothy Arceri <[email protected]>2014-10-29 23:05:59 +1100
committerMatt Turner <[email protected]>2014-11-06 11:39:59 -0800
commit13786172181bf5a753c706a7f5c3eb5d448e244e (patch)
treeae36df3acf02742f3e8b0e9beb57ca8d82064c71 /src/mesa/vbo/vbo_exec_array.c
parent9557cf7d0d2e5a76a5277c2a4825e265609b2fca (diff)
mesa: Add SSE 4.1 optimisation for glDrawElements.
Makes use of SSE 4.1 to speed up compute of min and max elements. Callgrind cpu usage results from pts benchmarks: Openarena 0.8.8: 3.67% -> 1.03% UrbanTerror: 2.36% -> 0.81% V5: - actually make use of the optimisation in android (Emil Velikov) - set a better array size limit for using SSE and added TODO V4: - fixed bugs with incrementing pointer and updating counters V3: - Removed sse_minmax.c from Makefile.sources - handle the first few values without SSE until the pointer is aligned and use _mm_load_si128 rather than _mm_loadu_si128 - guard the call to the SSE code better at build time V2: - removed GL* types - use _mm_store_si128() rather than _mm_store_ps() - add runtime check for SSE - use aligned attribute for local mix/max - bunch of tidyups Reviewed-by: Juha-Pekka Heikkila <[email protected]> Reviewed-by: Matt Turner <[email protected]> Signed-off-by: Timothy Arceri <[email protected]>
Diffstat (limited to 'src/mesa/vbo/vbo_exec_array.c')
-rw-r--r--src/mesa/vbo/vbo_exec_array.c14
1 files changed, 11 insertions, 3 deletions
diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c
index 045dbb5063d..e623b361a02 100644
--- a/src/mesa/vbo/vbo_exec_array.c
+++ b/src/mesa/vbo/vbo_exec_array.c
@@ -36,6 +36,8 @@
#include "main/enums.h"
#include "main/macros.h"
#include "main/transformfeedback.h"
+#include "main/sse_minmax.h"
+#include "x86/common_x86_asm.h"
#include "vbo_context.h"
@@ -119,10 +121,16 @@ vbo_get_minmax_index(struct gl_context *ctx,
}
}
else {
- for (i = 0; i < count; i++) {
- if (ui_indices[i] > max_ui) max_ui = ui_indices[i];
- if (ui_indices[i] < min_ui) min_ui = ui_indices[i];
+#if defined(USE_SSE41)
+ if (cpu_has_sse4_1) {
+ _mesa_uint_array_min_max(ui_indices, &min_ui, &max_ui, count);
}
+ else
+#endif
+ for (i = 0; i < count; i++) {
+ if (ui_indices[i] > max_ui) max_ui = ui_indices[i];
+ if (ui_indices[i] < min_ui) min_ui = ui_indices[i];
+ }
}
*min_index = min_ui;
*max_index = max_ui;