summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author: Nils Wallménius <[email protected]> 2016-07-22 13:10:03 +0200
committer: Nicolai Hähnle <[email protected]> 2016-07-25 13:51:16 +0200
commit: a354c389f524c2aa0fa64ac8b7e3d93c2cea4b81 (patch)
tree: c2bc04c2cbc3d9bb6935ee55cb2cbc9871aea27c /src
parent: dd208ea006c3c66bf64f21ecc1eaffdc0e57db5e (diff)
main: memcpy larger chunks in _mesa_propagate_uniforms_to_driver_storage
When possible, do the memcpy on larger blocks. This reduces the cycles spent in _mesa_propagate_uniforms_to_driver_storage from 1.51% to 0.62% according to perf during the Unigine Heaven benchmark. It did not affect the framerate of the benchmark.

The system used for testing was an i5 6600K with a Radeon R9 380. Piglit hangs randomly on this system both with and without the patch, so I could not make a comparison.

v2: fixed whitespace

Signed-off-by: Nils Wallménius <[email protected]>
Reviewed-by: Nicolai Hähnle <[email protected]>
Diffstat (limited to 'src')
-rw-r--r-- src/mesa/main/uniform_query.cpp 29
1 files changed, 23 insertions, 6 deletions
diff --git a/src/mesa/main/uniform_query.cpp b/src/mesa/main/uniform_query.cpp
index ab22a0ed86a..b9b9ff23ffa 100644
--- a/src/mesa/main/uniform_query.cpp
+++ b/src/mesa/main/uniform_query.cpp
@@ -578,14 +578,31 @@ _mesa_propagate_uniforms_to_driver_storage(struct gl_uniform_storage *uni,
unsigned j;
unsigned v;
- for (j = 0; j < count; j++) {
- for (v = 0; v < vectors; v++) {
- memcpy(dst, src, src_vector_byte_stride);
- src += src_vector_byte_stride;
- dst += store->vector_stride;
+ if (src_vector_byte_stride == store->vector_stride) {
+ if (extra_stride) {
+ for (j = 0; j < count; j++) {
+ memcpy(dst, src, src_vector_byte_stride * vectors);
+ src += src_vector_byte_stride * vectors;
+ dst += store->vector_stride * vectors;
+
+ dst += extra_stride;
+ }
+ } else {
+ /* Unigine Heaven benchmark gets here */
+ memcpy(dst, src, src_vector_byte_stride * vectors * count);
+ src += src_vector_byte_stride * vectors * count;
+ dst += store->vector_stride * vectors * count;
}
+ } else {
+ for (j = 0; j < count; j++) {
+ for (v = 0; v < vectors; v++) {
+ memcpy(dst, src, src_vector_byte_stride);
+ src += src_vector_byte_stride;
+ dst += store->vector_stride;
+ }
- dst += extra_stride;
+ dst += extra_stride;
+ }
}
break;
}