main: memcpy larger chunks in _mesa_propagate_uniforms_to_driver_storage

When possible, do the memcpy on larger blocks. This reduces the cycles spent in _mesa_propagate_uniforms_to_driver_storage from 1.51% to 0.62% according to perf during the Unigine Heaven benchmark. It did not affect the framerate of the benchmark.

The system used for testing was an i5 6600K with a Radeon R9 380. Piglit hangs randomly on this system both with and without the patch, so I could not make a comparison.

v2: fixed whitespace

Signed-off-by: Nils Wallménius <[email protected]>
Reviewed-by: Nicolai Hähnle <[email protected]>
author: Nils Wallménius <[email protected]> 2016-07-22 13:10:03 +0200
committer: Nicolai Hähnle <[email protected]> 2016-07-25 13:51:16 +0200
commit: a354c389f524c2aa0fa64ac8b7e3d93c2cea4b81 (patch)
tree: c2bc04c2cbc3d9bb6935ee55cb2cbc9871aea27c
parent: dd208ea006c3c66bf64f21ecc1eaffdc0e57db5e (diff)
1 files changed, 23 insertions, 6 deletions
diff --git a/src/mesa/main/uniform_query.cpp b/src/mesa/main/uniform_query.cpp
index ab22a0ed86a..b9b9ff23ffa 100644
--- a/src/mesa/main/uniform_query.cpp
+++ b/src/mesa/main/uniform_query.cpp
@@ -578,14 +578,31 @@ _mesa_propagate_uniforms_to_driver_storage(struct gl_uniform_storage *uni,
 	 unsigned j;
 	 unsigned v;
 
-	 for (j = 0; j < count; j++) {
-	    for (v = 0; v < vectors; v++) {
-	       memcpy(dst, src, src_vector_byte_stride);
-	       src += src_vector_byte_stride;
-	       dst += store->vector_stride;
+	 if (src_vector_byte_stride == store->vector_stride) {
+	    if (extra_stride) {
+	       for (j = 0; j < count; j++) {
+	          memcpy(dst, src, src_vector_byte_stride * vectors);
+	          src += src_vector_byte_stride * vectors;
+	          dst += store->vector_stride * vectors;
+
+	          dst += extra_stride;
+	       }
+	    } else {
+	       /* Unigine Heaven benchmark gets here */
+	       memcpy(dst, src, src_vector_byte_stride * vectors * count);
+	       src += src_vector_byte_stride * vectors * count;
+	       dst += store->vector_stride * vectors * count;
 	    }
+	 } else {
+	    for (j = 0; j < count; j++) {
+	       for (v = 0; v < vectors; v++) {
+	          memcpy(dst, src, src_vector_byte_stride);
+	          src += src_vector_byte_stride;
+	          dst += store->vector_stride;
+	       }
 
-	    dst += extra_stride;
+	       dst += extra_stride;
+	    }
 	 }
 	 break;
       }
author	Nils Wallménius <[email protected]>	2016-07-22 13:10:03 +0200
committer	Nicolai Hähnle <[email protected]>	2016-07-25 13:51:16 +0200
commit	a354c389f524c2aa0fa64ac8b7e3d93c2cea4b81 (patch)
tree	c2bc04c2cbc3d9bb6935ee55cb2cbc9871aea27c
parent	dd208ea006c3c66bf64f21ecc1eaffdc0e57db5e (diff)