diff options
author | bradleys <[email protected]> | 2015-02-11 20:58:05 +0000 |
---|---|---|
committer | bradleys <[email protected]> | 2015-02-11 20:58:05 +0000 |
commit | b02dbbdba62bffe7e3224da17c5f2b0585dd24d1 (patch) | |
tree | 3341ae7be03e76913920bee4b557b060c025f4ba /libhb/nlmeans_x86.c | |
parent | 90bb32c1fc211087736ca52e267c19bf0239bdfe (diff) |
libhb: Additional minor optimizations to nlmeans.
Assume buffered planes are equal size in nlmeans.
Make nlmeans scalar counters read like accelerated counters (more readable and saves ~2 cycles).
Yet more const correctness.
Clarify some variable names for readability.
git-svn-id: svn://svn.handbrake.fr/HandBrake/trunk@6896 b64f7644-9d1e-0410-96f1-a4d463321fa5
Diffstat (limited to 'libhb/nlmeans_x86.c')
-rw-r--r-- | libhb/nlmeans_x86.c | 17 |
1 files changed, 9 insertions, 8 deletions
```diff
diff --git a/libhb/nlmeans_x86.c b/libhb/nlmeans_x86.c
index 685ac857e..aa727d96a 100644
--- a/libhb/nlmeans_x86.c
+++ b/libhb/nlmeans_x86.c
@@ -18,26 +18,27 @@ static void build_integral_sse2(uint32_t *integral,
                                 int integral_stride,
                                 const uint8_t *src,
                                 const uint8_t *src_pre,
-                                int src_w,
                                 const uint8_t *compare,
                                 const uint8_t *compare_pre,
-                                int compare_w,
                                 int w,
-                                int h,
+                                int border,
+                                int dst_w,
+                                int dst_h,
                                 int dx,
                                 int dy)
 {
     const __m128i zero = _mm_set1_epi8(0);
+    const int bw = w + 2 * border;
 
-    for (int y = 0; y < h; y++)
+    for (int y = 0; y < dst_h; y++)
     {
         __m128i prevadd = _mm_set1_epi32(0);
 
-        const uint8_t *p1 = src_pre + y*src_w;
-        const uint8_t *p2 = compare_pre + (y+dy)*compare_w + dx;
+        const uint8_t *p1 = src_pre + y*bw;
+        const uint8_t *p2 = compare_pre + (y+dy)*bw + dx;
         uint32_t *out = integral + (y*integral_stride);
 
-        for (int x = 0; x < w; x += 16)
+        for (int x = 0; x < dst_w; x += 16)
         {
             __m128i pa, pb;
             __m128i pla, plb;
@@ -119,7 +120,7 @@ static void build_integral_sse2(uint32_t *integral,
     {
         out = integral + y*integral_stride;
 
-        for (int x = 0; x < w; x += 16)
+        for (int x = 0; x < dst_w; x += 16)
         {
             *((__m128i*)out) = _mm_add_epi32(*(__m128i*)(out-integral_stride),
                                              *(__m128i*)(out));
```