summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Bradley Sepos <[email protected]> 2019-10-14 13:42:12 -0400
committer: Bradley Sepos <[email protected]> 2019-10-14 13:57:23 -0400
commit: 202c8f7145d210c86a149106046ceb9d966e6e27 (patch)
tree: 14eca5ad18b41714b6019a42efe0ec7edb4b6f3d
parent: ad043e0399856d16aef46f5fb69e0e5c00d99f76 (diff)
contrib: Add patch to fix AQ-related slowdown in x265 3.2.
-rw-r--r--  contrib/x265/A00-fix-aq-slowdown.patch        73
-rw-r--r--  contrib/x265_10bit/A00-fix-aq-slowdown.patch  73
-rw-r--r--  contrib/x265_12bit/A00-fix-aq-slowdown.patch  73
-rw-r--r--  contrib/x265_8bit/A00-fix-aq-slowdown.patch   73
4 files changed, 292 insertions, 0 deletions
diff --git a/contrib/x265/A00-fix-aq-slowdown.patch b/contrib/x265/A00-fix-aq-slowdown.patch
new file mode 100644
index 000000000..b0321f303
--- /dev/null
+++ b/contrib/x265/A00-fix-aq-slowdown.patch
@@ -0,0 +1,73 @@
+# HG changeset patch
+# Date 1568357212 -19800
+# Branch Release_3.2
+# Node ID cdd80b53c90d224fd9281ad13de3ca9a1b6e1d39
+# Parent 329345aea40f95dc94aedf3c7cdb098bbb6d34f1
+Fix: AQ mode 4 commit (21db162) introduces slowdown even is not used AQ mode 4.
+
+diff --git a/source/encoder/slicetype.cpp b/source/encoder/slicetype.cpp
+--- a/source/encoder/slicetype.cpp
++++ b/source/encoder/slicetype.cpp
+@@ -480,17 +480,24 @@
+ {
+ #define AQ_EDGE_BIAS 0.5
+ #define EDGE_INCLINATION 45
+- uint32_t numCuInHeight = (maxRow + param->maxCUSize - 1) / param->maxCUSize;
+- int maxHeight = numCuInHeight * param->maxCUSize;
+- intptr_t stride = curFrame->m_fencPic->m_stride;
+- pixel *edgePic = X265_MALLOC(pixel, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)));
+- pixel *gaussianPic = X265_MALLOC(pixel, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)));
+- pixel *thetaPic = X265_MALLOC(pixel, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)));
+- memset(edgePic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
+- memset(gaussianPic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
+- memset(thetaPic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
++
++ pixel *edgePic = NULL;
++ pixel *gaussianPic = NULL;
++ pixel *thetaPic = NULL;
++
+ if (param->rc.aqMode == X265_AQ_EDGE)
++ {
++ uint32_t numCuInHeight = (maxRow + param->maxCUSize - 1) / param->maxCUSize;
++ int maxHeight = numCuInHeight * param->maxCUSize;
++ intptr_t stride = curFrame->m_fencPic->m_stride;
++ edgePic = X265_MALLOC(pixel, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)));
++ gaussianPic = X265_MALLOC(pixel, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)));
++ thetaPic = X265_MALLOC(pixel, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)));
++ memset(edgePic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
++ memset(gaussianPic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
++ memset(thetaPic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
+ edgeFilter(curFrame, edgePic, gaussianPic, thetaPic, stride, maxRow, maxCol);
++ }
+
+ int blockXY = 0, inclinedEdge = 0;
+ double avg_adj_pow2 = 0, avg_adj = 0, qp_adj = 0;
+@@ -507,8 +514,8 @@
+ energy = acEnergyCu(curFrame, blockX, blockY, param->internalCsp, param->rc.qgSize);
+ if (param->rc.aqMode == X265_AQ_EDGE)
+ {
+- pixel *edgeImage = edgePic + curFrame->m_fencPic->m_lumaMarginY * stride + curFrame->m_fencPic->m_lumaMarginX;
+- pixel *edgeTheta = thetaPic + curFrame->m_fencPic->m_lumaMarginY * stride + curFrame->m_fencPic->m_lumaMarginX;
++ pixel *edgeImage = edgePic + curFrame->m_fencPic->m_lumaMarginY * curFrame->m_fencPic->m_stride + curFrame->m_fencPic->m_lumaMarginX;
++ pixel *edgeTheta = thetaPic + curFrame->m_fencPic->m_lumaMarginY * curFrame->m_fencPic->m_stride + curFrame->m_fencPic->m_lumaMarginX;
+ edgeDensity = edgeDensityCu(curFrame, edgeImage, edgeTheta, avgAngle, blockX, blockY, param->rc.qgSize);
+ if (edgeDensity)
+ {
+@@ -542,9 +549,13 @@
+ else
+ strength = param->rc.aqStrength * 1.0397f;
+
+- X265_FREE(edgePic);
+- X265_FREE(gaussianPic);
+- X265_FREE(thetaPic);
++ if (param->rc.aqMode == X265_AQ_EDGE)
++ {
++ X265_FREE(edgePic);
++ X265_FREE(gaussianPic);
++ X265_FREE(thetaPic);
++ }
++
+ blockXY = 0;
+ for (int blockY = 0; blockY < maxRow; blockY += loopIncr)
+ {
diff --git a/contrib/x265_10bit/A00-fix-aq-slowdown.patch b/contrib/x265_10bit/A00-fix-aq-slowdown.patch
new file mode 100644
index 000000000..b0321f303
--- /dev/null
+++ b/contrib/x265_10bit/A00-fix-aq-slowdown.patch
@@ -0,0 +1,73 @@
+# HG changeset patch
+# Date 1568357212 -19800
+# Branch Release_3.2
+# Node ID cdd80b53c90d224fd9281ad13de3ca9a1b6e1d39
+# Parent 329345aea40f95dc94aedf3c7cdb098bbb6d34f1
+Fix: AQ mode 4 commit (21db162) introduces slowdown even is not used AQ mode 4.
+
+diff --git a/source/encoder/slicetype.cpp b/source/encoder/slicetype.cpp
+--- a/source/encoder/slicetype.cpp
++++ b/source/encoder/slicetype.cpp
+@@ -480,17 +480,24 @@
+ {
+ #define AQ_EDGE_BIAS 0.5
+ #define EDGE_INCLINATION 45
+- uint32_t numCuInHeight = (maxRow + param->maxCUSize - 1) / param->maxCUSize;
+- int maxHeight = numCuInHeight * param->maxCUSize;
+- intptr_t stride = curFrame->m_fencPic->m_stride;
+- pixel *edgePic = X265_MALLOC(pixel, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)));
+- pixel *gaussianPic = X265_MALLOC(pixel, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)));
+- pixel *thetaPic = X265_MALLOC(pixel, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)));
+- memset(edgePic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
+- memset(gaussianPic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
+- memset(thetaPic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
++
++ pixel *edgePic = NULL;
++ pixel *gaussianPic = NULL;
++ pixel *thetaPic = NULL;
++
+ if (param->rc.aqMode == X265_AQ_EDGE)
++ {
++ uint32_t numCuInHeight = (maxRow + param->maxCUSize - 1) / param->maxCUSize;
++ int maxHeight = numCuInHeight * param->maxCUSize;
++ intptr_t stride = curFrame->m_fencPic->m_stride;
++ edgePic = X265_MALLOC(pixel, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)));
++ gaussianPic = X265_MALLOC(pixel, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)));
++ thetaPic = X265_MALLOC(pixel, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)));
++ memset(edgePic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
++ memset(gaussianPic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
++ memset(thetaPic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
+ edgeFilter(curFrame, edgePic, gaussianPic, thetaPic, stride, maxRow, maxCol);
++ }
+
+ int blockXY = 0, inclinedEdge = 0;
+ double avg_adj_pow2 = 0, avg_adj = 0, qp_adj = 0;
+@@ -507,8 +514,8 @@
+ energy = acEnergyCu(curFrame, blockX, blockY, param->internalCsp, param->rc.qgSize);
+ if (param->rc.aqMode == X265_AQ_EDGE)
+ {
+- pixel *edgeImage = edgePic + curFrame->m_fencPic->m_lumaMarginY * stride + curFrame->m_fencPic->m_lumaMarginX;
+- pixel *edgeTheta = thetaPic + curFrame->m_fencPic->m_lumaMarginY * stride + curFrame->m_fencPic->m_lumaMarginX;
++ pixel *edgeImage = edgePic + curFrame->m_fencPic->m_lumaMarginY * curFrame->m_fencPic->m_stride + curFrame->m_fencPic->m_lumaMarginX;
++ pixel *edgeTheta = thetaPic + curFrame->m_fencPic->m_lumaMarginY * curFrame->m_fencPic->m_stride + curFrame->m_fencPic->m_lumaMarginX;
+ edgeDensity = edgeDensityCu(curFrame, edgeImage, edgeTheta, avgAngle, blockX, blockY, param->rc.qgSize);
+ if (edgeDensity)
+ {
+@@ -542,9 +549,13 @@
+ else
+ strength = param->rc.aqStrength * 1.0397f;
+
+- X265_FREE(edgePic);
+- X265_FREE(gaussianPic);
+- X265_FREE(thetaPic);
++ if (param->rc.aqMode == X265_AQ_EDGE)
++ {
++ X265_FREE(edgePic);
++ X265_FREE(gaussianPic);
++ X265_FREE(thetaPic);
++ }
++
+ blockXY = 0;
+ for (int blockY = 0; blockY < maxRow; blockY += loopIncr)
+ {
diff --git a/contrib/x265_12bit/A00-fix-aq-slowdown.patch b/contrib/x265_12bit/A00-fix-aq-slowdown.patch
new file mode 100644
index 000000000..b0321f303
--- /dev/null
+++ b/contrib/x265_12bit/A00-fix-aq-slowdown.patch
@@ -0,0 +1,73 @@
+# HG changeset patch
+# Date 1568357212 -19800
+# Branch Release_3.2
+# Node ID cdd80b53c90d224fd9281ad13de3ca9a1b6e1d39
+# Parent 329345aea40f95dc94aedf3c7cdb098bbb6d34f1
+Fix: AQ mode 4 commit (21db162) introduces slowdown even is not used AQ mode 4.
+
+diff --git a/source/encoder/slicetype.cpp b/source/encoder/slicetype.cpp
+--- a/source/encoder/slicetype.cpp
++++ b/source/encoder/slicetype.cpp
+@@ -480,17 +480,24 @@
+ {
+ #define AQ_EDGE_BIAS 0.5
+ #define EDGE_INCLINATION 45
+- uint32_t numCuInHeight = (maxRow + param->maxCUSize - 1) / param->maxCUSize;
+- int maxHeight = numCuInHeight * param->maxCUSize;
+- intptr_t stride = curFrame->m_fencPic->m_stride;
+- pixel *edgePic = X265_MALLOC(pixel, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)));
+- pixel *gaussianPic = X265_MALLOC(pixel, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)));
+- pixel *thetaPic = X265_MALLOC(pixel, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)));
+- memset(edgePic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
+- memset(gaussianPic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
+- memset(thetaPic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
++
++ pixel *edgePic = NULL;
++ pixel *gaussianPic = NULL;
++ pixel *thetaPic = NULL;
++
+ if (param->rc.aqMode == X265_AQ_EDGE)
++ {
++ uint32_t numCuInHeight = (maxRow + param->maxCUSize - 1) / param->maxCUSize;
++ int maxHeight = numCuInHeight * param->maxCUSize;
++ intptr_t stride = curFrame->m_fencPic->m_stride;
++ edgePic = X265_MALLOC(pixel, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)));
++ gaussianPic = X265_MALLOC(pixel, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)));
++ thetaPic = X265_MALLOC(pixel, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)));
++ memset(edgePic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
++ memset(gaussianPic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
++ memset(thetaPic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
+ edgeFilter(curFrame, edgePic, gaussianPic, thetaPic, stride, maxRow, maxCol);
++ }
+
+ int blockXY = 0, inclinedEdge = 0;
+ double avg_adj_pow2 = 0, avg_adj = 0, qp_adj = 0;
+@@ -507,8 +514,8 @@
+ energy = acEnergyCu(curFrame, blockX, blockY, param->internalCsp, param->rc.qgSize);
+ if (param->rc.aqMode == X265_AQ_EDGE)
+ {
+- pixel *edgeImage = edgePic + curFrame->m_fencPic->m_lumaMarginY * stride + curFrame->m_fencPic->m_lumaMarginX;
+- pixel *edgeTheta = thetaPic + curFrame->m_fencPic->m_lumaMarginY * stride + curFrame->m_fencPic->m_lumaMarginX;
++ pixel *edgeImage = edgePic + curFrame->m_fencPic->m_lumaMarginY * curFrame->m_fencPic->m_stride + curFrame->m_fencPic->m_lumaMarginX;
++ pixel *edgeTheta = thetaPic + curFrame->m_fencPic->m_lumaMarginY * curFrame->m_fencPic->m_stride + curFrame->m_fencPic->m_lumaMarginX;
+ edgeDensity = edgeDensityCu(curFrame, edgeImage, edgeTheta, avgAngle, blockX, blockY, param->rc.qgSize);
+ if (edgeDensity)
+ {
+@@ -542,9 +549,13 @@
+ else
+ strength = param->rc.aqStrength * 1.0397f;
+
+- X265_FREE(edgePic);
+- X265_FREE(gaussianPic);
+- X265_FREE(thetaPic);
++ if (param->rc.aqMode == X265_AQ_EDGE)
++ {
++ X265_FREE(edgePic);
++ X265_FREE(gaussianPic);
++ X265_FREE(thetaPic);
++ }
++
+ blockXY = 0;
+ for (int blockY = 0; blockY < maxRow; blockY += loopIncr)
+ {
diff --git a/contrib/x265_8bit/A00-fix-aq-slowdown.patch b/contrib/x265_8bit/A00-fix-aq-slowdown.patch
new file mode 100644
index 000000000..b0321f303
--- /dev/null
+++ b/contrib/x265_8bit/A00-fix-aq-slowdown.patch
@@ -0,0 +1,73 @@
+# HG changeset patch
+# Date 1568357212 -19800
+# Branch Release_3.2
+# Node ID cdd80b53c90d224fd9281ad13de3ca9a1b6e1d39
+# Parent 329345aea40f95dc94aedf3c7cdb098bbb6d34f1
+Fix: AQ mode 4 commit (21db162) introduces slowdown even is not used AQ mode 4.
+
+diff --git a/source/encoder/slicetype.cpp b/source/encoder/slicetype.cpp
+--- a/source/encoder/slicetype.cpp
++++ b/source/encoder/slicetype.cpp
+@@ -480,17 +480,24 @@
+ {
+ #define AQ_EDGE_BIAS 0.5
+ #define EDGE_INCLINATION 45
+- uint32_t numCuInHeight = (maxRow + param->maxCUSize - 1) / param->maxCUSize;
+- int maxHeight = numCuInHeight * param->maxCUSize;
+- intptr_t stride = curFrame->m_fencPic->m_stride;
+- pixel *edgePic = X265_MALLOC(pixel, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)));
+- pixel *gaussianPic = X265_MALLOC(pixel, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)));
+- pixel *thetaPic = X265_MALLOC(pixel, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)));
+- memset(edgePic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
+- memset(gaussianPic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
+- memset(thetaPic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
++
++ pixel *edgePic = NULL;
++ pixel *gaussianPic = NULL;
++ pixel *thetaPic = NULL;
++
+ if (param->rc.aqMode == X265_AQ_EDGE)
++ {
++ uint32_t numCuInHeight = (maxRow + param->maxCUSize - 1) / param->maxCUSize;
++ int maxHeight = numCuInHeight * param->maxCUSize;
++ intptr_t stride = curFrame->m_fencPic->m_stride;
++ edgePic = X265_MALLOC(pixel, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)));
++ gaussianPic = X265_MALLOC(pixel, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)));
++ thetaPic = X265_MALLOC(pixel, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)));
++ memset(edgePic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
++ memset(gaussianPic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
++ memset(thetaPic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
+ edgeFilter(curFrame, edgePic, gaussianPic, thetaPic, stride, maxRow, maxCol);
++ }
+
+ int blockXY = 0, inclinedEdge = 0;
+ double avg_adj_pow2 = 0, avg_adj = 0, qp_adj = 0;
+@@ -507,8 +514,8 @@
+ energy = acEnergyCu(curFrame, blockX, blockY, param->internalCsp, param->rc.qgSize);
+ if (param->rc.aqMode == X265_AQ_EDGE)
+ {
+- pixel *edgeImage = edgePic + curFrame->m_fencPic->m_lumaMarginY * stride + curFrame->m_fencPic->m_lumaMarginX;
+- pixel *edgeTheta = thetaPic + curFrame->m_fencPic->m_lumaMarginY * stride + curFrame->m_fencPic->m_lumaMarginX;
++ pixel *edgeImage = edgePic + curFrame->m_fencPic->m_lumaMarginY * curFrame->m_fencPic->m_stride + curFrame->m_fencPic->m_lumaMarginX;
++ pixel *edgeTheta = thetaPic + curFrame->m_fencPic->m_lumaMarginY * curFrame->m_fencPic->m_stride + curFrame->m_fencPic->m_lumaMarginX;
+ edgeDensity = edgeDensityCu(curFrame, edgeImage, edgeTheta, avgAngle, blockX, blockY, param->rc.qgSize);
+ if (edgeDensity)
+ {
+@@ -542,9 +549,13 @@
+ else
+ strength = param->rc.aqStrength * 1.0397f;
+
+- X265_FREE(edgePic);
+- X265_FREE(gaussianPic);
+- X265_FREE(thetaPic);
++ if (param->rc.aqMode == X265_AQ_EDGE)
++ {
++ X265_FREE(edgePic);
++ X265_FREE(gaussianPic);
++ X265_FREE(thetaPic);
++ }
++
+ blockXY = 0;
+ for (int blockY = 0; blockY < maxRow; blockY += loopIncr)
+ {