summaryrefslogtreecommitdiffstats
path: root/libhb
diff options
context:
space:
mode:
Diffstat (limited to 'libhb')
-rw-r--r--libhb/scale.c1020
-rw-r--r--libhb/scale.h324
-rw-r--r--libhb/scale_kernel.c223
-rw-r--r--libhb/scale_kernel.h20
-rw-r--r--libhb/vadxva2.c29
5 files changed, 0 insertions, 1616 deletions
diff --git a/libhb/scale.c b/libhb/scale.c
deleted file mode 100644
index 9dcff28ac..000000000
--- a/libhb/scale.c
+++ /dev/null
@@ -1,1020 +0,0 @@
-/* scale.c
-
- Copyright (c) 2003-2012 HandBrake Team
- This file is part of the HandBrake source code
- Homepage: <http://handbrake.fr/>.
- It may be used under the terms of the GNU General Public License v2.
- For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html
-
- Authors: Peng Gao <[email protected]> <http://www.multicorewareinc.com/>
- Li Cao <[email protected]> <http://www.multicorewareinc.com/>
-
-
- */
-
-#ifdef USE_OPENCL
-#include <assert.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "hb.h"
-#include "scale.h"
-#include "scale_kernel.h"
-#include "libavutil/pixdesc.h"
-
-#define isScaleRGBinInt(x) \
- ( \
- (x)==AV_PIX_FMT_RGB48BE || \
- (x)==AV_PIX_FMT_RGB48LE || \
- (x)==AV_PIX_FMT_RGB32 || \
- (x)==AV_PIX_FMT_RGB32_1 || \
- (x)==AV_PIX_FMT_RGB24 || \
- (x)==AV_PIX_FMT_RGB565BE || \
- (x)==AV_PIX_FMT_RGB565LE || \
- (x)==AV_PIX_FMT_RGB555BE || \
- (x)==AV_PIX_FMT_RGB555LE || \
- (x)==AV_PIX_FMT_RGB444BE || \
- (x)==AV_PIX_FMT_RGB444LE || \
- (x)==AV_PIX_FMT_RGB8 || \
- (x)==AV_PIX_FMT_RGB4 || \
- (x)==AV_PIX_FMT_RGB4_BYTE || \
- (x)==AV_PIX_FMT_MONOBLACK || \
- (x)==AV_PIX_FMT_MONOWHITE \
- )
-#define isScaleBGRinInt(x) \
- ( \
- (x)==AV_PIX_FMT_BGR48BE || \
- (x)==AV_PIX_FMT_BGR48LE || \
- (x)==AV_PIX_FMT_BGR32 || \
- (x)==AV_PIX_FMT_BGR32_1 || \
- (x)==AV_PIX_FMT_BGR24 || \
- (x)==AV_PIX_FMT_BGR565BE || \
- (x)==AV_PIX_FMT_BGR565LE || \
- (x)==AV_PIX_FMT_BGR555BE || \
- (x)==AV_PIX_FMT_BGR555LE || \
- (x)==AV_PIX_FMT_BGR444BE || \
- (x)==AV_PIX_FMT_BGR444LE || \
- (x)==AV_PIX_FMT_BGR8 || \
- (x)==AV_PIX_FMT_BGR4 || \
- (x)==AV_PIX_FMT_BGR4_BYTE|| \
- (x)==AV_PIX_FMT_MONOBLACK|| \
- (x)==AV_PIX_FMT_MONOWHITE \
- )
-
-#define isScaleAnyRGB(x) \
- ( \
- isScaleRGBinInt(x) || \
- isScaleBGRinInt(x) \
- )
-
-#define isScaleGray(x) \
- ((x) == AV_PIX_FMT_GRAY8 || \
- (x) == AV_PIX_FMT_Y400A || \
- (x) == AV_PIX_FMT_GRAY16BE || \
- (x) == AV_PIX_FMT_GRAY16LE)
-
-static ScaleContext *g_scale;
-
-static double getScaleSplineCoeff(double a, double b, double c, double d, double dist)
-{
- if (dist <= 1.0)
- return ((d * dist + c) * dist + b) * dist + a;
- else
- return getScaleSplineCoeff(0.0,
- b + 2.0 * c + 3.0 * d,
- c + 3.0 * d,
- -b - 3.0 * c - 6.0 * d,
- dist - 1.0);
-}
-
-static int initScaleFilter(int16_t **outFilter, int32_t **filterPos,
- int *outFilterSize, int xInc, int srcW, int dstW,
- int filterAlign, int one, int flags, int cpu_flags,
- ScaleVector *srcFilter, ScaleVector *dstFilter,
- double param[2])
-{
- int i;
- int filterSize;
- int filter2Size;
- int minFilterSize;
- int64_t *filter = NULL;
- int64_t *filter2 = NULL;
- const int64_t fone = 1LL << 54;
- int ret = -1;
-
- *filterPos = (int32_t *)av_malloc((dstW + 3) * sizeof(**filterPos));
- if (*filterPos == NULL && ((dstW + 3) * sizeof(**filterPos)) != 0)
- {
- hb_log("Cannot allocate memory.");
- goto fail;
- }
-
- if (FFABS(xInc - 0x10000) < 10)
- { // unscaled
- int i;
- filterSize = 1;
- filter = (int64_t *)av_mallocz(dstW * sizeof(*filter) * filterSize);
- if (filter == NULL && (dstW * sizeof(*filter) * filterSize) != 0)
- {
- hb_log("Cannot allocate memory.");
- goto fail;
- }
-
- for (i = 0; i < dstW; i++)
- {
- filter[i * filterSize] = fone;
- (*filterPos)[i] = i;
- }
- }
- else if (flags & SWS_POINT)
- { // lame looking point sampling mode
- int i;
- int64_t xDstInSrc;
- filterSize = 1;
- filter = (int64_t *)av_malloc(dstW * sizeof(*filter) * filterSize);
- if(filter == NULL && (dstW * sizeof(*filter) * filterSize) != 0)
- {
- hb_log("Cannot allocate memory.");
- goto fail;
- }
-
- xDstInSrc = xInc / 2 - 0x8000;
- for (i = 0; i < dstW; i++)
- {
- int xx = (xDstInSrc - ((filterSize - 1) << 15) + (1 << 15)) >> 16;
-
- (*filterPos)[i] = xx;
- filter[i] = fone;
- xDstInSrc += xInc;
- }
- }
- else if ((xInc <= (1 << 16) && (flags & SWS_AREA)) ||
- (flags & SWS_FAST_BILINEAR))
- { // bilinear upscale
- int i;
- int64_t xDstInSrc;
- filterSize = 2;
- filter = (int64_t *)av_malloc(dstW * sizeof(*filter) * filterSize);
- if(filter == NULL && (dstW * sizeof(*filter) * filterSize) != 0)
- {
- hb_log("Cannot allocate memory.");
- goto fail;
- }
-
- xDstInSrc = xInc / 2 - 0x8000;
- for (i = 0; i < dstW; i++)
- {
- int xx = (xDstInSrc - ((filterSize - 1) << 15) + (1 << 15)) >> 16;
- int j;
-
- (*filterPos)[i] = xx;
- // bilinear upscale / linear interpolate / area averaging
- for (j = 0; j < filterSize; j++)
- {
- int64_t coeff= fone - FFABS(((int64_t)xx<<16) - xDstInSrc)*(fone>>16);
- if (coeff < 0)
- coeff = 0;
- filter[i * filterSize + j] = coeff;
- xx++;
- }
- xDstInSrc += xInc;
- }
- }
- else
- {
- int64_t xDstInSrc;
- int sizeFactor;
-
- if (flags & SWS_BICUBIC)
- sizeFactor = 4;
- else if (flags & SWS_X)
- sizeFactor = 8;
- else if (flags & SWS_AREA)
- sizeFactor = 1; // downscale only, for upscale it is bilinear
- else if (flags & SWS_GAUSS)
- sizeFactor = 8; // infinite ;)
- else if (flags & SWS_LANCZOS)
- sizeFactor = param[0] != SWS_PARAM_DEFAULT ? ceil(2 * param[0]) : 6;
- else if (flags & SWS_SINC)
- sizeFactor = 20; // infinite ;)
- else if (flags & SWS_SPLINE)
- sizeFactor = 20; // infinite ;)
- else if (flags & SWS_BILINEAR)
- sizeFactor = 2;
- else
- {
- sizeFactor = 0; // GCC warning killer
- assert(0);
- }
-
- if (xInc <= 1 << 16)
- filterSize = 1 + sizeFactor; // upscale
- else
- filterSize = 1 + (sizeFactor * srcW + dstW - 1) / dstW;
-
-
- filterSize = FFMIN(filterSize, srcW - 2);
- filterSize = FFMAX(filterSize, 1);
-
- filter = (int64_t *)av_malloc(dstW * sizeof(*filter) * filterSize);
- if(filter == NULL && (dstW * sizeof(*filter) * filterSize) != 0)
- {
- hb_log("Cannot allocate memory.");
- goto fail;
- }
-
- xDstInSrc = xInc - 0x10000;
- for (i = 0; i < dstW; i++)
- {
- int xx = (xDstInSrc - ((filterSize - 2) << 16)) / (1 << 17);
- int j;
- (*filterPos)[i] = xx;
- for (j = 0; j < filterSize; j++)
- {
- int64_t d = (FFABS(((int64_t)xx << 17) - xDstInSrc)) << 13;
- double floatd;
- int64_t coeff;
-
- if (xInc > 1 << 16)
- d = d * dstW / srcW;
- floatd = d * (1.0 / (1 << 30));
-
- if (flags & SWS_BICUBIC)
- {
- int64_t B = (param[0] != SWS_PARAM_DEFAULT ? param[0] : 0) * (1 << 24);
- int64_t C = (param[1] != SWS_PARAM_DEFAULT ? param[1] : 0.6) * (1 << 24);
-
- if (d >= 1LL << 31)
- {
- coeff = 0.0;
- }
- else
- {
- int64_t dd = (d * d) >> 30;
- int64_t ddd = (dd * d) >> 30;
-
- if (d < 1LL << 30)
- coeff = (12 * (1 << 24) - 9 * B - 6 * C) * ddd +
- (-18 * (1 << 24) + 12 * B + 6 * C) * dd +
- (6 * (1 << 24) - 2 * B) * (1 << 30);
- else
- coeff = (-B - 6 * C) * ddd +
- (6 * B + 30 * C) * dd +
- (-12 * B - 48 * C) * d +
- (8 * B + 24 * C) * (1 << 30);
- }
- coeff *= fone >> (30 + 24);
- }
-#if 0
- else if (flags & SWS_X)
- {
- double p = param ? param * 0.01 : 0.3;
- coeff = d ? sin(d * M_PI) / (d * M_PI) : 1.0;
- coeff *= pow (2.0, -p * d * d);
- }
-#endif
- else if (flags & SWS_X)
- {
- double A = param[0] != SWS_PARAM_DEFAULT ? param[0] : 1.0;
- double c;
-
- if (floatd < 1.0)
- c = cos(floatd * M_PI);
- else
- c = -1.0;
- if (c < 0.0)
- c = -pow(-c, A);
- else
- c = pow(c, A);
- coeff = (c * 0.5 + 0.5) * fone;
- }
- else if (flags & SWS_AREA)
- {
- int64_t d2 = d - (1 << 29);
- if (d2 * xInc < -(1LL << (29 + 16)))
- coeff = 1.0 * (1LL << (30 + 16));
- else if (d2 * xInc < (1LL << (29 + 16)))
- coeff = -d2 * xInc + (1LL << (29 + 16));
- else
- coeff = 0.0;
- coeff *= fone >> (30 + 16);
- }
- else if (flags & SWS_GAUSS)
- {
- double p = param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
- coeff = (pow(2.0, -p * floatd * floatd)) * fone;
- }
- else if (flags & SWS_SINC)
- {
- coeff = (d ? sin(floatd * M_PI) / (floatd * M_PI) : 1.0) * fone;
- }
- else if (flags & SWS_LANCZOS)
- {
- double p = param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
- coeff = (d ? sin(floatd * M_PI) * sin(floatd * M_PI / p) /
- (floatd * floatd * M_PI * M_PI / p) : 1.0) * fone;
- if (floatd > p)
- coeff = 0;
- }
- else if (flags & SWS_BILINEAR)
- {
- coeff = (1 << 30) - d;
- if (coeff < 0)
- coeff = 0;
- coeff *= fone >> 30;
- }
- else if (flags & SWS_SPLINE)
- {
- double p = -2.196152422706632;
- coeff = getScaleSplineCoeff(1.0, 0.0, p, -p - 1.0, floatd) * fone;
- }
- else
- {
- coeff = 0.0; // GCC warning killer
- assert(0);
- }
-
- filter[i * filterSize + j] = coeff;
- xx++;
- }
- xDstInSrc += 2 * xInc;
- }
- }
-
- assert(filterSize > 0);
- filter2Size = filterSize;
- if (srcFilter)
- filter2Size += srcFilter->length - 1;
- if (dstFilter)
- filter2Size += dstFilter->length - 1;
- assert(filter2Size > 0);
- filter2 = (int64_t *)av_mallocz(filter2Size * dstW * sizeof(*filter2));
- if (filter2 == NULL && (filter2Size * dstW * sizeof(*filter2)) != 0)
- {
- hb_log("Can't alloc memory.");
- goto fail;
- }
-
- for (i = 0; i < dstW; i++)
- {
- int j, k;
-
- if (srcFilter)
- {
- for (k = 0; k < srcFilter->length; k++)
- {
- for (j = 0; j < filterSize; j++)
- filter2[i * filter2Size + k + j] +=
- srcFilter->coeff[k] * filter[i * filterSize + j];
- }
- }
- else
- {
- for (j = 0; j < filterSize; j++)
- filter2[i * filter2Size + j] = filter[i * filterSize + j];
- }
- // FIXME dstFilter
-
- (*filterPos)[i] += (filterSize - 1) / 2 - (filter2Size - 1) / 2;
- }
- av_freep(&filter);
-
- // Assume it is near normalized (*0.5 or *2.0 is OK but * 0.001 is not).
- minFilterSize = 0;
- for (i = dstW - 1; i >= 0; i--)
- {
- int min = filter2Size;
- int j;
- int64_t cutOff = 0.0;
-
- for (j = 0; j < filter2Size; j++)
- {
- int k;
- cutOff += FFABS(filter2[i * filter2Size]);
-
- if (cutOff > SWS_MAX_REDUCE_CUTOFF * fone)
- break;
-
- if (i < dstW - 1 && (*filterPos)[i] >= (*filterPos)[i + 1])
- break;
-
- // move filter coefficients left
- for (k = 1; k < filter2Size; k++)
- filter2[i * filter2Size + k - 1] = filter2[i * filter2Size + k];
- filter2[i * filter2Size + k - 1] = 0;
- (*filterPos)[i]++;
- }
-
- cutOff = 0;
- for (j = filter2Size - 1; j > 0; j--)
- {
- cutOff += FFABS(filter2[i * filter2Size + j]);
-
- if (cutOff > SWS_MAX_REDUCE_CUTOFF * fone)
- break;
- min--;
- }
-
- if (min > minFilterSize)
- minFilterSize = min;
- }
-
-
- assert(minFilterSize > 0);
- filterSize = (minFilterSize + (filterAlign - 1)) & (~(filterAlign - 1));
- assert(filterSize > 0);
- filter = (int64_t *)av_malloc(filterSize * dstW * sizeof(*filter));
- if (filterSize >= MAX_FILTER_SIZE * 16 /
- ((flags & SWS_ACCURATE_RND) ? APCK_SIZE : 16) || !filter)
- goto fail;
- *outFilterSize = filterSize;
-
- if (flags & SWS_PRINT_INFO)
- hb_log("SwScaler: reducing / aligning filtersize %d -> %d",filter2Size,filterSize);
- for (i = 0; i < dstW; i++)
- {
- int j;
-
- for (j = 0; j < filterSize; j++)
- {
- if (j >= filter2Size)
- filter[i * filterSize + j] = 0;
- else
- filter[i * filterSize + j] = filter2[i * filter2Size + j];
- if ((flags & SWS_BITEXACT) && j >= minFilterSize)
- filter[i * filterSize + j] = 0;
- }
- }
-
- // FIXME try to align filterPos if possible
-
- // fix borders
- for (i = 0; i < dstW; i++)
- {
- int j;
- if ((*filterPos)[i] < 0)
- {
- // move filter coefficients left to compensate for filterPos
- for (j = 1; j < filterSize; j++)
- {
- int left = FFMAX(j + (*filterPos)[i], 0);
- filter[i * filterSize + left] += filter[i * filterSize + j];
- filter[i * filterSize + j] = 0;
- }
- (*filterPos)[i]= 0;
- }
-
- if ((*filterPos)[i] + filterSize > srcW)
- {
- int shift = (*filterPos)[i] + filterSize - srcW;
- // move filter coefficients right to compensate for filterPos
- for (j = filterSize - 2; j >= 0; j--)
- {
- int right = FFMIN(j + shift, filterSize - 1);
- filter[i * filterSize + right] += filter[i * filterSize + j];
- filter[i * filterSize + j] = 0;
- }
- (*filterPos)[i]= srcW - filterSize;
- }
- }
-
- // Note the +1 is for the MMX scaler which reads over the end
- // FF_ALLOCZ_OR_GOTO(NULL, *outFilter,
- // *outFilterSize * (dstW + 3) * sizeof(int16_t), fail);
- *outFilter = (int16_t *)av_mallocz(*outFilterSize * (dstW + 3) * sizeof(int16_t));
- if( *outFilter == NULL && (*outFilterSize * (dstW + 3) * sizeof(int16_t)) != 0)
- {
- hb_log("Can't alloc memory");
- goto fail;
- }
-
- for (i = 0; i < dstW; i++)
- {
- int j;
- int64_t error = 0;
- int64_t sum = 0;
-
- for (j = 0; j < filterSize; j++)
- {
- sum += filter[i * filterSize + j];
- }
- sum = (sum + one / 2) / one;
- for (j = 0; j < *outFilterSize; j++)
- {
- int64_t v = filter[i * filterSize + j] + error;
- int intV = ROUNDED_DIV(v, sum);
- (*outFilter)[i * (*outFilterSize) + j] = intV;
- error = v - intV * sum;
- }
- }
-
- (*filterPos)[dstW + 0] =
- (*filterPos)[dstW + 1] =
- (*filterPos)[dstW + 2] = (*filterPos)[dstW - 1];
- for (i = 0; i < *outFilterSize; i++)
- {
- int k = (dstW - 1) * (*outFilterSize) + i;
- (*outFilter)[k + 1 * (*outFilterSize)] =
- (*outFilter)[k + 2 * (*outFilterSize)] =
- (*outFilter)[k + 3 * (*outFilterSize)] = (*outFilter)[k];
- }
-
- ret = 0;
-
-fail:
- av_free(filter);
- av_free(filter2);
- return ret;
-}
-
-static int handle_scale_jpeg(enum PixelFormat *format)
-{
- switch (*format)
- {
- case AV_PIX_FMT_YUVJ420P:
- *format = AV_PIX_FMT_YUV420P;
- return 1;
- case AV_PIX_FMT_YUVJ422P:
- *format = AV_PIX_FMT_YUV422P;
- return 1;
- case AV_PIX_FMT_YUVJ444P:
- *format = AV_PIX_FMT_YUV444P;
- return 1;
- case AV_PIX_FMT_YUVJ440P:
- *format = AV_PIX_FMT_YUV440P;
- return 1;
- default:
- return 0;
- }
-}
-
-static void scaleGetSubSampleFactors(int *h, int *v, enum PixelFormat format)
-{
- *h = av_pix_fmt_descriptors[format].log2_chroma_w;
- *v = av_pix_fmt_descriptors[format].log2_chroma_h;
-}
-
-typedef struct FormatEntry {
- int is_supported_in, is_supported_out;
-} FormatEntry;
-
-static const FormatEntry format_entries[AV_PIX_FMT_NB] = {
- { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },
- { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 0 },
- { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 0, 0 }, { 1, 1 },
- { 0, 1 }, { 1, 1 }, { 1, 1 }, { 0, 1 }, { 1, 1 }, { 1, 1 },
- { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },
- { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },
- { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },
- { 1, 0 }, { 1, 0 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },
- { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },
- { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },
- { 1, 1 }, { 1, 1 }, { 1, 0 }, { 1, 1 }, { 1, 1 }, { 0, 0 },
- { 0, 0 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },
- { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },
- { 1, 1 }, { 1, 0 }, { 1, 0 }, { 1, 0 }, { 1, 0 }, { 1, 0 },
- { 1, 0 }, { 1, 0 },
-};
-
-int scale_isSupportedInput( enum PixelFormat pix_fmt )
-{
- return (unsigned)pix_fmt < AV_PIX_FMT_NB ?
- format_entries[pix_fmt].is_supported_in : 0;
-}
-
-int scale_isSupportedOutput( enum PixelFormat pix_fmt )
-{
- return (unsigned)pix_fmt < AV_PIX_FMT_NB ?
- format_entries[pix_fmt].is_supported_out : 0;
-}
-
-static void hcscale_fast_c( ScaleContext *c, int16_t *dst1, int16_t *dst2,
- int dstWidth, const uint8_t *src1,
- const uint8_t *src2, int srcW, int xInc )
-{
- int i;
- unsigned int xpos = 0;
- for (i = 0; i < dstWidth; i++)
- {
- register unsigned int xx = xpos >> 16;
- register unsigned int xalpha = (xpos & 0xFFFF) >> 9;
- dst1[i] = (src1[xx] * (xalpha ^ 127) + src1[xx + 1] * xalpha);
- dst2[i] = (src2[xx] * (xalpha ^ 127) + src2[xx + 1] * xalpha);
- xpos += xInc;
- }
- for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
- {
- dst1[i] = src1[srcW-1]*128;
- dst2[i] = src2[srcW-1]*128;
- }
-}
-
-static void hyscale_fast_c(ScaleContext *c, int16_t *dst, int dstWidth,
- const uint8_t *src, int srcW, int xInc)
-{
- int i;
- unsigned int xpos = 0;
- for (i = 0; i < dstWidth; i++)
- {
- register unsigned int xx = xpos >> 16;
- register unsigned int xalpha = (xpos & 0xFFFF) >> 9;
- dst[i] = (src[xx] << 7) + (src[xx + 1] - src[xx]) * xalpha;
- xpos += xInc;
- }
- for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
- dst[i] = src[srcW-1]*128;
-}
-
-static void hScale16To19_c(ScaleContext *c, int16_t *_dst, int dstW,
- const uint8_t *_src, const int16_t *filter,
- const int32_t *filterPos, int filterSize)
-{
- int i;
- int32_t *dst = (int32_t *) _dst;
- const uint16_t *src = (const uint16_t *) _src;
- int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
- int sh = bits - 4;
-
- if((isScaleAnyRGB(c->srcFormat) || c->srcFormat==AV_PIX_FMT_PAL8)
- && av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1<15)
- sh= 9;
-
- for (i = 0; i < dstW; i++)
- {
- int j;
- int srcPos = filterPos[i];
- int val = 0;
-
- for (j = 0; j < filterSize; j++)
- {
- val += src[srcPos + j] * filter[filterSize * i + j];
- }
- dst[i] = FFMIN(val >> sh, (1 << 19) - 1);
- }
-}
-
-static void hScale16To15_c(ScaleContext *c, int16_t *dst, int dstW,
- const uint8_t *_src, const int16_t *filter,
- const int32_t *filterPos, int filterSize)
-{
- int i;
- const uint16_t *src = (const uint16_t *) _src;
- int sh = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
-
- if(sh<15)
- sh= isScaleAnyRGB(c->srcFormat) || c->srcFormat==AV_PIX_FMT_PAL8
- ? 13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
-
- for (i = 0; i < dstW; i++)
- {
- int j;
- int srcPos = filterPos[i];
- int val = 0;
-
- for (j = 0; j < filterSize; j++)
- {
- val += src[srcPos + j] * filter[filterSize * i + j];
- }
- // filter=14 bit, input=16 bit, output=30 bit, >> 15 makes 15 bit
- dst[i] = FFMIN(val >> sh, (1 << 15) - 1);
- }
-}
-
-static void hScale8To15_c(ScaleContext *c, int16_t *dst, int dstW,
- const uint8_t *src, const int16_t *filter,
- const int32_t *filterPos, int filterSize)
-{
- int i;
- for (i = 0; i < dstW; i++)
- {
- int j;
- int srcPos = filterPos[i];
- int val = 0;
- for (j = 0; j < filterSize; j++)
- {
- val += ((int)src[srcPos + j]) * filter[filterSize * i + j];
- }
- dst[i] = FFMIN(val >> 7, (1 << 15) - 1); // the cubic equation does overflow ...
- }
-}
-
-static void hScale8To19_c(ScaleContext *c, int16_t *_dst, int dstW,
- const uint8_t *src, const int16_t *filter,
- const int32_t *filterPos, int filterSize)
-{
- int i;
- int32_t *dst = (int32_t *) _dst;
- for (i = 0; i < dstW; i++)
- {
- int j;
- int srcPos = filterPos[i];
- int val = 0;
- for (j = 0; j < filterSize; j++)
- {
- val += ((int)src[srcPos + j]) * filter[filterSize * i + j];
- }
- dst[i] = FFMIN(val >> 3, (1 << 19) - 1); // the cubic equation does overflow ...
- }
-}
-
-static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
-{
- int i;
- for (i = 0; i < width; i++)
- {
- dstU[i] = (FFMIN(dstU[i], 30775) * 4663 - 9289992) >> 12; // -264
- dstV[i] = (FFMIN(dstV[i], 30775) * 4663 - 9289992) >> 12; // -264
- }
-}
-
-static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
-{
- int i;
- for (i = 0; i < width; i++)
- {
- dstU[i] = (dstU[i] * 1799 + 4081085) >> 11; // 1469
- dstV[i] = (dstV[i] * 1799 + 4081085) >> 11; // 1469
- }
-}
-
-static void lumRangeToJpeg_c(int16_t *dst, int width)
-{
- int i;
- for (i = 0; i < width; i++)
- dst[i] = (FFMIN(dst[i], 30189) * 19077 - 39057361) >> 14;
-}
-
-static void lumRangeFromJpeg_c(int16_t *dst, int width)
-{
- int i;
- for (i = 0; i < width; i++)
- dst[i] = (dst[i] * 14071 + 33561947) >> 14;
-}
-
-static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
-{
- int i;
- int32_t *dstU = (int32_t *) _dstU;
- int32_t *dstV = (int32_t *) _dstV;
- for (i = 0; i < width; i++)
- {
- dstU[i] = (FFMIN(dstU[i], 30775 << 4) * 4663 - (9289992 << 4)) >> 12; // -264
- dstV[i] = (FFMIN(dstV[i], 30775 << 4) * 4663 - (9289992 << 4)) >> 12; // -264
- }
-}
-
-static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
-{
- int i;
- int32_t *dstU = (int32_t *) _dstU;
- int32_t *dstV = (int32_t *) _dstV;
- for (i = 0; i < width; i++)
- {
- dstU[i] = (dstU[i] * 1799 + (4081085 << 4)) >> 11; // 1469
- dstV[i] = (dstV[i] * 1799 + (4081085 << 4)) >> 11; // 1469
- }
-}
-
-static void lumRangeToJpeg16_c(int16_t *_dst, int width)
-{
- int i;
- int32_t *dst = (int32_t *) _dst;
- for (i = 0; i < width; i++)
- dst[i] = (FFMIN(dst[i], 30189 << 4) * 4769 - (39057361 << 2)) >> 12;
-}
-
-static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
-{
- int i;
- int32_t *dst = (int32_t *) _dst;
- for (i = 0; i < width; i++)
- dst[i] = (dst[i]*(14071/4) + (33561947<<4)/4)>>12;
-}
-
-static av_cold void sws_init_swScale_c(ScaleContext *c)
-{
- enum PixelFormat srcFormat = c->srcFormat;
-
- ff_sws_init_output_funcs(c, &c->yuv2plane1, &c->yuv2planeX,
- &c->yuv2nv12cX, &c->yuv2packed1,
- &c->yuv2packed2, &c->yuv2packedX);
-
- ff_sws_init_input_funcs(c);
-
- if (c->srcBpc == 8)
- {
- if (c->dstBpc <= 10)
- {
- c->hyScale = c->hcScale = hScale8To15_c;
- if (c->flags & SWS_FAST_BILINEAR)
- {
- c->hyscale_fast = hyscale_fast_c;
- c->hcscale_fast = hcscale_fast_c;
- }
- }
- else
- {
- c->hyScale = c->hcScale = hScale8To19_c;
- }
- }
- else
- {
- c->hyScale = c->hcScale = c->dstBpc > 10 ? hScale16To19_c
- : hScale16To15_c;
- }
-
- if (c->srcRange != c->dstRange && !isScaleAnyRGB(c->dstFormat))
- {
- if (c->dstBpc <= 10)
- {
- if (c->srcRange)
- {
- c->lumConvertRange = lumRangeFromJpeg_c;
- c->chrConvertRange = chrRangeFromJpeg_c;
- }
- else
- {
- c->lumConvertRange = lumRangeToJpeg_c;
- c->chrConvertRange = chrRangeToJpeg_c;
- }
- }
- else
- {
- if (c->srcRange)
- {
- c->lumConvertRange = lumRangeFromJpeg16_c;
- c->chrConvertRange = chrRangeFromJpeg16_c;
- }
- else
- {
- c->lumConvertRange = lumRangeToJpeg16_c;
- c->chrConvertRange = chrRangeToJpeg16_c;
- }
- }
- }
-
- if (!(isScaleGray(srcFormat) || isScaleGray(c->dstFormat) ||
- srcFormat == AV_PIX_FMT_MONOBLACK || srcFormat == AV_PIX_FMT_MONOWHITE))
- c->needs_hcscale = 1;
-}
-
-int scale_init_context(ScaleContext *c, ScaleFilter *srcFilter, ScaleFilter *dstFilter)
-{
- ScaleFilter dummyFilter = { NULL, NULL, NULL, NULL };
- int srcW = c->srcW;
- int srcH = c->srcH;
- int dstW = c->dstW;
- int dstH = c->dstH;
- int flags, cpu_flags;
- enum PixelFormat srcFormat = c->srcFormat;
- enum PixelFormat dstFormat = c->dstFormat;
-
- cpu_flags = 0;
- flags = c->flags;
-
- if(srcFormat != c->srcFormat || dstFormat != c->dstFormat)
- {
- hb_log("deprecated pixel format used, make sure you did set range correctly.");
- c->srcFormat = srcFormat;
- c->dstFormat = dstFormat;
- }
-
- if (srcW < 4 || srcH < 1 || dstW < 8 || dstH < 1)
- {
- hb_log("%dx%d -> %dx%d is invalid scaling dimension.",srcW,srcH,dstW,dstH);
- return -1;
- }
-
- if (!dstFilter)
- dstFilter = &dummyFilter;
- if (!srcFilter)
- srcFilter = &dummyFilter;
-
- c->lumXInc = (((int64_t)srcW << 16) + (dstW >> 1)) / dstW;
- c->lumYInc = (((int64_t)srcH << 16) + (dstH >> 1)) / dstH;
- c->dstFormatBpp = av_get_bits_per_pixel(&av_pix_fmt_descriptors[dstFormat]);
- c->srcFormatBpp = av_get_bits_per_pixel(&av_pix_fmt_descriptors[srcFormat]);
- c->vRounder = 4 * 0x0001000100010001ULL;
-
- scaleGetSubSampleFactors(&c->chrSrcHSubSample, &c->chrSrcVSubSample, srcFormat);
- scaleGetSubSampleFactors(&c->chrDstHSubSample, &c->chrDstVSubSample, dstFormat);
-
- // drop some chroma lines if the user wants it
- c->vChrDrop = (flags & SWS_SRC_V_CHR_DROP_MASK) >> SWS_SRC_V_CHR_DROP_SHIFT;
- c->chrSrcVSubSample += c->vChrDrop;
- c->chrSrcW = -((-srcW) >> c->chrSrcHSubSample);
- c->chrSrcH = -((-srcH) >> c->chrSrcVSubSample);
- c->chrDstW = -((-dstW) >> c->chrDstHSubSample);
- c->chrDstH = -((-dstH) >> c->chrDstVSubSample);
- c->chrXInc = (((int64_t)c->chrSrcW << 16) + (c->chrDstW >> 1)) / c->chrDstW;
- c->chrYInc = (((int64_t)c->chrSrcH << 16) + (c->chrDstH >> 1)) / c->chrDstH;
-
- const int filterAlign = 1;
-
- if (initScaleFilter(&c->hLumFilter, &c->hLumFilterPos,
- &c->hLumFilterSize, c->lumXInc,
- srcW, dstW, filterAlign, 1 << 14,
- (flags & SWS_BICUBLIN) ? (flags | SWS_BICUBIC) : flags,
- cpu_flags, srcFilter->lumH, dstFilter->lumH,
- c->param) < 0)
- goto fail;
-
- if (initScaleFilter(&c->hChrFilter, &c->hChrFilterPos,
- &c->hChrFilterSize, c->chrXInc,
- c->chrSrcW, c->chrDstW, filterAlign, 1 << 14,
- (flags & SWS_BICUBLIN) ? (flags | SWS_BILINEAR) : flags,
- cpu_flags, srcFilter->chrH, dstFilter->chrH,
- c->param) < 0)
- goto fail;
-
- if (initScaleFilter(&c->vLumFilter, &c->vLumFilterPos, &c->vLumFilterSize,
- c->lumYInc, srcH, dstH, filterAlign, (1 << 12),
- (flags & SWS_BICUBLIN) ? (flags | SWS_BICUBIC) : flags,
- cpu_flags, srcFilter->lumV, dstFilter->lumV,
- c->param) < 0)
- goto fail;
-
- if (initScaleFilter(&c->vChrFilter, &c->vChrFilterPos, &c->vChrFilterSize,
- c->chrYInc, c->chrSrcH, c->chrDstH,
- filterAlign, (1 << 12),
- (flags & SWS_BICUBLIN) ? (flags | SWS_BILINEAR) : flags,
- cpu_flags, srcFilter->chrV, dstFilter->chrV,
- c->param) < 0)
- goto fail;
- return 0;
-fail:
- return -1;
-}
-
-ScaleContext *scale_getContext(int srcW, int srcH, enum PixelFormat srcFormat,
- int dstW, int dstH, enum PixelFormat dstFormat,
- int flags, ScaleFilter *srcFilter,
- ScaleFilter *dstFilter, const double *param)
-{
- ScaleContext *sc = (ScaleContext*)malloc(sizeof(ScaleContext));
- sc->flags = flags;
- sc->srcW = srcW;
- sc->srcH = srcH;
- sc->dstW = dstW;
- sc->dstH = dstH;
- sc->srcRange = handle_scale_jpeg(&srcFormat);
- sc->dstRange = handle_scale_jpeg(&dstFormat);
- sc->srcFormat = srcFormat;
- sc->dstFormat = dstFormat;
- sc->hyscale_fast = 0;
- sc->hcscale_fast = 0;
-
- if (param)
- {
- sc->param[0] = param[0];
- sc->param[1] = param[1];
- }
-
- if (scale_init_context(sc, srcFilter, dstFilter) < 0)
- {
- sws_freeContext(sc);
- return NULL;
- }
- return sc;
-}
-
-int scale_opencl(ScaleContext *c, void *cl_inbuf, void *cl_outbuf, int *srcStride, int *dstStride)
-{
- int should_dither = is9_OR_10BPS(c->srcFormat) || is16BPS(c->srcFormat);
- av_scale_frame(c,cl_outbuf,cl_inbuf,srcStride,dstStride,&should_dither);
- return 1;
-}
-
-void scale_init( int width, int height, int dstwidth, int dstheight )
-{
- int srcW = width;
- int srcH = height;
- int dstW = dstwidth;
- int dstH = dstheight;
- enum PixelFormat inputfmt = AV_PIX_FMT_YUV420P;
- enum PixelFormat outputfmt = AV_PIX_FMT_YUV420P;
- int flags = SWS_BILINEAR;
- g_scale = scale_getContext(srcW,srcH,inputfmt,dstW,dstH,outputfmt,flags,NULL,NULL,NULL);
-}
-
-void scale_release()
-{
- sws_freeContext( g_scale );
-}
-#ifdef USE_OPENCL
-int scale_run( cl_mem inbuf, cl_mem outbuf, int linesizey, int linesizeuv, int height )
-{
- g_scale->cl_src = inbuf;
- g_scale->cl_dst = outbuf;
-
- int src_stride[4] = { linesizey, linesizeuv, linesizeuv, 0 };
- int dst_stride[4] = { g_scale->dstW, g_scale->chrDstW, g_scale->chrDstW, 0 };
- int ret = -1;
-
- ret = scale_opencl( g_scale, inbuf, outbuf, src_stride, dst_stride );
-
- return ret;
-}
-#endif
-#endif
diff --git a/libhb/scale.h b/libhb/scale.h
deleted file mode 100644
index 46c177efc..000000000
--- a/libhb/scale.h
+++ /dev/null
@@ -1,324 +0,0 @@
-/* scale.h
-
- Copyright (c) 2003-2012 HandBrake Team
- This file is part of the HandBrake source code
- Homepage: <http://handbrake.fr/>.
- It may be used under the terms of the GNU General Public License v2.
- For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html
-
- Authors: Peng Gao <[email protected]> <http://www.multicorewareinc.com/>
- Li Cao <[email protected]> <http://www.multicorewareinc.com/>
-
-
- */
-
-#ifndef SCALE_H
-#define SCALE_H
-#ifdef USE_OPENCL
-#include <stdint.h>
-#include "vadxva2.h"
-#include "libavutil/pixfmt.h"
-#include "hbffmpeg.h"
-
-#define YUVRGB_TABLE_HEADROOM 128
-#define MAX_FILTER_SIZE 256
-#define is16BPS(x) \
- (av_pix_fmt_descriptors[x].comp[0].depth_minus1 == 15)
-
-#define is9_OR_10BPS(x) \
- (av_pix_fmt_descriptors[x].comp[0].depth_minus1 == 8 || \
- av_pix_fmt_descriptors[x].comp[0].depth_minus1 == 9)
-
-#if ARCH_X86_64
-# define APCK_PTR2 8
-# define APCK_COEF 16
-# define APCK_SIZE 24
-#else
-# define APCK_PTR2 4
-# define APCK_COEF 8
-# define APCK_SIZE 16
-#endif
-
-typedef void (*yuv2planar1_fn)(const int16_t *src, uint8_t *dest, int dstW,
- const uint8_t *dither, int offset);
-
-typedef void (*yuv2planarX_fn)(const int16_t *filter, int filterSize,
- const int16_t **src, uint8_t *dest, int dstW,
- const uint8_t *dither, int offset);
-
-typedef void (*yuv2interleavedX_fn)(struct ScaleContext *c,
- const int16_t *chrFilter,
- int chrFilterSize,
- const int16_t **chrUSrc,
- const int16_t **chrVSrc,
- uint8_t *dest, int dstW);
-
-typedef void (*yuv2packed1_fn)(struct ScaleContext *c, const int16_t *lumSrc,
- const int16_t *chrUSrc[2],
- const int16_t *chrVSrc[2],
- const int16_t *alpSrc, uint8_t *dest,
- int dstW, int uvalpha, int y);
-
-typedef void (*yuv2packed2_fn)(struct SCaleContext *c, const int16_t *lumSrc[2],
- const int16_t *chrUSrc[2],
- const int16_t *chrVSrc[2],
- const int16_t *alpSrc[2],
- uint8_t *dest,
- int dstW, int yalpha, int uvalpha, int y);
-
-typedef void (*yuv2packedX_fn)(struct SCaleContext *c, const int16_t *lumFilter,
- const int16_t **lumSrc, int lumFilterSize,
- const int16_t *chrFilter,
- const int16_t **chrUSrc,
- const int16_t **chrVSrc, int chrFilterSize,
- const int16_t **alpSrc, uint8_t *dest,
- int dstW, int y);
-
-typedef int (*SwsFunc)(struct ScaleContext *context, const uint8_t *src[],
- int srcStride[], int srcSliceY, int srcSliceH,
- uint8_t *dst[], int dstStride[]);
-
-typedef struct {
- double *coeff; ///< pointer to the list of coefficients
- int length; ///< number of coefficients in the vector
-} ScaleVector;
-
-typedef struct {
- ScaleVector *lumH;
- ScaleVector *lumV;
- ScaleVector *chrH;
- ScaleVector *chrV;
-} ScaleFilter;
-
-typedef struct ScaleContext {
- SwsFunc swScale;
- int srcW; ///< Width of source luma/alpha planes.
- int srcH; ///< Height of source luma/alpha planes.
- int dstH; ///< Height of destination luma/alpha planes.
- int chrSrcW; ///< Width of source chroma planes.
- int chrSrcH; ///< Height of source chroma planes.
- int chrDstW; ///< Width of destination chroma planes.
- int chrDstH; ///< Height of destination chroma planes.
- int lumXInc, chrXInc;
- int lumYInc, chrYInc;
- enum PixelFormat dstFormat; ///< Destination pixel format.
- enum PixelFormat srcFormat; ///< Source pixel format.
- int dstFormatBpp; ///< Number of bits per pixel of the destination pixel format.
- int srcFormatBpp; ///< Number of bits per pixel of the source pixel format.
- int dstBpc, srcBpc;
- int chrSrcHSubSample; ///< Binary logarithm of horizontal subsampling factor between luma/alpha and chroma planes in source image.
- int chrSrcVSubSample; ///< Binary logarithm of vertical subsampling factor between luma/alpha and chroma planes in source image.
- int chrDstHSubSample; ///< Binary logarithm of horizontal subsampling factor between luma/alpha and chroma planes in destination image.
- int chrDstVSubSample; ///< Binary logarithm of vertical subsampling factor between luma/alpha and chroma planes in destination image.
- int vChrDrop; ///< Binary logarithm of extra vertical subsampling factor in source image chroma planes specified by user.
- int sliceDir; ///< Direction that slices are fed to the scaler (1 = top-to-bottom, -1 = bottom-to-top).
- double param[2]; ///< Input parameters for scaling algorithms that need them.
-
- uint32_t pal_yuv[256];
- uint32_t pal_rgb[256];
-
- int16_t **lumPixBuf; ///< Ring buffer for scaled horizontal luma plane lines to be fed to the vertical scaler.
- int16_t **chrUPixBuf; ///< Ring buffer for scaled horizontal chroma plane lines to be fed to the vertical scaler.
- int16_t **chrVPixBuf; ///< Ring buffer for scaled horizontal chroma plane lines to be fed to the vertical scaler.
- int16_t **alpPixBuf; ///< Ring buffer for scaled horizontal alpha plane lines to be fed to the vertical scaler.
- int vLumBufSize; ///< Number of vertical luma/alpha lines allocated in the ring buffer.
- int vChrBufSize; ///< Number of vertical chroma lines allocated in the ring buffer.
- int lastInLumBuf; ///< Last scaled horizontal luma/alpha line from source in the ring buffer.
- int lastInChrBuf; ///< Last scaled horizontal chroma line from source in the ring buffer.
- int lumBufIndex; ///< Index in ring buffer of the last scaled horizontal luma/alpha line from source.
- int chrBufIndex; ///< Index in ring buffer of the last scaled horizontal chroma line from source.
-
- uint8_t *formatConvBuffer;
- int16_t *hLumFilter; ///< Array of horizontal filter coefficients for luma/alpha planes.
- int16_t *hChrFilter; ///< Array of horizontal filter coefficients for chroma planes.
- int16_t *vLumFilter; ///< Array of vertical filter coefficients for luma/alpha planes.
- int16_t *vChrFilter; ///< Array of vertical filter coefficients for chroma planes.
- int32_t *hLumFilterPos; ///< Array of horizontal filter starting positions for each dst[i] for luma/alpha planes.
- int32_t *hChrFilterPos; ///< Array of horizontal filter starting positions for each dst[i] for chroma planes.
- int32_t *vLumFilterPos; ///< Array of vertical filter starting positions for each dst[i] for luma/alpha planes.
- int32_t *vChrFilterPos; ///< Array of vertical filter starting positions for each dst[i] for chroma planes.
- int hLumFilterSize; ///< Horizontal filter size for luma/alpha pixels.
- int hChrFilterSize; ///< Horizontal filter size for chroma pixels.
- int vLumFilterSize; ///< Vertical filter size for luma/alpha pixels.
- int vChrFilterSize; ///< Vertical filter size for chroma pixels.
-
- int lumMmx2FilterCodeSize; ///< Runtime-generated MMX2 horizontal fast bilinear scaler code size for luma/alpha planes.
- int chrMmx2FilterCodeSize; ///< Runtime-generated MMX2 horizontal fast bilinear scaler code size for chroma planes.
- uint8_t *lumMmx2FilterCode; ///< Runtime-generated MMX2 horizontal fast bilinear scaler code for luma/alpha planes.
- uint8_t *chrMmx2FilterCode; ///< Runtime-generated MMX2 horizontal fast bilinear scaler code for chroma planes.
-
- int canMMX2BeUsed;
-
- unsigned char *dest;
- unsigned char *source;
-
- int dstY; ///< Last destination vertical line output from last slice.
- int flags; ///< Flags passed by the user to select scaler algorithm, optimizations, subsampling, etc...
- void *yuvTable; ///<s pointer to the yuv->rgb table start so it can be freed()
- uint8_t *table_rV[256 + 2*YUVRGB_TABLE_HEADROOM];
- uint8_t *table_gU[256 + 2*YUVRGB_TABLE_HEADROOM];
- int table_gV[256 + 2*YUVRGB_TABLE_HEADROOM];
- uint8_t *table_bU[256 + 2*YUVRGB_TABLE_HEADROOM];
-
- //Colorspace stuff
- int contrast, brightness, saturation; // for sws_getColorspaceDetails
- int srcColorspaceTable[4];
- int dstColorspaceTable[4];
- int srcRange; ///< 0 = MPG YUV range, 1 = JPG YUV range (source image).
- int dstRange; ///< 0 = MPG YUV range, 1 = JPG YUV range (destination image).
- int src0Alpha;
- int dst0Alpha;
- int yuv2rgb_y_offset;
- int yuv2rgb_y_coeff;
- int yuv2rgb_v2r_coeff;
- int yuv2rgb_v2g_coeff;
- int yuv2rgb_u2g_coeff;
- int yuv2rgb_u2b_coeff;
-
-#define RED_DITHER "0*8"
-#define GREEN_DITHER "1*8"
-#define BLUE_DITHER "2*8"
-#define Y_COEFF "3*8"
-#define VR_COEFF "4*8"
-#define UB_COEFF "5*8"
-#define VG_COEFF "6*8"
-#define UG_COEFF "7*8"
-#define Y_OFFSET "8*8"
-#define U_OFFSET "9*8"
-#define V_OFFSET "10*8"
-#define LUM_MMX_FILTER_OFFSET "11*8"
-#define CHR_MMX_FILTER_OFFSET "11*8+4*4*256"
-#define DSTW_OFFSET "11*8+4*4*256*2" //do not change, it is hardcoded in the ASM
-#define ESP_OFFSET "11*8+4*4*256*2+8"
-#define VROUNDER_OFFSET "11*8+4*4*256*2+16"
-#define U_TEMP "11*8+4*4*256*2+24"
-#define V_TEMP "11*8+4*4*256*2+32"
-#define Y_TEMP "11*8+4*4*256*2+40"
-#define ALP_MMX_FILTER_OFFSET "11*8+4*4*256*2+48"
-#define UV_OFF_PX "11*8+4*4*256*3+48"
-#define UV_OFF_BYTE "11*8+4*4*256*3+56"
-#define DITHER16 "11*8+4*4*256*3+64"
-#define DITHER32 "11*8+4*4*256*3+80"
-
- DECLARE_ALIGNED(8, uint64_t, redDither);
- DECLARE_ALIGNED(8, uint64_t, greenDither);
- DECLARE_ALIGNED(8, uint64_t, blueDither);
-
- DECLARE_ALIGNED(8, uint64_t, yCoeff);
- DECLARE_ALIGNED(8, uint64_t, vrCoeff);
- DECLARE_ALIGNED(8, uint64_t, ubCoeff);
- DECLARE_ALIGNED(8, uint64_t, vgCoeff);
- DECLARE_ALIGNED(8, uint64_t, ugCoeff);
- DECLARE_ALIGNED(8, uint64_t, yOffset);
- DECLARE_ALIGNED(8, uint64_t, uOffset);
- DECLARE_ALIGNED(8, uint64_t, vOffset);
- int32_t lumMmxFilter[4 * MAX_FILTER_SIZE];
- int32_t chrMmxFilter[4 * MAX_FILTER_SIZE];
- int dstW; ///< Width of destination luma/alpha planes.
- DECLARE_ALIGNED(8, uint64_t, esp);
- DECLARE_ALIGNED(8, uint64_t, vRounder);
- DECLARE_ALIGNED(8, uint64_t, u_temp);
- DECLARE_ALIGNED(8, uint64_t, v_temp);
- DECLARE_ALIGNED(8, uint64_t, y_temp);
- int32_t alpMmxFilter[4 * MAX_FILTER_SIZE];
-
- DECLARE_ALIGNED(8, ptrdiff_t, uv_off); ///< offset (in pixels) between u and v planes
- DECLARE_ALIGNED(8, ptrdiff_t, uv_offx2); ///< offset (in bytes) between u and v planes
- DECLARE_ALIGNED(8, uint16_t, dither16)[8];
- DECLARE_ALIGNED(8, uint32_t, dither32)[8];
-
- const uint8_t *chrDither8, *lumDither8;
-
-#if HAVE_ALTIVEC
- vector signed short CY;
- vector signed short CRV;
- vector signed short CBU;
- vector signed short CGU;
- vector signed short CGV;
- vector signed short OY;
- vector unsigned short CSHIFT;
- vector signed short *vYCoeffsBank, *vCCoeffsBank;
-#endif
-
-#if ARCH_BFIN
- DECLARE_ALIGNED(4, uint32_t, oy);
- DECLARE_ALIGNED(4, uint32_t, oc);
- DECLARE_ALIGNED(4, uint32_t, zero);
- DECLARE_ALIGNED(4, uint32_t, cy);
- DECLARE_ALIGNED(4, uint32_t, crv);
- DECLARE_ALIGNED(4, uint32_t, rmask);
- DECLARE_ALIGNED(4, uint32_t, cbu);
- DECLARE_ALIGNED(4, uint32_t, bmask);
- DECLARE_ALIGNED(4, uint32_t, cgu);
- DECLARE_ALIGNED(4, uint32_t, cgv);
- DECLARE_ALIGNED(4, uint32_t, gmask);
-#endif
-
-#if HAVE_VIS
- DECLARE_ALIGNED(8, uint64_t, sparc_coeffs)[10];
-#endif
- int use_mmx_vfilter;
-
- /* function pointers for swScale() */
- yuv2planar1_fn yuv2plane1;
- yuv2planarX_fn yuv2planeX;
- yuv2interleavedX_fn yuv2nv12cX;
- yuv2packed1_fn yuv2packed1;
- yuv2packed2_fn yuv2packed2;
- yuv2packedX_fn yuv2packedX;
-
- /// Unscaled conversion of luma plane to YV12 for horizontal scaler.
- void (*lumToYV12)(uint8_t *dst, const uint8_t *src, const uint8_t *src2, const uint8_t *src3,
- int width, uint32_t *pal);
- /// Unscaled conversion of alpha plane to YV12 for horizontal scaler.
- void (*alpToYV12)(uint8_t *dst, const uint8_t *src, const uint8_t *src2, const uint8_t *src3,
- int width, uint32_t *pal);
- /// Unscaled conversion of chroma planes to YV12 for horizontal scaler.
- void (*chrToYV12)(uint8_t *dstU, uint8_t *dstV,
- const uint8_t *src1, const uint8_t *src2, const uint8_t *src3,
- int width, uint32_t *pal);
-
- void (*readLumPlanar)(uint8_t *dst, const uint8_t *src[4], int width);
- void (*readChrPlanar)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src[4],
- int width);
-
- void (*hyscale_fast)(struct SwsContext *c,
- int16_t *dst, int dstWidth,
- const uint8_t *src, int srcW, int xInc);
- void (*hcscale_fast)(struct SwsContext *c,
- int16_t *dst1, int16_t *dst2, int dstWidth,
- const uint8_t *src1, const uint8_t *src2,
- int srcW, int xInc);
-
- void (*hyScale)(struct SwsContext *c, int16_t *dst, int dstW,
- const uint8_t *src, const int16_t *filter,
- const int32_t *filterPos, int filterSize);
- void (*hcScale)(struct SwsContext *c, int16_t *dst, int dstW,
- const uint8_t *src, const int16_t *filter,
- const int32_t *filterPos, int filterSize);
-
- void (*lumConvertRange)(int16_t *dst, int width);
- void (*chrConvertRange)(int16_t *dst1, int16_t *dst2, int width);
-
- int needs_hcscale; ///< Set if there are chroma planes to be converted.
-
- cl_mem cl_hLumFilter;
- cl_mem cl_hLumFilterPos;
- cl_mem cl_hChrFilter;
- cl_mem cl_hChrFilterPos;
- cl_mem cl_vLumFilter;
- cl_mem cl_vLumFilterPos;
- cl_mem cl_vChrFilter;
- cl_mem cl_vChrFilterPos;
-
- cl_mem cl_intermediaBuf;
-
- cl_mem cl_src;
- cl_mem cl_dst;
-} ScaleContext;
-
-void scale_init(int, int, int, int);
-void scale_release();
-int scale_run(cl_mem inbuf, cl_mem outbuf, int linesizey, int linesizeuv, int height);
-#endif
-#endif
diff --git a/libhb/scale_kernel.c b/libhb/scale_kernel.c
deleted file mode 100644
index ddcfed914..000000000
--- a/libhb/scale_kernel.c
+++ /dev/null
@@ -1,223 +0,0 @@
-/* scale_kernel.h
-
- Copyright (c) 2003-2012 HandBrake Team
- This file is part of the HandBrake source code
- Homepage: <http://handbrake.fr/>.
- It may be used under the terms of the GNU General Public License v2.
- For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html
-
- Authors: Peng Gao <[email protected]> <http://www.multicorewareinc.com/>
- Li Cao <[email protected]> <http://www.multicorewareinc.com/>
-
-
- */
-
-#ifdef USE_OPENCL
-#include <assert.h>
-#include <math.h>
-#include <stdio.h>
-#include <string.h>
-#include <time.h>
-#include "scale.h"
-#include "openclwrapper.h"
-
-#define OCLCHECK( method, ...) \
- status = method(__VA_ARGS__); if(status != CL_SUCCESS) { \
- hb_log(" error %s %d",# method, status); assert(0); return status; }
-
-#define CREATEBUF( out, flags, size, ptr)\
- out = clCreateBuffer( kenv->context, (flags), (size), ptr, &status );\
- if( status != CL_SUCCESS ) { hb_log( "clCreateBuffer faild %d", status ); return -1; }
-
- #define CL_PARAM_NUM 20
-
-/****************************************************************************************************************************/
-/*************************Combine the hscale and yuv2plane into scaling******************************************************/
-/****************************************************************************************************************************/
-static int CreateCLBuffer( ScaleContext *c, KernelEnv *kenv )
-{
- cl_int status;
-
- if(!c->hyscale_fast || !c->hcscale_fast)
- {
- CREATEBUF(c->cl_hLumFilter, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, c->dstW*c->hLumFilterSize*sizeof(cl_short), c->hLumFilter);
- CREATEBUF(c->cl_hLumFilterPos, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, c->dstW*sizeof(cl_int), c->hLumFilterPos);
- CREATEBUF(c->cl_hChrFilter, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, c->chrDstW*c->hChrFilterSize*sizeof(cl_short), c->hChrFilter);
- CREATEBUF(c->cl_hChrFilterPos, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, c->chrDstW*sizeof(cl_int), c->hChrFilterPos);
- }
- if( c->vLumFilterSize > 1 && c->vChrFilterSize > 1 )
- {
- CREATEBUF(c->cl_vLumFilter, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, c->dstH*c->vLumFilterSize*sizeof(cl_short), c->vLumFilter);
- CREATEBUF(c->cl_vChrFilter, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, c->chrDstH*c->vChrFilterSize*sizeof(cl_short), c->vChrFilter);
- }
- CREATEBUF(c->cl_vLumFilterPos, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, c->dstH*sizeof(cl_int), c->vLumFilterPos);
- CREATEBUF(c->cl_vChrFilterPos, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, c->chrDstH*sizeof(cl_int), c->vChrFilterPos);
-
- return 1;
-}
-
-int av_scale_frame_func( void **userdata, KernelEnv *kenv )
-{
- ScaleContext *c = (ScaleContext *)userdata[0];
-
- c->cl_src = (cl_mem)userdata[2];
- c->cl_dst = (cl_mem)userdata[1];
-
- /*frame size*/
- int *tmp = (int *)userdata[3];
- int srcStride = tmp[0];
- int srcChrStride = tmp[1];
- int srcW = c->srcW;
- int srcH = c->srcH;
-
- tmp = (int *)userdata[4];
- int dstStride = tmp[0];
- int dstChrStride = tmp[1];
- int dstW = c->dstW;
- int dstH = c->dstH;
-
- /* local variable */
- cl_int status;
- size_t global_work_size[2];
-
- int intermediaSize;
-
- int st = CreateCLBuffer(c,kenv);
- if( !st )
- {
- hb_log( "CreateBuffer[%s] faild %d", "scale_opencl",st );
- return -1;
- }
-
- intermediaSize = dstStride * srcH + dstChrStride * srcH;
-
- CREATEBUF(c->cl_intermediaBuf, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, intermediaSize*sizeof(cl_short), NULL);
-
- static int init_chr_status = 0;
- static cl_kernel chr_kernel;
-
- if(init_chr_status == 0){
-
- if(!(c->flags & 1))
- {
- chr_kernel = clCreateKernel( kenv->program, "hscale_all_opencl", NULL );
- //Set the Kernel Argument;
- OCLCHECK(clSetKernelArg,chr_kernel, 2, sizeof(cl_mem), (void*)&c->cl_hLumFilter);
- OCLCHECK(clSetKernelArg,chr_kernel, 3, sizeof(cl_mem), (void*)&c->cl_hLumFilterPos);
- OCLCHECK(clSetKernelArg,chr_kernel, 4, sizeof(int), (void*)&c->hLumFilterSize);
- OCLCHECK(clSetKernelArg,chr_kernel, 5, sizeof(cl_mem), (void*)&c->cl_hChrFilter);
- OCLCHECK(clSetKernelArg,chr_kernel, 6, sizeof(cl_mem), (void*)&c->cl_hChrFilterPos);
- OCLCHECK(clSetKernelArg,chr_kernel, 7, sizeof(int), (void*)&c->hChrFilterSize);
- }
-
- /*Set the arguments*/
- OCLCHECK(clSetKernelArg, chr_kernel, 8, sizeof(dstW), (void*)&dstW);
- OCLCHECK(clSetKernelArg, chr_kernel, 9, sizeof(srcH), (void*)&srcH);
- OCLCHECK(clSetKernelArg, chr_kernel, 10, sizeof(srcW), (void*)&srcW);
- OCLCHECK(clSetKernelArg, chr_kernel, 11, sizeof(srcH), (void*)&srcH);
- OCLCHECK(clSetKernelArg, chr_kernel, 12, sizeof(dstStride), (void*)&dstStride);
- OCLCHECK(clSetKernelArg, chr_kernel, 13, sizeof(dstChrStride), (void*)&dstChrStride);
- OCLCHECK(clSetKernelArg, chr_kernel, 14, sizeof(srcStride), (void*)&srcStride);
- OCLCHECK(clSetKernelArg, chr_kernel, 15, sizeof(srcChrStride), (void*)&srcChrStride);
- init_chr_status = 1;
- }
-
- kenv->kernel = chr_kernel;
- OCLCHECK(clSetKernelArg, chr_kernel, 0, sizeof(cl_mem), (void*)&c->cl_intermediaBuf);
- OCLCHECK(clSetKernelArg, chr_kernel, 1, sizeof(cl_mem), (void*)&c->cl_src);
- /*Run the Kernel*/
- global_work_size[0] = c->chrDstW;//dstW >> 1; //must times 256;
- global_work_size[1] = c->chrSrcH;
-
- OCLCHECK(clEnqueueNDRangeKernel, kenv->command_queue, kenv->kernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
-
- static int init_lum_status = 0;
- static cl_kernel lum_kernel;
-
- if( init_lum_status == 0 ){
- //Vertical:
- /*Create Kernel*/
- if( c->vLumFilterSize > 1 && c->vChrFilterSize > 1 )
- lum_kernel = clCreateKernel( kenv->program, "vscale_all_nodither_opencl", NULL );
- else
- lum_kernel = clCreateKernel( kenv->program, "vscale_fast_opencl", NULL );
-
- if( c->vLumFilterSize > 1 && c->vChrFilterSize > 1 )
- {
- OCLCHECK(clSetKernelArg, lum_kernel, 2, sizeof(cl_mem), (void*)&c->cl_vLumFilter);
- OCLCHECK(clSetKernelArg, lum_kernel, 3, sizeof(int), (void*)&c->vLumFilterSize);
- OCLCHECK(clSetKernelArg, lum_kernel, 4, sizeof(cl_mem), (void*)&c->cl_vChrFilter);
- OCLCHECK(clSetKernelArg, lum_kernel, 5, sizeof(int), (void*)&c->vChrFilterSize);
- OCLCHECK(clSetKernelArg, lum_kernel, 6, sizeof(cl_mem), (void*)&c->cl_vLumFilterPos);
- OCLCHECK(clSetKernelArg, lum_kernel, 7, sizeof(cl_mem), (void*)&c->cl_vChrFilterPos);
- OCLCHECK(clSetKernelArg, lum_kernel, 8, sizeof(dstW), (void*)&dstW);
- OCLCHECK(clSetKernelArg, lum_kernel, 9, sizeof(dstH), (void*)&dstH);
- OCLCHECK(clSetKernelArg, lum_kernel, 10, sizeof(srcW), (void*)&srcW);
- OCLCHECK(clSetKernelArg, lum_kernel, 11, sizeof(srcH), (void*)&srcH);
- OCLCHECK(clSetKernelArg, lum_kernel, 12, sizeof(dstStride), (void*)&dstStride);
- OCLCHECK(clSetKernelArg, lum_kernel, 13, sizeof(dstChrStride), (void*)&dstChrStride);
- OCLCHECK(clSetKernelArg, lum_kernel, 14, sizeof(dstStride), (void*)&dstStride);
- OCLCHECK(clSetKernelArg, lum_kernel, 15, sizeof(dstChrStride), (void*)&dstChrStride);
- }
- else
- {
- OCLCHECK(clSetKernelArg, lum_kernel, 2, sizeof(cl_mem), (void*)&c->cl_vLumFilterPos);
- OCLCHECK(clSetKernelArg, lum_kernel, 3, sizeof(cl_mem), (void*)&c->cl_vChrFilterPos);
- OCLCHECK(clSetKernelArg, lum_kernel, 4, sizeof(dstW), (void*)&dstW);
- OCLCHECK(clSetKernelArg, lum_kernel, 5, sizeof(dstH), (void*)&dstH);
- OCLCHECK(clSetKernelArg, lum_kernel, 6, sizeof(srcW), (void*)&srcW);
- OCLCHECK(clSetKernelArg, lum_kernel, 7, sizeof(srcH), (void*)&srcH);
- OCLCHECK(clSetKernelArg, lum_kernel, 8, sizeof(dstStride), (void*)&dstStride);
- OCLCHECK(clSetKernelArg, lum_kernel, 9, sizeof(dstChrStride), (void*)&dstChrStride);
- OCLCHECK(clSetKernelArg, lum_kernel, 10, sizeof(dstStride), (void*)&dstStride);
- OCLCHECK(clSetKernelArg, lum_kernel, 11, sizeof(dstChrStride), (void*)&dstChrStride);
- }
- init_lum_status = 1;
- }
-
- kenv->kernel = lum_kernel;
- OCLCHECK(clSetKernelArg, kenv->kernel, 0, sizeof(cl_mem), (void*)&c->cl_dst);
- OCLCHECK(clSetKernelArg, kenv->kernel, 1, sizeof(cl_mem), (void*)&c->cl_intermediaBuf);
-
- /*Run the Kernel*/
- global_work_size[0] = c->chrDstW;
- global_work_size[1] = c->chrDstH;
-
- OCLCHECK(clEnqueueNDRangeKernel, kenv->command_queue, kenv->kernel, 2, NULL,global_work_size, NULL, 0, NULL, NULL);
-
- clReleaseMemObject( c->cl_intermediaBuf );
-
- return 1;
-}
-
-void av_scale_frame(ScaleContext *c, void *dst, void *src, int *srcStride, int *dstStride, int *should_dither)
-{
-
- static int regflg = 0;
- void *userdata[CL_PARAM_NUM];
- userdata[0] = (void *)c;
- userdata[1] = (void *)dst;
- userdata[2] = (void *)src;
- userdata[3] = (void *)srcStride;
- userdata[4] = (void *)dstStride;
- userdata[5] = (void *)should_dither;
-
- if( regflg==0 )
- {
- int st = hb_register_kernel_wrapper( "scale_opencl", av_scale_frame_func);
- if( !st )
- {
- hb_log( "register kernel[%s] faild %d", "scale_opencl",st );
- return;
- }
- regflg++;
- }
-
- if( !hb_run_kernel( "scale_opencl", userdata ))
- {
- hb_log("run kernel function[%s] faild", "scale_opencl_func" );
- return;
- }
-}
-
-#endif
diff --git a/libhb/scale_kernel.h b/libhb/scale_kernel.h
deleted file mode 100644
index ce413382f..000000000
--- a/libhb/scale_kernel.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/* scale_kernel.h
-
- Copyright (c) 2003-2012 HandBrake Team
- This file is part of the HandBrake source code
- Homepage: <http://handbrake.fr/>.
- It may be used under the terms of the GNU General Public License v2.
- For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html
-
- Authors: Peng Gao <[email protected]> <http://www.multicorewareinc.com/>
- Li Cao <[email protected]> <http://www.multicorewareinc.com/>
-
-
- */
-
-#ifndef _H_SCALE_KERNEL_H
-#define _H_SCALE_KERNEL_H
-#ifdef USE_OPENCL
-void av_scale_frame(ScaleContext *c, void *dst, void *src, int *srcStride, int *dstStride, int *should_dither);
-#endif
-#endif
diff --git a/libhb/vadxva2.c b/libhb/vadxva2.c
index 4137b63c6..be82f8372 100644
--- a/libhb/vadxva2.c
+++ b/libhb/vadxva2.c
@@ -20,7 +20,6 @@
#endif
#include "oclnv12toyuv.h"
-#include "scale.h"
#endif
#ifdef USE_HWD
@@ -577,34 +576,6 @@ static void hb_copy_from_nv12( uint8_t *dst, uint8_t *src[2], size_t src_pitch[2
}
}
-#ifdef USE_OPENCL
-void hb_init_filter( cl_mem src, int srcwidth, int srcheight, uint8_t* dst, int dstwidth, int dstheight, int *crop )
-{
- T_FilterLink fl = {0};
- int STEP = srcwidth * srcheight * 3 / 2;
- int OUTSTEP = dstwidth * dstheight * 3 / 2;
- int HEIGHT = srcheight;
- int LINESIZEY = srcwidth;
- int LINESIZEUV = srcwidth / 2;
-
- cl_mem cl_outbuf;
-
- if( !hb_create_buffer( &(cl_outbuf), CL_MEM_WRITE_ONLY, OUTSTEP ) )
- {
- hb_log("av_create_buffer cl_outbuf Error");
- return;
- }
-
- fl.cl_outbuf = cl_outbuf;
-
- scale_run( src, fl.cl_outbuf, LINESIZEY, LINESIZEUV, HEIGHT );
-
- hb_read_opencl_buffer( fl.cl_outbuf, dst, OUTSTEP );
- CL_FREE( cl_outbuf );
-
- return;
-}
-#endif
/**
* lock frame data form surface.
* nv12 to yuv with opencl and with C reference