summary refs log tree commit diff stats
diff options
context:
space:
mode:
author maxd <[email protected]> 2017-02-08 13:17:00 +0100
committer Bradley Sepos <[email protected]> 2017-03-05 12:06:11 -0500
commit 5daaebf8a105bbc62d6431730478b419cb3f14e0 (patch)
tree 99e6e24ec65ab99e495cbbff8690b14350e02bfa
parent 45eb6a58ae7fd4e0f84d9e64eb78def0eebfe274 (diff)
qsv: adding hevc10 support starting from KBL platform
-rw-r--r-- contrib/ffmpeg/A05-p10-output-support.patch 149
-rw-r--r-- libhb/enc_qsv.c 53
-rw-r--r-- libhb/qsv_common.c 15
3 files changed, 203 insertions, 14 deletions
diff --git a/contrib/ffmpeg/A05-p10-output-support.patch b/contrib/ffmpeg/A05-p10-output-support.patch
new file mode 100644
index 000000000..5656cfc46
--- /dev/null
+++ b/contrib/ffmpeg/A05-p10-output-support.patch
@@ -0,0 +1,149 @@
+diff -Naur ./libav-12.org/libswscale/output.c ./libav-12/libswscale/output.c
+--- ./libav-12.org/libswscale/output.c 2016-10-16 23:10:02.000000000 +0200
++++ ./libav-12/libswscale/output.c 2017-02-07 23:37:28.150180400 +0100
+@@ -295,6 +295,98 @@
+ }
+ }
+
++
++#define output_pixel(pos, val) \
++ if (big_endian) { \
++ AV_WB16(pos, av_clip_uintp2(val >> shift, 10) << 6); \
++ } else { \
++ AV_WL16(pos, av_clip_uintp2(val >> shift, 10) << 6); \
++ }
++
++static void yuv2p010l1_c(const int16_t *src,
++ uint16_t *dest, int dstW,
++ int big_endian)
++{
++ int i;
++ int shift = 5;
++
++ for (i = 0; i < dstW; i++) {
++ int val = src[i] + (1 << (shift - 1));
++ output_pixel(&dest[i], val);
++ }
++}
++
++static void yuv2p010lX_c(const int16_t *filter, int filterSize,
++ const int16_t **src, uint16_t *dest, int dstW,
++ int big_endian)
++{
++ int i, j;
++ int shift = 17;
++
++ for (i = 0; i < dstW; i++) {
++ int val = 1 << (shift - 1);
++
++ for (j = 0; j < filterSize; j++)
++ val += src[j][i] * filter[j];
++
++ output_pixel(&dest[i], val);
++ }
++}
++
++static void yuv2p010cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterSize,
++ const int16_t **chrUSrc, const int16_t **chrVSrc,
++ uint8_t *dest8, int chrDstW)
++{
++ uint16_t *dest = (uint16_t*)dest8;
++ int shift = 17;
++ int big_endian = c->dstFormat == AV_PIX_FMT_P010BE;
++ int i, j;
++
++ for (i = 0; i < chrDstW; i++) {
++ int u = 1 << (shift - 1);
++ int v = 1 << (shift - 1);
++
++ for (j = 0; j < chrFilterSize; j++) {
++ u += chrUSrc[j][i] * chrFilter[j];
++ v += chrVSrc[j][i] * chrFilter[j];
++ }
++
++ output_pixel(&dest[2*i] , u);
++ output_pixel(&dest[2*i+1], v);
++ }
++}
++
++static void yuv2p010l1_LE_c(const int16_t *src,
++ uint8_t *dest, int dstW,
++ const uint8_t *dither, int offset)
++{
++ yuv2p010l1_c(src, (uint16_t*)dest, dstW, 0);
++}
++
++static void yuv2p010l1_BE_c(const int16_t *src,
++ uint8_t *dest, int dstW,
++ const uint8_t *dither, int offset)
++{
++ yuv2p010l1_c(src, (uint16_t*)dest, dstW, 1);
++}
++
++static void yuv2p010lX_LE_c(const int16_t *filter, int filterSize,
++ const int16_t **src, uint8_t *dest, int dstW,
++ const uint8_t *dither, int offset)
++{
++ yuv2p010lX_c(filter, filterSize, src, (uint16_t*)dest, dstW, 0);
++}
++
++static void yuv2p010lX_BE_c(const int16_t *filter, int filterSize,
++ const int16_t **src, uint8_t *dest, int dstW,
++ const uint8_t *dither, int offset)
++{
++ yuv2p010lX_c(filter, filterSize, src, (uint16_t*)dest, dstW, 1);
++}
++
++#undef output_pixel
++
++
+ #define accumulate_bit(acc, val) \
+ acc <<= 1; \
+ acc |= (val) >= (128 + 110)
+@@ -1361,7 +1453,11 @@
+ enum AVPixelFormat dstFormat = c->dstFormat;
+ const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(dstFormat);
+
+- if (is16BPS(dstFormat)) {
++ if (dstFormat == AV_PIX_FMT_P010LE || dstFormat == AV_PIX_FMT_P010BE) {
++ *yuv2plane1 = isBE(dstFormat) ? yuv2p010l1_BE_c : yuv2p010l1_LE_c;
++ *yuv2planeX = isBE(dstFormat) ? yuv2p010lX_BE_c : yuv2p010lX_LE_c;
++ *yuv2nv12cX = yuv2p010cX_c;
++ } else if (is16BPS(dstFormat)) {
+ *yuv2planeX = isBE(dstFormat) ? yuv2planeX_16BE_c : yuv2planeX_16LE_c;
+ *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_16BE_c : yuv2plane1_16LE_c;
+ } else if (is9_OR_10BPS(dstFormat)) {
+diff -Naur ./libav-12.org/libswscale/utils.c ./libav-12/libswscale/utils.c
+--- ./libav-12.org/libswscale/utils.c 2016-10-16 23:10:02.000000000 +0200
++++ ./libav-12/libswscale/utils.c 2017-02-07 23:20:09.617945500 +0100
+@@ -185,8 +185,8 @@
+ [AV_PIX_FMT_GBRAP16BE] = { 1, 0 },
+ [AV_PIX_FMT_XYZ12BE] = { 0, 0, 1 },
+ [AV_PIX_FMT_XYZ12LE] = { 0, 0, 1 },
+- [AV_PIX_FMT_P010LE] = { 1, 0 },
+- [AV_PIX_FMT_P010BE] = { 1, 0 },
++ [AV_PIX_FMT_P010LE] = { 1, 1 },
++ [AV_PIX_FMT_P010BE] = { 1, 1 },
+ };
+
+ int sws_isSupportedInput(enum AVPixelFormat pix_fmt)
+diff -Naur ./libav-12.org/libswscale/x86/swscale.c ./libav-12/libswscale/x86/swscale.c
+--- ./libav-12.org/libswscale/x86/swscale.c 2016-10-16 23:10:02.000000000 +0200
++++ ./libav-12/libswscale/x86/swscale.c 2017-02-07 23:15:14.000000000 +0100
+@@ -338,14 +338,14 @@
+ #define ASSIGN_VSCALEX_FUNC(vscalefn, opt, do_16_case, condition_8bit) \
+ switch(c->dstBpc){ \
+ case 16: do_16_case; break; \
+- case 10: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2planeX_10_ ## opt; break; \
++ case 10: if (!isBE(c->dstFormat) && c->dstFormat != AV_PIX_FMT_P010LE) vscalefn = ff_yuv2planeX_10_ ## opt; break; \
+ case 9: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2planeX_9_ ## opt; break; \
+ default: if (condition_8bit) vscalefn = ff_yuv2planeX_8_ ## opt; break; \
+ }
+ #define ASSIGN_VSCALE_FUNC(vscalefn, opt1, opt2, opt2chk) \
+ switch(c->dstBpc){ \
+ case 16: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2plane1_16_ ## opt1; break; \
+- case 10: if (!isBE(c->dstFormat) && opt2chk) vscalefn = ff_yuv2plane1_10_ ## opt2; break; \
++ case 10: if (!isBE(c->dstFormat) && c->dstFormat != AV_PIX_FMT_P010LE && opt2chk) vscalefn = ff_yuv2plane1_10_ ## opt2; break; \
+ case 9: if (!isBE(c->dstFormat) && opt2chk) vscalefn = ff_yuv2plane1_9_ ## opt2; break; \
+ default: vscalefn = ff_yuv2plane1_8_ ## opt1; break; \
+ }
diff --git a/libhb/enc_qsv.c b/libhb/enc_qsv.c
index b6e80687e..a419767ee 100644
--- a/libhb/enc_qsv.c
+++ b/libhb/enc_qsv.c
@@ -302,8 +302,14 @@ static int qsv_hevc_make_header(hb_work_object_t *w, mfxSession session)
ret = -1;
goto end;
}
+
+ /* need more space for 10bits */
+ if (pv->param.videoParam->mfx.FrameInfo.FourCC == MFX_FOURCC_P010)
+ {
+ hb_buffer_realloc(bitstream_buf,bitstream_buf->size*2);
+ }
bitstream.Data = bitstream_buf->data;
- bitstream.MaxLength = bitstream_buf->size;
+ bitstream.MaxLength = bitstream_buf->alloc;
/* We only need to encode one frame, so we only need one surface */
mfxU16 Height = pv->param.videoParam->mfx.FrameInfo.Height;
@@ -521,13 +527,26 @@ int qsv_enc_init(hb_work_private_t *pv)
}
else
{
- pv->sws_context_to_nv12 = hb_sws_get_context(
- job->width, job->height,
- AV_PIX_FMT_YUV420P,
- job->width, job->height,
- AV_PIX_FMT_NV12,
- SWS_LANCZOS|SWS_ACCURATE_RND,
- SWS_CS_DEFAULT);
+ if (pv->param.videoParam->mfx.CodecProfile == MFX_PROFILE_HEVC_MAIN10)
+ {
+ pv->sws_context_to_nv12 = hb_sws_get_context(
+ job->width, job->height,
+ AV_PIX_FMT_YUV420P,
+ job->width, job->height,
+ AV_PIX_FMT_P010LE,
+ SWS_LANCZOS|SWS_ACCURATE_RND,
+ SWS_CS_DEFAULT);
+ }
+ else
+ {
+ pv->sws_context_to_nv12 = hb_sws_get_context(
+ job->width, job->height,
+ AV_PIX_FMT_YUV420P,
+ job->width, job->height,
+ AV_PIX_FMT_NV12,
+ SWS_LANCZOS|SWS_ACCURATE_RND,
+ SWS_CS_DEFAULT);
+ }
}
// allocate tasks
@@ -589,14 +608,17 @@ int qsv_enc_init(hb_work_private_t *pv)
{
qsv_encode->surface_num = HB_QSV_SURFACE_NUM;
}
+
+ /* should have 15bpp/AV_PIX_FMT_YUV420P10LE (almost x2) instead of 12bpp/AV_PIX_FMT_NV12 */
+ int bpp12 = (pv->param.videoParam->mfx.CodecProfile == MFX_PROFILE_HEVC_MAIN10) ? 6 : 3;
for (i = 0; i < qsv_encode->surface_num; i++)
{
mfxFrameSurface1 *surface = av_mallocz(sizeof(mfxFrameSurface1));
mfxFrameInfo info = pv->param.videoParam->mfx.FrameInfo;
surface->Info = info;
- surface->Data.Pitch = info.Width;
- surface->Data.Y = av_mallocz(info.Width * info.Height * 3 / 2);
- surface->Data.VU = surface->Data.Y + info.Width * info.Height;
+ surface->Data.Pitch = info.Width * (bpp12 == 6 ? 2 : 1);
+ surface->Data.Y = av_mallocz(info.Width * info.Height * (bpp12 / 2.0));
+ surface->Data.VU = surface->Data.Y + info.Width * info.Height * (bpp12 == 6 ? 2 : 1);
qsv_encode->p_surfaces[i] = surface;
}
}
@@ -847,12 +869,21 @@ int encqsvInit(hb_work_object_t *w, hb_job_t *job)
hb_error("encqsvInit: bad profile %s", job->encoder_profile);
return -1;
}
+
if (hb_qsv_level_parse(&pv->param, pv->qsv_info, job->encoder_level))
{
hb_error("encqsvInit: bad level %s", job->encoder_level);
return -1;
}
+ if (pv->param.videoParam->mfx.CodecProfile == MFX_PROFILE_HEVC_MAIN10)
+ {
+ pv->param.videoParam->mfx.FrameInfo.FourCC = MFX_FOURCC_P010;
+ pv->param.videoParam->mfx.FrameInfo.BitDepthLuma = 10;
+ pv->param.videoParam->mfx.FrameInfo.BitDepthChroma = 10;
+ pv->param.videoParam->mfx.FrameInfo.Shift = 0;
+ }
+
// interlaced encoding is not always possible
if (pv->param.videoParam->mfx.CodecId == MFX_CODEC_AVC &&
pv->param.videoParam->mfx.FrameInfo.PicStruct != MFX_PICSTRUCT_PROGRESSIVE)
diff --git a/libhb/qsv_common.c b/libhb/qsv_common.c
index 011c94e10..cef199092 100644
--- a/libhb/qsv_common.c
+++ b/libhb/qsv_common.c
@@ -148,7 +148,7 @@ static int qsv_implementation_is_hardware(mfxIMPL implementation)
int hb_qsv_available()
{
return ((hb_qsv_video_encoder_is_enabled(HB_VCODEC_QSV_H264) ? HB_VCODEC_QSV_H264 : 0) |
- (hb_qsv_video_encoder_is_enabled(HB_VCODEC_QSV_H265) ? HB_VCODEC_QSV_H265 : 0));
+ (hb_qsv_video_encoder_is_enabled(HB_VCODEC_QSV_H265) ? HB_VCODEC_QSV_H265 : 0));
}
int hb_qsv_video_encoder_is_enabled(int encoder)
@@ -781,14 +781,14 @@ void hb_qsv_info_print()
qsv_hardware_version.Major, qsv_hardware_version.Minor,
HB_QSV_MINVERSION_MAJOR, HB_QSV_MINVERSION_MINOR);
}
-
+
if (qsv_software_version.Version)
{
hb_log(" - Intel Media SDK software: API %"PRIu16".%"PRIu16" (minimum: %"PRIu16".%"PRIu16")",
qsv_software_version.Major, qsv_software_version.Minor,
HB_QSV_MINVERSION_MAJOR, HB_QSV_MINVERSION_MINOR);
}
-
+
if (hb_qsv_info_avc != NULL && hb_qsv_info_avc->available)
{
hb_log(" - H.264 encoder: yes");
@@ -1542,6 +1542,15 @@ int hb_qsv_profile_parse(hb_qsv_param_t *param, hb_qsv_info_t *info, const char
case MFX_CODEC_HEVC:
profile = hb_triplet4key(hb_qsv_h265_profiles, profile_key);
+
+ /* HEVC10 supported starting from KBL/G6 */
+ if (profile->value == MFX_PROFILE_HEVC_MAIN10 &&
+ qsv_hardware_generation(hb_get_cpu_platform()) < QSV_G6)
+ {
+ hb_log("HEVC Main10 is not supported on this platform");
+ profile = NULL;
+ }
+
break;
default: