Big merge, QSV to trunk: part 2 (new files).

git-svn-id: svn://svn.handbrake.fr/HandBrake/trunk@5738 b64f7644-9d1e-0410-96f1-a4d463321fa5
author: Rodeo <[email protected]> 2013-08-22 20:34:44 +0000
committer: Rodeo <[email protected]> 2013-08-22 20:34:44 +0000
commit: 3326f988806a5decae025727784a19c8cc223833 (patch)
tree: cd75adb1975d223d7a0fd43a31030a78939e73d3 /libhb/enc_qsv.c
parent: d41905d539046445e1b81499ff7bd04d170c91d4 (diff)
1 files changed, 1543 insertions, 0 deletions
diff --git a/libhb/enc_qsv.c b/libhb/enc_qsv.c
new file mode 100644
index 000000000..c1c832a81
--- /dev/null
+++ b/libhb/enc_qsv.c
@@ -0,0 +1,1543 @@
+/* ********************************************************************* *\
+
+Copyright (C) 2013 Intel Corporation.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+- Neither the name of Intel Corporation nor the names of its contributors
+may be used to endorse or promote products derived from this software
+without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION "AS IS" AND ANY EXPRESS OR
+IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+IN NO EVENT SHALL INTEL CORPORATION BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+\* ********************************************************************* */
+
+#include "hb.h"
+#include "enc_qsv.h"
+#include "qsv_common.h"
+#include "qsv_memory.h"
+#include "h264_common.h"
+
+int  encqsvInit( hb_work_object_t *, hb_job_t * );
+int  encqsvWork( hb_work_object_t *, hb_buffer_t **, hb_buffer_t ** );
+void encqsvClose( hb_work_object_t * );
+
+hb_work_object_t hb_encqsv =
+{
+    WORK_ENCQSV,
+    "H.264/AVC encoder (Intel QSV)",
+    encqsvInit,
+    encqsvWork,
+    encqsvClose
+};
+
+struct hb_work_private_s
+{
+    hb_job_t *job;
+    uint32_t  frames_in;
+    uint32_t  frames_out;
+    int64_t   last_start;
+
+    hb_qsv_param_t param;
+    av_qsv_space enc_space;
+
+    mfxEncodeCtrl force_keyframe;
+    struct
+    {
+        int     index;
+        int64_t start;
+    } next_chapter;
+
+#define BFRM_DELAY_MAX 16
+    // for DTS generation (when MSDK API < 1.6 or VFR)
+    int            bfrm_delay;
+    int            bfrm_workaround;
+    int64_t        init_pts[BFRM_DELAY_MAX + 1];
+    hb_list_t     *list_dts;
+
+    int async_depth;
+    int max_async_depth;
+
+    // if encode-only, system memory used
+    int is_sys_mem;
+    struct SwsContext *sws_context_to_nv12;
+
+    // whether to expect input from VPP or from QSV decode
+    int is_vpp_present;
+
+    // whether the encoder is initialized
+    int init_done;
+
+    hb_list_t *delayed_processing;
+};
+
+// for DTS generation (when MSDK API < 1.6 or VFR)
+static void hb_qsv_add_new_dts(hb_list_t *list, int64_t new_dts)
+{
+    if (list != NULL)
+    {
+        int64_t *item = malloc(sizeof(int64_t));
+        if (item != NULL)
+        {
+            *item = new_dts;
+            hb_list_add(list, item);
+        }
+    }
+}
+static int64_t hb_qsv_pop_next_dts(hb_list_t *list)
+{
+    int64_t next_dts = INT64_MIN;
+    if (list != NULL && hb_list_count(list) > 0)
+    {
+        int64_t *item = hb_list_item(list, 0);
+        if (item != NULL)
+        {
+            next_dts = *item;
+            hb_list_rem(list, item);
+            free(item);
+        }
+    }
+    return next_dts;
+}
+
+static const char* qsv_h264_profile_xlat(int profile)
+{
+    switch (profile)
+    {
+        case MFX_PROFILE_AVC_CONSTRAINED_BASELINE:
+            return "Constrained Baseline";
+        case MFX_PROFILE_AVC_BASELINE:
+            return "Baseline";
+        case MFX_PROFILE_AVC_EXTENDED:
+            return "Extended";
+        case MFX_PROFILE_AVC_MAIN:
+            return "Main";
+        case MFX_PROFILE_AVC_CONSTRAINED_HIGH:
+            return "Constrained High";
+        case MFX_PROFILE_AVC_PROGRESSIVE_HIGH:
+            return "Progressive High";
+        case MFX_PROFILE_AVC_HIGH:
+            return "High";
+        case MFX_PROFILE_UNKNOWN:
+        default:
+            return NULL;
+    }
+}
+
+static const char* qsv_h264_level_xlat(int level)
+{
+    int i;
+    for (i = 0; hb_h264_level_names[i] != NULL; i++)
+    {
+        if (hb_h264_level_values[i] == level)
+        {
+            return hb_h264_level_names[i];
+        }
+    }
+    return NULL;
+}
+
+int qsv_enc_init(av_qsv_context *qsv, hb_work_private_t *pv)
+{
+    int i = 0;
+    mfxStatus sts;
+    hb_job_t *job = pv->job;
+
+    if (pv->init_done)
+    {
+        return 0;
+    }
+
+    if (qsv == NULL)
+    {
+        if (!pv->is_sys_mem)
+        {
+            hb_error("qsv_enc_init: decode enabled but no context!");
+            return 3;
+        }
+        job->qsv = qsv = av_mallocz(sizeof(av_qsv_context));
+    }
+
+    av_qsv_space *qsv_encode = qsv->enc_space;
+    if (qsv_encode == NULL)
+    {
+        // if only for encode
+        if (pv->is_sys_mem)
+        {
+            // no need to use additional sync as encode only -> single thread
+            // XXX: this zeroes the session handle, so call it before MFXInit
+            av_qsv_add_context_usage(qsv, 0);
+
+            // initialize the session
+            qsv->impl      = MFX_IMPL_AUTO_ANY;
+            qsv->ver.Major = AV_QSV_MSDK_VERSION_MAJOR;
+            qsv->ver.Minor = AV_QSV_MSDK_VERSION_MINOR;
+            sts = MFXInit(qsv->impl, &qsv->ver, &qsv->mfx_session);
+            if (sts != MFX_ERR_NONE)
+            {
+                hb_error("qsv_enc_init: MFXInit failed (%d)", sts);
+                *job->die = 1;
+                return -1;
+            }
+        }
+        qsv->enc_space = qsv_encode = &pv->enc_space;
+    }
+
+    if (!pv->is_sys_mem)
+    {
+        if (!pv->is_vpp_present && job->list_filter != NULL)
+        {
+            for (i = 0; i < hb_list_count(job->list_filter); i++)
+            {
+                hb_filter_object_t *filter = hb_list_item(job->list_filter, i);
+                if (filter->id == HB_FILTER_QSV_PRE  ||
+                    filter->id == HB_FILTER_QSV_POST ||
+                    filter->id == HB_FILTER_QSV)
+                {
+                    pv->is_vpp_present = 1;
+                    break;
+                }
+            }
+        }
+
+        if (pv->is_vpp_present)
+        {
+            if (qsv->vpp_space == NULL)
+            {
+                return 2;
+            }
+            for (i = 0; i < av_qsv_list_count(qsv->vpp_space); i++)
+            {
+                av_qsv_space *vpp = av_qsv_list_item(qsv->vpp_space, i);
+                if (!vpp->is_init_done)
+                {
+                    return 2;
+                }
+            }
+        }
+
+        av_qsv_space *dec_space = qsv->dec_space;
+        if (dec_space == NULL || !dec_space->is_init_done)
+        {
+            return 2;
+        }
+    }
+    else
+    {
+        pv->sws_context_to_nv12 = hb_sws_get_context(job->width, job->height,
+                                                     AV_PIX_FMT_YUV420P,
+                                                     job->width, job->height,
+                                                     AV_PIX_FMT_NV12,
+                                                     SWS_LANCZOS|SWS_ACCURATE_RND);
+    }
+
+    // allocate tasks
+    qsv_encode->p_buf_max_size = AV_QSV_BUF_SIZE_DEFAULT;
+    qsv_encode->tasks          = av_qsv_list_init(HAVE_THREADS);
+    for (i = 0; i < pv->max_async_depth; i++)
+    {
+        av_qsv_task *task    = av_mallocz(sizeof(av_qsv_task));
+        task->bs             = av_mallocz(sizeof(mfxBitstream));
+        task->bs->Data       = av_mallocz(sizeof(uint8_t) * qsv_encode->p_buf_max_size);
+        task->bs->MaxLength  = qsv_encode->p_buf_max_size;
+        task->bs->DataLength = 0;
+        task->bs->DataOffset = 0;
+        av_qsv_list_add(qsv_encode->tasks, task);
+    }
+
+    // setup surface allocation
+    qsv_encode->m_mfxVideoParam.IOPattern = (pv->is_sys_mem                 ?
+                                             MFX_IOPATTERN_IN_SYSTEM_MEMORY :
+                                             MFX_IOPATTERN_IN_OPAQUE_MEMORY);
+    memset(&qsv_encode->request, 0, sizeof(mfxFrameAllocRequest) * 2);
+    sts = MFXVideoENCODE_QueryIOSurf(qsv->mfx_session,
+                                     &qsv_encode->m_mfxVideoParam,
+                                     &qsv_encode->request);
+    if (sts < MFX_ERR_NONE) // ignore warnings
+    {
+        hb_error("qsv_enc_init: MFXVideoENCODE_QueryIOSurf failed (%d)", sts);
+        *job->die = 1;
+        return -1;
+    }
+
+    // allocate surfaces
+    if (pv->is_sys_mem)
+    {
+        qsv_encode->surface_num = FFMIN(qsv_encode->request[0].NumFrameSuggested +
+                                        pv->max_async_depth, AV_QSV_SURFACE_NUM);
+        if (qsv_encode->surface_num <= 0)
+        {
+            qsv_encode->surface_num = AV_QSV_SURFACE_NUM;
+        }
+        for (i = 0; i < qsv_encode->surface_num; i++)
+        {
+            qsv_encode->p_surfaces[i] = av_mallocz(sizeof(mfxFrameSurface1));
+            AV_QSV_CHECK_POINTER(qsv_encode->p_surfaces[i], MFX_ERR_MEMORY_ALLOC);
+            memcpy(&(qsv_encode->p_surfaces[i]->Info),
+                   &(qsv_encode->request[0].Info), sizeof(mfxFrameInfo));
+        }
+    }
+    else
+    {
+        av_qsv_space *in_space = qsv->dec_space;
+        if (pv->is_vpp_present)
+        {
+            // we get our input from VPP instead
+            in_space = av_qsv_list_item(qsv->vpp_space,
+                                        av_qsv_list_count(qsv->vpp_space) - 1);
+        }
+        // introduced in API 1.3
+        memset(&qsv_encode->ext_opaque_alloc, 0, sizeof(mfxExtOpaqueSurfaceAlloc));
+        qsv_encode->ext_opaque_alloc.Header.BufferId = MFX_EXTBUFF_OPAQUE_SURFACE_ALLOCATION;
+        qsv_encode->ext_opaque_alloc.Header.BufferSz = sizeof(mfxExtOpaqueSurfaceAlloc);
+        qsv_encode->ext_opaque_alloc.In.Surfaces     = in_space->p_surfaces;
+        qsv_encode->ext_opaque_alloc.In.NumSurface   = in_space->surface_num;
+        qsv_encode->ext_opaque_alloc.In.Type         = qsv_encode->request[0].Type;
+        qsv_encode->m_mfxVideoParam.ExtParam[qsv_encode->m_mfxVideoParam.NumExtParam++] = (mfxExtBuffer*)&qsv_encode->ext_opaque_alloc;
+    }
+
+    // allocate sync points
+    qsv_encode->sync_num = (qsv_encode->surface_num                         ?
+                            FFMIN(qsv_encode->surface_num, AV_QSV_SYNC_NUM) :
+                            AV_QSV_SYNC_NUM);
+    for (i = 0; i < qsv_encode->sync_num; i++)
+    {
+        qsv_encode->p_syncp[i] = av_mallocz(sizeof(av_qsv_sync));
+        AV_QSV_CHECK_POINTER(qsv_encode->p_syncp[i], MFX_ERR_MEMORY_ALLOC);
+        qsv_encode->p_syncp[i]->p_sync = av_mallocz(sizeof(mfxSyncPoint));
+        AV_QSV_CHECK_POINTER(qsv_encode->p_syncp[i]->p_sync, MFX_ERR_MEMORY_ALLOC);
+    }
+
+    // initialize the encoder
+    sts = MFXVideoENCODE_Init(qsv->mfx_session, &qsv_encode->m_mfxVideoParam);
+    if (sts < MFX_ERR_NONE) // ignore warnings
+    {
+        hb_error("qsv_enc_init: MFXVideoENCODE_Init failed (%d)", sts);
+        *job->die = 1;
+        return -1;
+    }
+    qsv_encode->is_init_done = 1;
+
+    pv->init_done = 1;
+    return 0;
+}
+
+/***********************************************************************
+ * encqsvInit
+ ***********************************************************************
+ *
+ **********************************************************************/
+int encqsvInit(hb_work_object_t *w, hb_job_t *job)
+{
+    hb_work_private_t *pv = calloc(1, sizeof(hb_work_private_t));
+    w->private_data       = pv;
+
+    pv->job                = job;
+    pv->is_sys_mem         = !hb_qsv_decode_is_enabled(job);
+    pv->delayed_processing = hb_list_init();
+    pv->last_start         = INT64_MIN;
+    pv->frames_in          = 0;
+    pv->frames_out         = 0;
+    pv->init_done          = 0;
+    pv->is_vpp_present     = 0;
+
+    // set up a re-usable mfxEncodeCtrl to force keyframes (e.g. for chapters)
+    pv->force_keyframe.QP          = 0;
+    pv->force_keyframe.FrameType   = MFX_FRAMETYPE_I|MFX_FRAMETYPE_IDR|MFX_FRAMETYPE_REF;
+    pv->force_keyframe.NumExtParam = 0;
+    pv->force_keyframe.NumPayload  = 0;
+    pv->force_keyframe.ExtParam    = NULL;
+    pv->force_keyframe.Payload     = NULL;
+
+    pv->next_chapter.index = 0;
+    pv->next_chapter.start = INT64_MIN;
+
+    // default encoding parameters
+    if (hb_qsv_param_default(&pv->param, &pv->enc_space.m_mfxVideoParam))
+    {
+        hb_error("encqsvInit: hb_qsv_param_default failed");
+        return -1;
+    }
+
+    // set AsyncDepth to match that of decode and VPP
+    pv->param.videoParam->AsyncDepth = job->qsv_async_depth;
+
+    // enable and set colorimetry (video signal information)
+    pv->param.videoSignalInfo.ColourDescriptionPresent = 1;
+    switch (job->color_matrix_code)
+    {
+        case 4:
+            // custom
+            pv->param.videoSignalInfo.ColourPrimaries         = job->color_prim;
+            pv->param.videoSignalInfo.TransferCharacteristics = job->color_transfer;
+            pv->param.videoSignalInfo.MatrixCoefficients      = job->color_matrix;
+            break;
+        case 3:
+            // ITU BT.709 HD content
+            pv->param.videoSignalInfo.ColourPrimaries         = HB_COLR_PRI_BT709;
+            pv->param.videoSignalInfo.TransferCharacteristics = HB_COLR_TRA_BT709;
+            pv->param.videoSignalInfo.MatrixCoefficients      = HB_COLR_MAT_BT709;
+            break;
+        case 2:
+            // ITU BT.601 DVD or SD TV content (PAL)
+            pv->param.videoSignalInfo.ColourPrimaries         = HB_COLR_PRI_EBUTECH;
+            pv->param.videoSignalInfo.TransferCharacteristics = HB_COLR_TRA_BT709;
+            pv->param.videoSignalInfo.MatrixCoefficients      = HB_COLR_MAT_SMPTE170M;
+            break;
+        case 1:
+            // ITU BT.601 DVD or SD TV content (NTSC)
+            pv->param.videoSignalInfo.ColourPrimaries         = HB_COLR_PRI_SMPTEC;
+            pv->param.videoSignalInfo.TransferCharacteristics = HB_COLR_TRA_BT709;
+            pv->param.videoSignalInfo.MatrixCoefficients      = HB_COLR_MAT_SMPTE170M;
+            break;
+        default:
+            // detected during scan
+            pv->param.videoSignalInfo.ColourPrimaries         = job->title->color_prim;
+            pv->param.videoSignalInfo.TransferCharacteristics = job->title->color_transfer;
+            pv->param.videoSignalInfo.MatrixCoefficients      = job->title->color_matrix;
+            break;
+    }
+
+    // parse user-specified advanced options, if present
+    if (job->advanced_opts != NULL && job->advanced_opts[0] != '\0')
+    {
+        hb_dict_t *options_list;
+        hb_dict_entry_t *option = NULL;
+        options_list = hb_encopts_to_dict(job->advanced_opts, job->vcodec);
+        while ((option = hb_dict_next(options_list, option)) != NULL)
+        {
+            switch (hb_qsv_param_parse(&pv->param,
+                                       option->key, option->value, job->vcodec))
+            {
+                case HB_QSV_PARAM_OK:
+                    break;
+
+                case HB_QSV_PARAM_BAD_NAME:
+                    hb_log("encqsvInit: hb_qsv_param_parse: bad key %s",
+                           option->key);
+                    break;
+                case HB_QSV_PARAM_BAD_VALUE:
+                    hb_log("encqsvInit: hb_qsv_param_parse: bad value %s for key %s",
+                           option->value, option->key);
+                    break;
+                case HB_QSV_PARAM_UNSUPPORTED:
+                    hb_log("encqsvInit: hb_qsv_param_parse: unsupported option %s",
+                           option->key);
+                    break;
+
+                case HB_QSV_PARAM_ERROR:
+                default:
+                    hb_log("encqsvInit: hb_qsv_param_parse: unknown error");
+                    break;
+            }
+        }
+        hb_dict_free(&options_list);
+    }
+
+    // reload colorimetry in case values were set in advanced_opts
+    if (pv->param.videoSignalInfo.ColourDescriptionPresent)
+    {
+        job->color_matrix_code = 4;
+        job->color_prim        = pv->param.videoSignalInfo.ColourPrimaries;
+        job->color_transfer    = pv->param.videoSignalInfo.TransferCharacteristics;
+        job->color_matrix      = pv->param.videoSignalInfo.MatrixCoefficients;
+    }
+    else
+    {
+        job->color_matrix_code = 0;
+        job->color_prim        = HB_COLR_PRI_UNDEF;
+        job->color_transfer    = HB_COLR_TRA_UNDEF;
+        job->color_matrix      = HB_COLR_MAT_UNDEF;
+    }
+
+    // sanitize values that may exceed the Media SDK variable size
+    int64_t vrate, vrate_base;
+    int64_t par_width, par_height;
+    hb_limit_rational64(&vrate, &vrate_base,
+                        job->vrate, job->vrate_base, UINT32_MAX);
+    hb_limit_rational64(&par_width, &par_height,
+                        job->anamorphic.par_width,
+                        job->anamorphic.par_height, UINT16_MAX);
+
+    // some encoding parameters are used by filters to configure their output
+    if (pv->param.videoParam->mfx.FrameInfo.PicStruct != MFX_PICSTRUCT_PROGRESSIVE)
+    {
+        job->qsv_enc_info.align_height = AV_QSV_ALIGN32(job->height);
+    }
+    else
+    {
+        job->qsv_enc_info.align_height = AV_QSV_ALIGN16(job->height);
+    }
+    job->qsv_enc_info.align_width  = AV_QSV_ALIGN16(job->width);
+    job->qsv_enc_info.pic_struct   = pv->param.videoParam->mfx.FrameInfo.PicStruct;
+    job->qsv_enc_info.is_init_done = 1;
+
+    // encode to H.264 and set FrameInfo
+    pv->param.videoParam->mfx.CodecId                 = MFX_CODEC_AVC;
+    pv->param.videoParam->mfx.CodecLevel              = MFX_LEVEL_UNKNOWN;
+    pv->param.videoParam->mfx.CodecProfile            = MFX_PROFILE_UNKNOWN;
+    pv->param.videoParam->mfx.FrameInfo.FourCC        = MFX_FOURCC_NV12;
+    pv->param.videoParam->mfx.FrameInfo.ChromaFormat  = MFX_CHROMAFORMAT_YUV420;
+    pv->param.videoParam->mfx.FrameInfo.FrameRateExtN = vrate;
+    pv->param.videoParam->mfx.FrameInfo.FrameRateExtD = vrate_base;
+    pv->param.videoParam->mfx.FrameInfo.AspectRatioW  = par_width;
+    pv->param.videoParam->mfx.FrameInfo.AspectRatioH  = par_height;
+    pv->param.videoParam->mfx.FrameInfo.CropX         = 0;
+    pv->param.videoParam->mfx.FrameInfo.CropY         = 0;
+    pv->param.videoParam->mfx.FrameInfo.CropW         = job->width;
+    pv->param.videoParam->mfx.FrameInfo.CropH         = job->height;
+    pv->param.videoParam->mfx.FrameInfo.PicStruct     = job->qsv_enc_info.pic_struct;
+    pv->param.videoParam->mfx.FrameInfo.Width         = job->qsv_enc_info.align_width;
+    pv->param.videoParam->mfx.FrameInfo.Height        = job->qsv_enc_info.align_height;
+
+    // set H.264 profile and level
+    if (job->h264_profile != NULL && job->h264_profile[0] != '\0' &&
+        strcasecmp(job->h264_profile, "auto"))
+    {
+        if (!strcasecmp(job->h264_profile, "baseline"))
+        {
+            pv->param.videoParam->mfx.CodecProfile = MFX_PROFILE_AVC_BASELINE;
+        }
+        else if (!strcasecmp(job->h264_profile, "main"))
+        {
+            pv->param.videoParam->mfx.CodecProfile = MFX_PROFILE_AVC_MAIN;
+        }
+        else if (!strcasecmp(job->h264_profile, "high"))
+        {
+            pv->param.videoParam->mfx.CodecProfile = MFX_PROFILE_AVC_HIGH;
+        }
+        else
+        {
+            hb_error("encqsvInit: bad profile %s", job->h264_profile);
+            return -1;
+        }
+    }
+    if (job->h264_level != NULL && job->h264_level[0] != '\0' &&
+        strcasecmp(job->h264_level, "auto"))
+    {
+        int err;
+        int i = hb_qsv_atoindex(hb_h264_level_names, job->h264_level, &err);
+        if (err || i >= (sizeof(hb_h264_level_values) /
+                         sizeof(hb_h264_level_values[0])))
+        {
+            hb_error("encqsvInit: bad level %s", job->h264_level);
+            return -1;
+        }
+        else if (hb_qsv_info->capabilities & HB_QSV_CAP_MSDK_API_1_6)
+        {
+            pv->param.videoParam->mfx.CodecLevel = HB_QSV_CLIP3(MFX_LEVEL_AVC_1,
+                                                                MFX_LEVEL_AVC_52,
+                                                                hb_h264_level_values[i]);
+        }
+        else
+        {
+            // Media SDK API < 1.6, MFX_LEVEL_AVC_52 unsupported
+            pv->param.videoParam->mfx.CodecLevel = HB_QSV_CLIP3(MFX_LEVEL_AVC_1,
+                                                                MFX_LEVEL_AVC_51,
+                                                                hb_h264_level_values[i]);
+        }
+    }
+
+    // interlaced encoding is not always possible
+    if (pv->param.videoParam->mfx.FrameInfo.PicStruct != MFX_PICSTRUCT_PROGRESSIVE)
+    {
+        if (pv->param.videoParam->mfx.CodecProfile == MFX_PROFILE_AVC_CONSTRAINED_BASELINE ||
+            pv->param.videoParam->mfx.CodecProfile == MFX_PROFILE_AVC_BASELINE             ||
+            pv->param.videoParam->mfx.CodecProfile == MFX_PROFILE_AVC_PROGRESSIVE_HIGH)
+        {
+            hb_error("encqsvInit: profile %s doesn't support interlaced encoding",
+                     qsv_h264_profile_xlat(pv->param.videoParam->mfx.CodecProfile));
+            return -1;
+        }
+        if ((pv->param.videoParam->mfx.CodecLevel >= MFX_LEVEL_AVC_1b &&
+             pv->param.videoParam->mfx.CodecLevel <= MFX_LEVEL_AVC_2) ||
+            (pv->param.videoParam->mfx.CodecLevel >= MFX_LEVEL_AVC_42))
+        {
+            hb_error("encqsvInit: level %s doesn't support interlaced encoding",
+                     qsv_h264_level_xlat(pv->param.videoParam->mfx.CodecLevel));
+            return -1;
+        }
+    }
+
+    // set rate control paremeters
+    if (job->vquality >= 0)
+    {
+        // introduced in API 1.1
+        pv->param.videoParam->mfx.RateControlMethod = MFX_RATECONTROL_CQP;
+        pv->param.videoParam->mfx.QPI = HB_QSV_CLIP3(0, 51, job->vquality + pv->param.rc.cqp_offsets[0]);
+        pv->param.videoParam->mfx.QPP = HB_QSV_CLIP3(0, 51, job->vquality + pv->param.rc.cqp_offsets[1]);
+        pv->param.videoParam->mfx.QPB = HB_QSV_CLIP3(0, 51, job->vquality + pv->param.rc.cqp_offsets[2]);
+    }
+    else if (job->vbitrate > 0)
+    {
+        // sanitize lookahead
+        if (!(hb_qsv_info->capabilities & HB_QSV_CAP_OPTION2_LOOKAHEAD))
+        {
+            // lookahead not supported
+            pv->param.rc.lookahead = 0;
+        }
+        else if (pv->param.rc.lookahead > 0 &&
+                 pv->param.videoParam->mfx.FrameInfo.PicStruct != MFX_PICSTRUCT_PROGRESSIVE)
+        {
+            // user force-enabled lookahead but we can't use it
+            hb_log("encqsvInit: MFX_RATECONTROL_LA not used (LookAhead is progressive-only)");
+            pv->param.rc.lookahead = 0;
+        }
+        else if (pv->param.rc.lookahead < 0)
+        {
+            if (pv->param.rc.vbv_max_bitrate > 0 ||
+                pv->param.videoParam->mfx.FrameInfo.PicStruct != MFX_PICSTRUCT_PROGRESSIVE)
+            {
+                // lookahead doesn't support VBV or interlaced encoding
+                pv->param.rc.lookahead = 0;
+            }
+            else
+            {
+                // set automatically based on target usage
+                pv->param.rc.lookahead = (pv->param.videoParam->mfx.TargetUsage <= MFX_TARGETUSAGE_2);
+            }
+        }
+        else
+        {
+            // user force-enabled or force-disabled lookahead
+            pv->param.rc.lookahead = !!pv->param.rc.lookahead;
+        }
+        if (pv->param.rc.lookahead)
+        {
+            // introduced in API 1.7
+            pv->param.videoParam->mfx.RateControlMethod = MFX_RATECONTROL_LA;
+            pv->param.videoParam->mfx.TargetKbps        = job->vbitrate;
+            if (pv->param.rc.vbv_max_bitrate > 0)
+            {
+                hb_log("encqsvInit: MFX_RATECONTROL_LA, ignoring VBV");
+            }
+        }
+        else if (job->vbitrate == pv->param.rc.vbv_max_bitrate)
+        {
+            // introduced in API 1.0
+            pv->param.videoParam->mfx.RateControlMethod = MFX_RATECONTROL_CBR;
+            pv->param.videoParam->mfx.MaxKbps           = job->vbitrate;
+            pv->param.videoParam->mfx.TargetKbps        = job->vbitrate;
+            pv->param.videoParam->mfx.BufferSizeInKB    = (pv->param.rc.vbv_buffer_size / 8);
+            // only set BufferSizeInKB and InitialDelayInKB is bufsize is set
+            // else Media SDK will pick some good values for us automatically
+            if (pv->param.rc.vbv_buffer_size > 0)
+            {
+                if (pv->param.rc.vbv_buffer_init > 1.0)
+                {
+                    pv->param.videoParam->mfx.InitialDelayInKB = (pv->param.rc.vbv_buffer_init / 8);
+                }
+                else
+                {
+                    pv->param.videoParam->mfx.InitialDelayInKB = (pv->param.rc.vbv_buffer_size *
+                                                                  pv->param.rc.vbv_buffer_init / 8);
+                }
+                pv->param.videoParam->mfx.BufferSizeInKB = (pv->param.rc.vbv_buffer_size / 8);
+            }
+        }
+        else if (pv->param.rc.vbv_max_bitrate > 0)
+        {
+            // introduced in API 1.0
+            pv->param.videoParam->mfx.RateControlMethod = MFX_RATECONTROL_VBR;
+            pv->param.videoParam->mfx.MaxKbps           = pv->param.rc.vbv_max_bitrate;
+            pv->param.videoParam->mfx.TargetKbps        = job->vbitrate;
+            // only set BufferSizeInKB and InitialDelayInKB is bufsize is set
+            // else Media SDK will pick some good values for us automatically
+            if (pv->param.rc.vbv_buffer_size > 0)
+            {
+                if (pv->param.rc.vbv_buffer_init > 1.0)
+                {
+                    pv->param.videoParam->mfx.InitialDelayInKB = (pv->param.rc.vbv_buffer_init / 8);
+                }
+                else
+                {
+                    pv->param.videoParam->mfx.InitialDelayInKB = (pv->param.rc.vbv_buffer_size *
+                                                                  pv->param.rc.vbv_buffer_init / 8);
+                }
+                pv->param.videoParam->mfx.BufferSizeInKB = (pv->param.rc.vbv_buffer_size / 8);
+            }
+        }
+        else
+        {
+            // introduced in API 1.3
+            // Media SDK will set Accuracy and Convergence for us automatically
+            pv->param.videoParam->mfx.RateControlMethod = MFX_RATECONTROL_AVBR;
+            pv->param.videoParam->mfx.TargetKbps        = job->vbitrate;
+        }
+    }
+    else
+    {
+        hb_error("encqsvInit: invalid rate control (%d, %d)",
+                 job->vquality, job->vbitrate);
+        return -1;
+    }
+
+    // set the keyframe interval
+    if (pv->param.gop.gop_pic_size < 0)
+    {
+        int rate = (int)((double)job->vrate / (double)job->vrate_base + 0.5);
+        if (pv->param.videoParam->mfx.RateControlMethod == MFX_RATECONTROL_CQP)
+        {
+            // ensure B-pyramid is enabled for CQP on Haswell
+            pv->param.gop.gop_pic_size = 32;
+        }
+        else
+        {
+            // set the keyframe interval based on the framerate
+            pv->param.gop.gop_pic_size = 5 * rate + 1;
+        }
+    }
+    pv->param.videoParam->mfx.GopPicSize = pv->param.gop.gop_pic_size;
+
+    // sanitize some settings that affect memory consumption
+    if (!pv->is_sys_mem)
+    {
+        // limit these to avoid running out of resources (causes hang)
+        pv->param.videoParam->mfx.GopRefDist   = FFMIN(pv->param.videoParam->mfx.GopRefDist,
+                                                       pv->param.rc.lookahead ? 8 : 16);
+        pv->param.codingOption2.LookAheadDepth = FFMIN(pv->param.codingOption2.LookAheadDepth,
+                                                       pv->param.rc.lookahead ? 48 - pv->param.videoParam->mfx.GopRefDist : 0);
+    }
+    else
+    {
+        // encode-only is a bit less sensitive to memory issues
+        pv->param.videoParam->mfx.GopRefDist   = FFMIN(pv->param.videoParam->mfx.GopRefDist, 16);
+        pv->param.codingOption2.LookAheadDepth = FFMIN(pv->param.codingOption2.LookAheadDepth,
+                                                       pv->param.rc.lookahead ? 60 : 0);
+    }
+
+    /*
+     * init a dummy encode-only session to get the SPS/PPS
+     * and the final output settings sanitized by Media SDK
+     * this is fine since the actual encode will use the same
+     * values for all parameters relevant to the H.264 bitstream
+     */
+    mfxIMPL impl;
+    mfxStatus err;
+    mfxVersion version;
+    mfxVideoParam videoParam;
+    mfxExtBuffer* ExtParamArray[3];
+    mfxSession session = (mfxSession)0;
+    mfxExtCodingOption  option1_buf, *option1 = &option1_buf;
+    mfxExtCodingOption2 option2_buf, *option2 = &option2_buf;
+    mfxExtCodingOptionSPSPPS sps_pps_buf, *sps_pps = &sps_pps_buf;
+    impl          = MFX_IMPL_AUTO_ANY|MFX_IMPL_VIA_ANY;
+    version.Major = HB_QSV_MINVERSION_MAJOR;
+    version.Minor = HB_QSV_MINVERSION_MINOR;
+    err = MFXInit(impl, &version, &session);
+    if (err != MFX_ERR_NONE)
+    {
+        hb_error("encqsvInit: MFXInit failed (%d)", err);
+        return -1;
+    }
+    err = MFXVideoENCODE_Init(session, pv->param.videoParam);
+    if (err < MFX_ERR_NONE) // ignore warnings
+    {
+        hb_error("encqsvInit: MFXVideoENCODE_Init failed (%d)", err);
+        MFXClose(session);
+        return -1;
+    }
+    memset(&videoParam, 0, sizeof(mfxVideoParam));
+    videoParam.ExtParam = ExtParamArray;
+    videoParam.NumExtParam = 0;
+    // introduced in API 1.3
+    memset(sps_pps, 0, sizeof(mfxExtCodingOptionSPSPPS));
+    sps_pps->Header.BufferId = MFX_EXTBUFF_CODING_OPTION_SPSPPS;
+    sps_pps->Header.BufferSz = sizeof(mfxExtCodingOptionSPSPPS);
+    sps_pps->SPSId           = 0;
+    sps_pps->SPSBuffer       = w->config->h264.sps;
+    sps_pps->SPSBufSize      = sizeof(w->config->h264.sps);
+    sps_pps->PPSId           = 0;
+    sps_pps->PPSBuffer       = w->config->h264.pps;
+    sps_pps->PPSBufSize      = sizeof(w->config->h264.pps);
+    videoParam.ExtParam[videoParam.NumExtParam++] = (mfxExtBuffer*)sps_pps;
+    // introduced in API 1.0
+    memset(option1, 0, sizeof(mfxExtCodingOption));
+    option1->Header.BufferId = MFX_EXTBUFF_CODING_OPTION;
+    option1->Header.BufferSz = sizeof(mfxExtCodingOption);
+    videoParam.ExtParam[videoParam.NumExtParam++] = (mfxExtBuffer*)option1;
+    // introduced in API 1.6
+    memset(option2, 0, sizeof(mfxExtCodingOption2));
+    option2->Header.BufferId = MFX_EXTBUFF_CODING_OPTION2;
+    option2->Header.BufferSz = sizeof(mfxExtCodingOption2);
+    if (hb_qsv_info->capabilities & HB_QSV_CAP_MSDK_API_1_6)
+    {
+        // attach to get the final output mfxExtCodingOption2 settings
+        videoParam.ExtParam[videoParam.NumExtParam++] = (mfxExtBuffer*)option2;
+    }
+    err = MFXVideoENCODE_GetVideoParam(session, &videoParam);
+    if (err == MFX_ERR_NONE)
+    {
+        // remove 32-bit NAL prefix (0x00 0x00 0x00 0x01)
+        w->config->h264.sps_length = sps_pps->SPSBufSize - 4;
+        memmove(w->config->h264.sps, w->config->h264.sps + 4,
+                w->config->h264.sps_length);
+        w->config->h264.pps_length = sps_pps->PPSBufSize - 4;
+        memmove(w->config->h264.pps, w->config->h264.pps + 4,
+                w->config->h264.pps_length);
+    }
+    else
+    {
+        hb_error("encqsvInit: MFXVideoENCODE_GetVideoParam failed (%d)", err);
+        MFXVideoENCODE_Close(session);
+        MFXClose            (session);
+        return -1;
+    }
+
+    // log implementation details before closing this session
+    if (pv->is_sys_mem)
+    {
+        hb_log("encqsvInit: using encode-only path");
+    }
+    if ((MFXQueryIMPL   (session, &impl)    == MFX_ERR_NONE) &&
+        (MFXQueryVersion(session, &version) == MFX_ERR_NONE))
+    {
+        hb_log("encqsvInit: using %s implementation (%"PRIu16".%"PRIu16")",
+               impl == MFX_IMPL_SOFTWARE ? "software" : "hardware",
+               version.Major, version.Minor);
+    }
+    MFXVideoENCODE_Close(session);
+    MFXClose            (session);
+
+    // log main output settings
+    hb_log("encqsvInit: TargetUsage %"PRIu16" AsyncDepth %"PRIu16"",
+           videoParam.mfx.TargetUsage, videoParam.AsyncDepth);
+    hb_log("encqsvInit: GopRefDist %"PRIu16" GopPicSize %"PRIu16" NumRefFrame %"PRIu16"",
+           videoParam.mfx.GopRefDist, videoParam.mfx.GopPicSize, videoParam.mfx.NumRefFrame);
+    if (videoParam.mfx.RateControlMethod == MFX_RATECONTROL_CQP)
+    {
+        char qpi[7], qpp[9], qpb[9];
+        snprintf(qpi, sizeof(qpi),  "QPI %"PRIu16"", videoParam.mfx.QPI);
+        snprintf(qpp, sizeof(qpp), " QPP %"PRIu16"", videoParam.mfx.QPP);
+        snprintf(qpb, sizeof(qpb), " QPB %"PRIu16"", videoParam.mfx.QPB);
+        hb_log("encqsvInit: RateControlMethod CQP with %s%s%s", qpi,
+               videoParam.mfx.GopPicSize > 1 ? qpp : "",
+               videoParam.mfx.GopRefDist > 1 ? qpb : "");
+    }
+    else
+    {
+        switch (videoParam.mfx.RateControlMethod)
+        {
+            case MFX_RATECONTROL_AVBR:
+                hb_log("encqsvInit: RateControlMethod AVBR TargetKbps %"PRIu16"",
+                       videoParam.mfx.TargetKbps);
+                break;
+            case MFX_RATECONTROL_LA:
+                hb_log("encqsvInit: RateControlMethod LA TargetKbps %"PRIu16" LookAheadDepth %"PRIu16"",
+                       videoParam.mfx.TargetKbps, option2->LookAheadDepth);
+                break;
+            case MFX_RATECONTROL_CBR:
+            case MFX_RATECONTROL_VBR:
+                hb_log("encqsvInit: RateControlMethod %s TargetKbps %"PRIu16" MaxKbps %"PRIu16" BufferSizeInKB %"PRIu16" InitialDelayInKB %"PRIu16"",
+                       videoParam.mfx.RateControlMethod == MFX_RATECONTROL_CBR ? "CBR" : "VBR",
+                       videoParam.mfx.TargetKbps,     videoParam.mfx.MaxKbps,
+                       videoParam.mfx.BufferSizeInKB, videoParam.mfx.InitialDelayInKB);
+                break;
+            default:
+                hb_log("encqsvInit: invalid rate control method %"PRIu16"",
+                       videoParam.mfx.RateControlMethod);
+                return -1;
+        }
+    }
+    switch (videoParam.mfx.FrameInfo.PicStruct)
+    {
+        case MFX_PICSTRUCT_PROGRESSIVE:
+            hb_log("encqsvInit: PicStruct progressive");
+            break;
+        case MFX_PICSTRUCT_FIELD_TFF:
+            hb_log("encqsvInit: PicStruct top field first");
+            break;
+        case MFX_PICSTRUCT_FIELD_BFF:
+            hb_log("encqsvInit: PicStruct bottom field first");
+            break;
+        default:
+            hb_error("encqsvInit: invalid PicStruct value 0x%"PRIx16"",
+                     videoParam.mfx.FrameInfo.PicStruct);
+            return -1;
+    }
+    const char *cavlc, *rdopt;
+    switch (option1->CAVLC)
+    {
+        case MFX_CODINGOPTION_ON:
+            cavlc = "on";
+            break;
+        case MFX_CODINGOPTION_OFF:
+            cavlc = "off";
+            break;
+        default:
+            hb_error("encqsvInit: invalid CAVLC value %"PRIu16"",
+                     option1->CAVLC);
+            return -1;
+    }
+    switch (option1->RateDistortionOpt)
+    {
+        case MFX_CODINGOPTION_ON:
+            rdopt = "on";
+            break;
+        case MFX_CODINGOPTION_OFF:
+            rdopt = "off";
+            break;
+        default:
+            hb_error("encqsvInit: invalid RateDistortionOpt value %"PRIu16"",
+                     option1->RateDistortionOpt);
+            return -1;
+    }
+    hb_log("encqsvInit: CAVLC %s RateDistortionOpt %s", cavlc, rdopt);
+    if (hb_qsv_info->capabilities & HB_QSV_CAP_OPTION2_BRC)
+    {
+        const char *mbbrc, *extbrc;
+        switch (option2->MBBRC)
+        {
+            case MFX_CODINGOPTION_ON:
+                mbbrc = "on";
+                break;
+            case MFX_CODINGOPTION_OFF:
+                mbbrc = "off";
+                break;
+            case MFX_CODINGOPTION_ADAPTIVE:
+                mbbrc = "adaptive";
+                break;
+            case MFX_CODINGOPTION_UNKNOWN:
+                mbbrc = "unknown (auto)";
+                break;
+            default:
+                hb_error("encqsvInit: invalid MBBRC value %"PRIu16"",
+                         option2->MBBRC);
+                return -1;
+        }
+        switch (option2->ExtBRC)
+        {
+            case MFX_CODINGOPTION_ON:
+                extbrc = "on";
+                break;
+            case MFX_CODINGOPTION_OFF:
+                extbrc = "off";
+                break;
+            case MFX_CODINGOPTION_ADAPTIVE:
+                extbrc = "adaptive";
+                break;
+            case MFX_CODINGOPTION_UNKNOWN:
+                extbrc = "unknown (auto)";
+                break;
+            default:
+                hb_error("encqsvInit: invalid ExtBRC value %"PRIu16"",
+                         option2->ExtBRC);
+                return -1;
+        }
+        hb_log("encqsvInit: MBBRC %s ExtBRC %s", mbbrc, extbrc);
+    }
+    if (hb_qsv_info->capabilities & HB_QSV_CAP_OPTION2_TRELLIS)
+    {
+        switch (option2->Trellis)
+        {
+            case MFX_TRELLIS_OFF:
+                hb_log("encqsvInit: Trellis off");
+                break;
+            case MFX_TRELLIS_UNKNOWN:
+                hb_log("encqsvInit: Trellis unknown (auto)");
+                break;
+            default:
+                hb_log("encqsvInit: Trellis on (%s%s%s)",
+                       option2->Trellis & MFX_TRELLIS_I ? "I" : "",
+                       option2->Trellis & MFX_TRELLIS_P ? "P" : "",
+                       option2->Trellis & MFX_TRELLIS_B ? "B" : "");
+                break;
+        }
+    }
+    hb_log("encqsvInit: H.264 profile %s @ level %s",
+           qsv_h264_profile_xlat(videoParam.mfx.CodecProfile),
+           qsv_h264_level_xlat  (videoParam.mfx.CodecLevel));
+
+    // AsyncDepth has now been set and/or modified by Media SDK
+    pv->max_async_depth = videoParam.AsyncDepth;
+    pv->async_depth     = 0;
+
+    // check whether B-frames are used
+    switch (videoParam.mfx.CodecProfile)
+    {
+        case MFX_PROFILE_AVC_BASELINE:
+        case MFX_PROFILE_AVC_CONSTRAINED_HIGH:
+        case MFX_PROFILE_AVC_CONSTRAINED_BASELINE:
+            pv->bfrm_delay = 0;
+            break;
+        default:
+            pv->bfrm_delay = 1;
+            break;
+    }
+    // sanitize
+    pv->bfrm_delay = FFMIN(pv->bfrm_delay, videoParam.mfx.GopRefDist - 1);
+    pv->bfrm_delay = FFMIN(pv->bfrm_delay, videoParam.mfx.GopPicSize - 2);
+    pv->bfrm_delay = FFMAX(pv->bfrm_delay, 0);
+    // let the muxer know whether to expect B-frames or not
+    job->areBframes = !!pv->bfrm_delay;
+    // check whether we need to generate DTS ourselves (MSDK API < 1.6 or VFR)
+    pv->bfrm_workaround = job->cfr != 1 || !(hb_qsv_info->capabilities &
+                                             HB_QSV_CAP_MSDK_API_1_6);
+    if (pv->bfrm_delay && pv->bfrm_workaround)
+    {
+        pv->bfrm_workaround = 1;
+        pv->list_dts        = hb_list_init();
+    }
+    else
+    {
+        pv->bfrm_workaround = 0;
+        pv->list_dts        = NULL;
+    }
+
+    return 0;
+}
+
+void encqsvClose( hb_work_object_t * w )
+{
+    int i = 0;
+    hb_work_private_t * pv = w->private_data;
+
+    hb_log( "enc_qsv done: frames: %u in, %u out", pv->frames_in, pv->frames_out );
+
+    // if system memory ( encode only ) additional free(s) for surfaces
+    if( pv && pv->job && pv->job->qsv &&
+        pv->job->qsv->is_context_active ){
+
+        av_qsv_context *qsv = pv->job->qsv;
+
+        if(qsv && qsv->enc_space){
+        av_qsv_space* qsv_encode = qsv->enc_space;
+        if(qsv_encode->is_init_done){
+            if(pv->is_sys_mem){
+                if( qsv_encode && qsv_encode->surface_num > 0)
+                    for (i = 0; i < qsv_encode->surface_num; i++){
+                        if( qsv_encode->p_surfaces[i]->Data.Y){
+                            free(qsv_encode->p_surfaces[i]->Data.Y);
+                            qsv_encode->p_surfaces[i]->Data.Y = 0;
+                        }
+                        if( qsv_encode->p_surfaces[i]->Data.VU){
+                            free(qsv_encode->p_surfaces[i]->Data.VU);
+                            qsv_encode->p_surfaces[i]->Data.VU = 0;
+                        }
+                        if(qsv_encode->p_surfaces[i])
+                            av_freep(qsv_encode->p_surfaces[i]);
+                    }
+                qsv_encode->surface_num = 0;
+
+                sws_freeContext(pv->sws_context_to_nv12);
+            }
+
+            for (i = av_qsv_list_count(qsv_encode->tasks); i > 1; i--){
+                av_qsv_task* task = av_qsv_list_item(qsv_encode->tasks,i-1);
+                if(task && task->bs){
+                    av_freep(&task->bs->Data);
+                    av_freep(&task->bs);
+                    av_qsv_list_rem(qsv_encode->tasks,task);
+                }
+            }
+            av_qsv_list_close(&qsv_encode->tasks);
+
+            for (i = 0; i < qsv_encode->surface_num; i++){
+                av_freep(&qsv_encode->p_surfaces[i]);
+            }
+            qsv_encode->surface_num = 0;
+
+            for (i = 0; i < qsv_encode->sync_num; i++){
+                    av_freep(&qsv_encode->p_syncp[i]->p_sync);
+                    av_freep(&qsv_encode->p_syncp[i]);
+            }
+            qsv_encode->sync_num = 0;
+
+                qsv_encode->is_init_done = 0;
+        }
+        }
+
+        if(qsv){
+        // closing the commong stuff
+        av_qsv_context_clean(qsv);
+
+        if(pv->is_sys_mem){
+            av_freep(&qsv);
+        }
+        }
+    }
+
+    if (pv != NULL)
+    {
+        if (pv->list_dts != NULL)
+        {
+            while (hb_list_count(pv->list_dts) > 0)
+            {
+                int64_t *item = hb_list_item(pv->list_dts, 0);
+                hb_list_rem(pv->list_dts, item);
+                free(item);
+            }
+            hb_list_close(&pv->list_dts);
+        }
+    }
+
+    free( pv );
+    w->private_data = NULL;
+}
+
+int encqsvWork( hb_work_object_t * w, hb_buffer_t ** buf_in,
+                  hb_buffer_t ** buf_out )
+{
+    hb_work_private_t * pv = w->private_data;
+    hb_job_t * job = pv->job;
+    hb_buffer_t * in = *buf_in, *buf;
+    av_qsv_context *qsv = job->qsv;
+    av_qsv_space* qsv_encode;
+    hb_buffer_t *last_buf = NULL;
+    mfxStatus sts = MFX_ERR_NONE;
+    int is_end = 0;
+    av_qsv_list* received_item = 0;
+    av_qsv_stage* stage = 0;
+
+    while(1){
+        int ret = qsv_enc_init(qsv, pv);
+        qsv = job->qsv;
+        qsv_encode = qsv->enc_space;
+        if(ret >= 2)
+            av_qsv_sleep(1);
+        else
+            break;
+    }
+    *buf_out = NULL;
+
+    if( in->size <= 0 )
+    {
+        // do delayed frames yet
+        *buf_in = NULL;
+        is_end = 1;
+    }
+
+    // input from decode, as called - we always have some to proceed with
+    while (1)
+    {
+    {
+        mfxEncodeCtrl    *work_control = NULL;
+        mfxFrameSurface1 *work_surface = NULL;
+
+        if (!is_end)
+        {
+            if (pv->is_sys_mem)
+            {
+                int surface_idx = av_qsv_get_free_surface(qsv_encode, qsv,
+                                                          &qsv_encode->request[0].Info, QSV_PART_ANY);
+                work_surface = qsv_encode->p_surfaces[surface_idx];
+
+                if (work_surface->Data.Y == NULL)
+                {
+                    // if nv12 and 422 12bits per pixel
+                    work_surface->Data.Pitch = pv->enc_space.m_mfxVideoParam.mfx.FrameInfo.Width;
+                    work_surface->Data.Y     = calloc(1,
+                                                      pv->enc_space.m_mfxVideoParam.mfx.FrameInfo.Width *
+                                                      pv->enc_space.m_mfxVideoParam.mfx.FrameInfo.Height);
+                    work_surface->Data.VU    = calloc(1,
+                                                      pv->enc_space.m_mfxVideoParam.mfx.FrameInfo.Width *
+                                                      pv->enc_space.m_mfxVideoParam.mfx.FrameInfo.Height / 2);
+                }
+                qsv_yuv420_to_nv12(pv->sws_context_to_nv12, work_surface, in);
+            }
+            else
+            {
+                received_item = in->qsv_details.qsv_atom;
+                stage = av_qsv_get_last_stage(received_item);
+                work_surface = stage->out.p_surface;
+
+                // don't let qsv->dts_seq grow needlessly
+                av_qsv_dts_pop(qsv);
+            }
+
+            work_surface->Data.TimeStamp = in->s.start;
+
+            /*
+             * Debugging code to check that the upstream modules have generated
+             * a continuous, self-consistent frame stream.
+             */
+            int64_t start = work_surface->Data.TimeStamp;
+            if (pv->last_start > start)
+            {
+                hb_log("encqsvWork: input continuity error, last start %"PRId64" start %"PRId64"",
+                       pv->last_start, start);
+            }
+            pv->last_start = start;
+
+            // for DTS generation (when MSDK API < 1.6 or VFR)
+            if (pv->bfrm_delay && pv->bfrm_workaround)
+            {
+                if (pv->frames_in <= BFRM_DELAY_MAX)
+                {
+                    pv->init_pts[pv->frames_in] = work_surface->Data.TimeStamp;
+                }
+                if (pv->frames_in)
+                {
+                    hb_qsv_add_new_dts(pv->list_dts,
+                                       work_surface->Data.TimeStamp);
+                }
+            }
+
+            /*
+             * Chapters have to start with a keyframe so request that this
+             * frame be coded as IDR. Since there may be several frames
+             * buffered in the encoder, remember the timestamp so when this
+             * frame finally pops out of the encoder we'll mark its buffer
+             * as the start of a chapter.
+             */
+            if (in->s.new_chap > 0 && job->chapter_markers)
+            {
+                if (!pv->next_chapter.index)
+                {
+                    pv->next_chapter.start = work_surface->Data.TimeStamp;
+                    pv->next_chapter.index = in->s.new_chap;
+                    work_control           = &pv->force_keyframe;
+                }
+                else
+                {
+                    // however unlikely, this can happen in theory
+                    hb_log("encqsvWork: got chapter %d before we could write chapter %d, dropping marker",
+                           in->s.new_chap, pv->next_chapter.index);
+                }
+                // don't let 'work_loop' put a chapter mark on the wrong buffer
+                in->s.new_chap = 0;
+            }
+
+            /*
+             * If interlaced encoding is requested during encoder initialization,
+             * but the input mfxFrameSurface1 is flagged as progressive here,
+             * the output bitstream will be progressive (according to MediaInfo).
+             *
+             * Assume the user knows what he's doing (say he is e.g. encoding a
+             * progressive-flagged source using interlaced compression - he may
+             * well have a good reason to do so; mis-flagged sources do exist).
+             */
+            work_surface->Info.PicStruct = pv->enc_space.m_mfxVideoParam.mfx.FrameInfo.PicStruct;
+        }
+        else{
+            work_surface = NULL;
+            received_item = NULL;
+        }
+        int sync_idx = av_qsv_get_free_sync( qsv_encode, qsv );
+        if (sync_idx == -1)
+        {
+            hb_error("qsv: Not enough resources allocated for QSV encode");
+            return 0;
+        }
+        av_qsv_task *task = av_qsv_list_item( qsv_encode->tasks, pv->async_depth );
+
+        for (;;)
+        {
+            // Encode a frame asychronously (returns immediately)
+            sts = MFXVideoENCODE_EncodeFrameAsync(qsv->mfx_session,
+                                                  work_control, work_surface, task->bs,
+                                                  qsv_encode->p_syncp[sync_idx]->p_sync);
+
+            if (MFX_ERR_MORE_DATA == sts || (MFX_ERR_NONE <= sts && MFX_WRN_DEVICE_BUSY != sts))
+                if (work_surface && !pv->is_sys_mem)
+                    ff_qsv_atomic_dec(&work_surface->Data.Locked);
+
+            if( MFX_ERR_MORE_DATA == sts ){
+                ff_qsv_atomic_dec(&qsv_encode->p_syncp[sync_idx]->in_use);
+                if(work_surface && received_item)
+                    hb_list_add(pv->delayed_processing, received_item);
+                break;
+            }
+
+            AV_QSV_CHECK_RESULT(sts, MFX_ERR_NONE, sts);
+
+            if (MFX_ERR_NONE <= sts /*&& !syncpE*/) // repeat the call if warning and no output
+            {
+                if (MFX_WRN_DEVICE_BUSY == sts){
+                    av_qsv_sleep(10); // wait if device is busy
+                    continue;
+                }
+
+                av_qsv_stage* new_stage = av_qsv_stage_init();
+                new_stage->type = AV_QSV_ENCODE;
+                new_stage->in.p_surface = work_surface;
+                new_stage->out.sync = qsv_encode->p_syncp[sync_idx];
+
+                new_stage->out.p_bs = task->bs;//qsv_encode->bs;
+                task->stage = new_stage;
+
+                pv->async_depth++;
+
+                if(received_item){
+                    av_qsv_add_stagee( &received_item, new_stage,HAVE_THREADS );
+                }
+                else{
+                   // flushing the end
+                    int pipe_idx = av_qsv_list_add( qsv->pipes, av_qsv_list_init(HAVE_THREADS) );
+                    av_qsv_list* list_item = av_qsv_list_item( qsv->pipes, pipe_idx );
+                    av_qsv_add_stagee( &list_item, new_stage,HAVE_THREADS );
+                }
+
+                int i = 0;
+                for(i=hb_list_count(pv->delayed_processing); i > 0;i--){
+                    hb_list_t *item = hb_list_item(pv->delayed_processing,i-1);
+                    if(item){
+                        hb_list_rem(pv->delayed_processing,item);
+                        av_qsv_flush_stages(qsv->pipes, &item);
+                    }
+                }
+
+                break;
+            }
+
+            ff_qsv_atomic_dec(&qsv_encode->p_syncp[sync_idx]->in_use);
+
+            if (MFX_ERR_NOT_ENOUGH_BUFFER == sts)
+                DEBUG_ASSERT( 1,"The bitstream buffer size is insufficient." );
+
+            break;
+        }
+    }
+
+    buf = NULL;
+
+    do{
+
+    if(pv->async_depth==0) break;
+
+    // working properly with sync depth approach of MediaSDK OR flushing, if at the end
+    if( (pv->async_depth >= pv->max_async_depth) || is_end ){
+
+        pv->async_depth--;
+
+        av_qsv_task *task = av_qsv_list_item( qsv_encode->tasks, 0 );
+        av_qsv_stage* stage = task->stage;
+        av_qsv_list*  this_pipe = av_qsv_pipe_by_stage(qsv->pipes,stage);
+        sts = MFX_ERR_NONE;
+
+        // only here we need to wait on operation been completed, therefore SyncOperation is used,
+        // after this step - we continue to work with bitstream, muxing ...
+        av_qsv_wait_on_sync( qsv,stage );
+
+        if(task->bs->DataLength>0){
+                av_qsv_flush_stages( qsv->pipes, &this_pipe );
+
+                // see nal_encode
+                buf = hb_video_buffer_init( job->width, job->height );
+                buf->size = 0;
+                buf->s.frametype = 0;
+
+                // maping of FrameType(s)
+                if(task->bs->FrameType & MFX_FRAMETYPE_IDR ) buf->s.frametype = HB_FRAME_IDR;
+                else
+                if(task->bs->FrameType & MFX_FRAMETYPE_I )   buf->s.frametype = HB_FRAME_I;
+                else
+                if(task->bs->FrameType & MFX_FRAMETYPE_P )   buf->s.frametype = HB_FRAME_P;
+                else
+                if(task->bs->FrameType & MFX_FRAMETYPE_B )   buf->s.frametype = HB_FRAME_B;
+
+                if(task->bs->FrameType & MFX_FRAMETYPE_REF ) buf->s.flags      = HB_FRAME_REF;
+
+                parse_nalus(task->bs->Data + task->bs->DataOffset,task->bs->DataLength, buf, pv->frames_out);
+
+                if ( last_buf == NULL )
+                    *buf_out = buf;
+                else
+                    last_buf->next = buf;
+                last_buf = buf;
+
+            // simple for now but check on TimeStampCalc from MSDK
+            int64_t duration  = ((double)pv->enc_space.m_mfxVideoParam.mfx.FrameInfo.FrameRateExtD /
+                                 (double)pv->enc_space.m_mfxVideoParam.mfx.FrameInfo.FrameRateExtN) * 90000.;
+
+            // start        -> PTS
+            // renderOffset -> DTS
+            buf->s.start    = buf->s.renderOffset = task->bs->TimeStamp;
+            buf->s.stop     = buf->s.start + duration;
+            buf->s.duration = duration;
+            if (pv->bfrm_delay)
+            {
+                if (!pv->bfrm_workaround)
+                {
+                    buf->s.renderOffset = task->bs->DecodeTimeStamp;
+                }
+                else
+                {
+                    // MSDK API < 1.6 or VFR, so generate our own DTS
+                    if ((pv->frames_out == 0)                                 &&
+                        (hb_qsv_info->capabilities & HB_QSV_CAP_MSDK_API_1_6) &&
+                        (hb_qsv_info->capabilities & HB_QSV_CAP_H264_BPYRAMID))
+                    {
+                        // with B-pyramid, the delay may be more than 1 frame,
+                        // so compute the actual delay based on the initial DTS
+                        // provided by MSDK; also, account for rounding errors
+                        // (e.g. 24000/1001 fps @ 90kHz -> 3753.75 ticks/frame)
+                        pv->bfrm_delay = ((task->bs->TimeStamp -
+                                           task->bs->DecodeTimeStamp +
+                                           (duration / 2)) / duration);
+                        pv->bfrm_delay = FFMAX(pv->bfrm_delay, 1);
+                        pv->bfrm_delay = FFMIN(pv->bfrm_delay, BFRM_DELAY_MAX);
+                    }
+                    /*
+                     * Generate VFR-compatible output DTS based on input PTS.
+                     *
+                     * Depends on the B-frame delay:
+                     *
+                     * 0: ipts0,  ipts1, ipts2...
+                     * 1: ipts0 - ipts1, ipts1 - ipts1, ipts1,  ipts2...
+                     * 2: ipts0 - ipts2, ipts1 - ipts2, ipts2 - ipts2, ipts1...
+                     * ...and so on.
+                     */
+                    if (pv->frames_out <= pv->bfrm_delay)
+                    {
+                        buf->s.renderOffset = (pv->init_pts[pv->frames_out] -
+                                               pv->init_pts[pv->bfrm_delay]);
+                    }
+                    else
+                    {
+                        buf->s.renderOffset = hb_qsv_pop_next_dts(pv->list_dts);
+                    }
+                }
+
+                /*
+                 * In the MP4 container, DT(0) = STTS(0) = 0.
+                 *
+                 * Which gives us:
+                 * CT(0) = CTTS(0) + STTS(0) = CTTS(0) = PTS(0) - DTS(0)
+                 * When DTS(0) < PTS(0), we then have:
+                 * CT(0) > 0 for video, but not audio (breaks A/V sync).
+                 *
+                 * This is typically solved by writing an edit list shifting
+                 * video samples by the initial delay, PTS(0) - DTS(0).
+                 *
+                 * See:
+                 * ISO/IEC 14496-12:2008(E), ISO base media file format
+                 *  - 8.6.1.2 Decoding Time to Sample Box
+                 */
+                if (w->config->h264.init_delay == 0 && buf->s.renderOffset < 0)
+                {
+                    w->config->h264.init_delay = -buf->s.renderOffset;
+                }
+            }
+
+            /*
+             * If we have a chapter marker pending and this frame's
+             * presentation time stamp is at or after the marker's time stamp,
+             * use this as the chapter start.
+             */
+            if (pv->next_chapter.index && buf->s.frametype == HB_FRAME_IDR &&
+                pv->next_chapter.start <= buf->s.start)
+            {
+                buf->s.new_chap = pv->next_chapter.index;
+                pv->next_chapter.index = 0;
+            }
+
+                // shift for fifo
+                if(pv->async_depth){
+                    av_qsv_list_rem(qsv_encode->tasks,task);
+                    av_qsv_list_add(qsv_encode->tasks,task);
+                }
+
+                task->bs->DataLength    = 0;
+                task->bs->DataOffset    = 0;
+                task->bs->MaxLength = qsv_encode->p_buf_max_size;
+                task->stage        = 0;
+                pv->frames_out++;
+            }
+        }
+    }while(is_end);
+
+
+    if(is_end){
+        if( !buf && MFX_ERR_MORE_DATA == sts )
+            break;
+
+    }
+    else
+        break;
+
+    }
+
+    if(!is_end)
+        ++pv->frames_in;
+
+    if(is_end){
+        *buf_in = NULL;
+        if(last_buf){
+            last_buf->next = in;
+        }
+        else
+            *buf_out = in;
+        return HB_WORK_DONE;
+    }
+    else{
+        return HB_WORK_OK;
+    }
+}
+
+int nal_find_start_code(uint8_t** pb, size_t* size){
+    if ((int) *size < 4 )
+        return 0;
+
+    // find start code by MSDK , see ff_prefix_code[]
+    while ((4 <= *size) &&
+        ((0 != (*pb)[0]) ||
+         (0 != (*pb)[1]) ||
+         (1 != (*pb)[2]) ))
+    {
+        *pb += 1;
+        *size -= 1;
+    }
+
+    if (4 <= *size)
+        return (((*pb)[0] << 24) | ((*pb)[1] << 16) | ((*pb)[2] << 8) | ((*pb)[3]));
+
+    return 0;
+}
+
+void parse_nalus(uint8_t *nal_inits, size_t length, hb_buffer_t *buf, uint32_t frame_num){
+    uint8_t *offset = nal_inits;
+    size_t size     = length;
+
+    if( nal_find_start_code(&offset,&size) == 0 )
+        size = 0;
+
+    while( size > 0 ){
+
+            uint8_t* current_nal = offset + sizeof(ff_prefix_code)-1;
+            uint8_t *next_offset = offset + sizeof(ff_prefix_code);
+            size_t next_size     = size - sizeof(ff_prefix_code);
+            size_t current_size  = next_size;
+            if( nal_find_start_code(&next_offset,&next_size) == 0 ){
+                size = 0;
+                current_size += 1;
+            }
+            else{
+                current_size -= next_size;
+                if( next_offset > 0 && *(next_offset-1) != 0  )
+                    current_size += 1;
+            }
+            {
+                char size_position[4] = {0,0,0,0};
+                size_position[1] = (current_size >> 24) & 0xFF;
+                size_position[1] = (current_size >> 16) & 0xFF;
+                size_position[2] = (current_size >> 8)  & 0xFF;
+                size_position[3] =  current_size        & 0xFF;
+
+                memcpy(buf->data + buf->size,&size_position ,sizeof(size_position));
+                buf->size += sizeof(size_position);
+
+                memcpy(buf->data + buf->size,current_nal ,current_size);
+                buf->size += current_size;
+            }
+
+            if(size){
+                size   = next_size;
+                offset = next_offset;
+            }
+        }
+}
author	Rodeo <[email protected]>	2013-08-22 20:34:44 +0000
committer	Rodeo <[email protected]>	2013-08-22 20:34:44 +0000
commit	3326f988806a5decae025727784a19c8cc223833 (patch)
tree	cd75adb1975d223d7a0fd43a31030a78939e73d3 /libhb/enc_qsv.c
parent	d41905d539046445e1b81499ff7bd04d170c91d4 (diff)