3 files changed, 2293 insertions, 0 deletions
diff --git a/contrib/ffmpeg/A00-qsv.patch b/contrib/ffmpeg/A00-qsv.patch
new file mode 100644
index 000000000..908033617
--- /dev/null
+++ b/contrib/ffmpeg/A00-qsv.patch
@@ -0,0 +1,2285 @@
+diff -Naur ../../libav-v9.6/configure ./configure
+--- ../../libav-v9.6/configure	2013-05-12 08:39:07.000000000 +0200
++++ ./configure	2013-08-14 10:48:00.520497159 +0200
+@@ -133,6 +133,7 @@
+   --enable-vaapi           enable VAAPI code
+   --enable-vda             enable VDA code
+   --enable-vdpau           enable VDPAU code
++  --enable-qsv             enable QSV code
+ 
+ Individual component options:
+   --disable-everything     disable all components listed below
+@@ -1076,6 +1077,7 @@
+     vaapi
+     vda
+     vdpau
++    qsv
+     version3
+     xmm_clobber_test
+     x11grab
+@@ -1629,6 +1631,7 @@
+ wmv3_dxva2_hwaccel_select="vc1_dxva2_hwaccel"
+ wmv3_vaapi_hwaccel_select="vc1_vaapi_hwaccel"
+ wmv3_vdpau_decoder_select="vc1_vdpau_decoder"
++h264_qsv_decoder_select="qsv h264_decoder"
+ 
+ # parsers
+ h264_parser_select="error_resilience golomb h264dsp h264pred mpegvideo"
+@@ -3584,6 +3587,12 @@
+     check_cpp_condition vdpau/vdpau.h "defined VDP_DECODER_PROFILE_MPEG4_PART2_ASP" ||
+         { echolog "Please upgrade to libvdpau >= 0.2 if you would like vdpau support." && disable vdpau; }
+ fi
++if enabled qsv; then
++    disable qsv    # stays disabled unless the header check on the next line succeeds
++    check_header msdk/mfxvideo.h && enable qsv 
++else
++    disable qsv
++fi
+ 
+ enabled debug && add_cflags -g"$debuglevel" && add_asflags -g"$debuglevel"
+ 
+@@ -3795,6 +3804,7 @@
+ echo "libdxva2 enabled          ${dxva2-no}"
+ echo "libva enabled             ${vaapi-no}"
+ echo "libvdpau enabled          ${vdpau-no}"
++echo "libqsv enabled            ${qsv-no}"
+ echo "AVISynth enabled          ${avisynth-no}"
+ echo "frei0r enabled            ${frei0r-no}"
+ echo "gnutls enabled            ${gnutls-no}"
+diff -Naur ../../libav-v9.6/libavcodec/allcodecs.c ./libavcodec/allcodecs.c
+--- ../../libav-v9.6/libavcodec/allcodecs.c	2013-05-12 08:39:07.000000000 +0200
++++ ./libavcodec/allcodecs.c	2013-08-14 10:48:00.520497159 +0200
+@@ -143,6 +143,7 @@
+     REGISTER_DECODER(H263I,             h263i);
+     REGISTER_ENCODER(H263P,             h263p);
+     REGISTER_DECODER(H264,              h264);
++    REGISTER_DECODER(H264_QSV, 		h264_qsv);
+     REGISTER_DECODER(H264_VDPAU,        h264_vdpau);
+     REGISTER_ENCDEC (HUFFYUV,           huffyuv);
+     REGISTER_DECODER(IDCIN,             idcin);
+diff -Naur ../../libav-v9.6/libavcodec/Makefile ./libavcodec/Makefile
+--- ../../libav-v9.6/libavcodec/Makefile	2013-05-12 08:39:07.000000000 +0200
++++ ./libavcodec/Makefile	2013-08-14 10:48:00.521497282 +0200
+@@ -10,6 +10,7 @@
+           vdpau.h                                                       \
+           version.h                                                     \
+           xvmc.h                                                        \
++	  qsv.h								\
+ 
+ OBJS = allcodecs.o                                                      \
+        audioconvert.o                                                   \
+@@ -196,6 +197,7 @@
+                                           h264_loopfilter.o h264_direct.o      \
+                                           cabac.o h264_sei.o h264_ps.o         \
+                                           h264_refs.o h264_cavlc.o h264_cabac.o
++OBJS-$(CONFIG_H264_QSV_DECODER)        += qsv_h264.o qsv.o
+ OBJS-$(CONFIG_H264_DXVA2_HWACCEL)      += dxva2_h264.o
+ OBJS-$(CONFIG_H264_VAAPI_HWACCEL)      += vaapi_h264.o
+ OBJS-$(CONFIG_H264_VDA_HWACCEL)        += vda_h264.o
+diff -Naur ../../libav-v9.6/libavcodec/qsv.c ./libavcodec/qsv.c
+--- ../../libav-v9.6/libavcodec/qsv.c	1970-01-01 01:00:00.000000000 +0100
++++ ./libavcodec/qsv.c	2013-08-19 21:32:01.704244071 +0200
+@@ -0,0 +1,646 @@
++/* ********************************************************************* *\
++
++Copyright (C) 2013 Intel Corporation.  All rights reserved.
++
++Redistribution and use in source and binary forms, with or without
++modification, are permitted provided that the following conditions are met:
++- Redistributions of source code must retain the above copyright notice,
++this list of conditions and the following disclaimer.
++- Redistributions in binary form must reproduce the above copyright notice,
++this list of conditions and the following disclaimer in the documentation
++and/or other materials provided with the distribution.
++- Neither the name of Intel Corporation nor the names of its contributors
++may be used to endorse or promote products derived from this software
++without specific prior written permission.
++
++THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION "AS IS" AND ANY EXPRESS OR
++IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
++OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
++IN NO EVENT SHALL INTEL CORPORATION BE LIABLE FOR ANY DIRECT, INDIRECT,
++INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
++NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
++DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
++THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
++THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++
++\* ********************************************************************* */
++
++#include "qsv.h"
++
++#include "avcodec.h"
++#include "internal.h"
++
++/* Index of the first task whose output sync point is idle, else MFX_ERR_NOT_FOUND. */
++int av_qsv_get_free_encode_task(av_qsv_list * tasks)
++{
++    int idx;
++    if (!tasks)
++        return MFX_ERR_NOT_FOUND;
++    for (idx = 0; idx < av_qsv_list_count(tasks); idx++) {
++        av_qsv_task *candidate = av_qsv_list_item(tasks, idx);
++        /* free == has a stage with a sync object whose sync point value is 0 */
++        if (candidate->stage && candidate->stage->out.sync &&
++            !(*candidate->stage->out.sync->p_sync))
++            return idx;
++    }
++    return MFX_ERR_NOT_FOUND;
++}
++
++/* Claims an idle sync point of `space`: returns its index, or -1 (after logging) if none is free. */
++int av_qsv_get_free_sync(av_qsv_space * space, av_qsv_context * qsv)
++{
++    int attempts = 0;
++
++    for (;;) {
++        for (int slot = 0; slot < space->sync_num; slot++) {
++            av_qsv_sync *cand = space->p_syncp[slot];
++            if (*(cand->p_sync) || 0 != cand->in_use)
++                continue;       /* sync point set, or slot already claimed */
++            if (slot > space->sync_num_max_used)
++                space->sync_num_max_used = slot;
++            ff_qsv_atomic_inc(&cand->in_use);
++            return slot;
++        }
++#if HAVE_THREADS
++        /* threaded builds rescan after 5 ms, up to AV_QSV_REPEAT_NUM_DEFAULT scans */
++        if (++attempts < AV_QSV_REPEAT_NUM_DEFAULT) {
++            av_qsv_sleep(5);
++            continue;
++        }
++#endif
++        av_log(NULL, AV_LOG_FATAL, "not enough to have %d sync point(s) allocated\n",
++               space->sync_num);
++        break;
++    }
++    return -1;
++}
++
++/* Finds an unlocked surface in the requested part of the pool, copies *info
++ * into it and returns its index; logs and returns -1 when none is free
++ * (threaded builds first retry with 5 ms sleeps). */
++int av_qsv_get_free_surface(av_qsv_space * space, av_qsv_context * qsv,
++                     mfxFrameInfo * info, av_qsv_split part)
++{
++    int attempts = 0;
++
++    for (;;) {
++        /* bounds are recomputed on every pass from the current surface_num */
++        int last  = space->surface_num;
++        int first = 0;
++        if (part == QSV_PART_LOWER)
++            last /= 2;
++        else if (part == QSV_PART_UPPER)
++            first = last / 2;
++
++        for (int idx = first; idx < last; idx++) {
++            mfxFrameSurface1 *surf = space->p_surfaces[idx];
++            if (0 != surf->Data.Locked)
++                continue;
++            memcpy(&surf->Info, info, sizeof(mfxFrameInfo));
++            if (idx > space->surface_num_max_used)
++                space->surface_num_max_used = idx;
++            return idx;
++        }
++#if HAVE_THREADS
++        if (++attempts < AV_QSV_REPEAT_NUM_DEFAULT) {
++            av_qsv_sleep(5);
++            continue;
++        }
++#endif
++        av_log(NULL, AV_LOG_FATAL,
++               "not enough to have %d surface(s) allocated\n", last);
++        break;
++    }
++    return -1;
++}
++
++/*
++ * Looks for p_surface among the stages of every pipe in qsv->pipes.
++ * Returns 0 when absent (or when p_surface / qsv->pipes is NULL), otherwise
++ * (stage type << 16) | 2 for an output surface or | 1 for an input surface.
++ */
++int ff_qsv_is_surface_in_pipe(mfxFrameSurface1 * p_surface, av_qsv_context * qsv)
++{
++    int pipe_idx, stage_idx;
++
++    if (!p_surface || !qsv->pipes)
++        return 0;
++
++    for (pipe_idx = 0; pipe_idx < av_qsv_list_count(qsv->pipes); pipe_idx++) {
++        av_qsv_list *pipe = av_qsv_list_item(qsv->pipes, pipe_idx);
++        for (stage_idx = 0; stage_idx < av_qsv_list_count(pipe); stage_idx++) {
++            av_qsv_stage *cur = av_qsv_list_item(pipe, stage_idx);
++            if (p_surface == cur->out.p_surface)
++                return (cur->type << 16) | 2;
++            if (p_surface == cur->in.p_surface)
++                return (cur->type << 16) | 1;
++        }
++    }
++    return 0;
++}
++
++/* Returns 1 when `sync` is the output sync point of a stage in qsv->pipes,
++ * else 0 (also for a NULL sync or no pipes).
++ * Stages whose out.sync is still NULL (as left by av_qsv_stage_init) are
++ * skipped rather than dereferenced. */
++int ff_qsv_is_sync_in_pipe(mfxSyncPoint * sync, av_qsv_context * qsv)
++{
++    int a, b;
++    av_qsv_list *list = 0;
++    av_qsv_stage *stage = 0;
++
++    if (!sync || !qsv->pipes)
++        return 0;
++
++    for (a = 0; a < av_qsv_list_count(qsv->pipes); a++) {
++        list = av_qsv_list_item(qsv->pipes, a);
++        for (b = 0; b < av_qsv_list_count(list); b++) {
++            stage = av_qsv_list_item(list, b);
++            if (stage->out.sync && sync == stage->out.sync->p_sync)
++                return 1;
++        }
++    }
++    return 0;
++}
++
++av_qsv_stage *av_qsv_stage_init(void)
++{
++    /* zero-filled stage straight from av_mallocz(); its result is returned as is */
++    return av_mallocz(sizeof(av_qsv_stage));
++}
++
++/* Releases the stage's output sync point (value zeroed, in_use dropped), clears
++ * its surface links without freeing the surfaces, then frees and NULLs *stage. */
++void av_qsv_stage_clean(av_qsv_stage ** stage)
++{
++    av_qsv_stage *st   = *stage;
++    av_qsv_sync  *sync = st->out.sync;
++
++    if (sync) {
++        if (sync->p_sync)
++            *sync->p_sync = 0;
++        if (sync->in_use > 0)
++            ff_qsv_atomic_dec(&sync->in_use);
++        st->out.sync = 0;
++    }
++    st->out.p_surface = 0;
++    st->in.p_surface  = 0;
++
++    av_freep(stage);
++}
++
++/* Registers one more user of the context.  When the counter reads 1 after the
++ * increment, the shared state is set up: session handle zeroed, pipe list and
++ * DTS list created and, for threaded use, the DTS mutex allocated and
++ * initialised (qts_seq_mutex is set to 0 otherwise). */
++void av_qsv_add_context_usage(av_qsv_context * qsv, int is_threaded)
++{
++#if HAVE_THREADS
++    int rc = 0;
++#endif
++
++    if (ff_qsv_atomic_inc(&qsv->is_context_active) != 1)
++        return;         /* only the call that reads 1 performs the setup */
++
++    memset(&qsv->mfx_session, 0, sizeof(mfxSession));
++    av_qsv_pipe_list_create(&qsv->pipes, is_threaded);
++    qsv->dts_seq = av_qsv_list_init(is_threaded);
++#if HAVE_THREADS
++    if (is_threaded) {
++        qsv->qts_seq_mutex = av_mallocz(sizeof(pthread_mutex_t));
++        if (qsv->qts_seq_mutex){
++            rc = pthread_mutex_init(qsv->qts_seq_mutex, NULL);
++            if (rc)
++                av_log(NULL, AV_LOG_ERROR, "pthread_mutex_init issue[%d] at %s\n",rc,__FUNCTION__);
++        }
++    } else
++#endif
++        qsv->qts_seq_mutex = 0;
++}
++
++/* Drops one usage of the context; once the counter reaches 0 the DTS list,
++ * the DTS mutex, the pipes and the MFX session are released.  Returns 0, or
++ * the MFXClose() status if that status is below MFX_ERR_NONE. */
++int av_qsv_context_clean(av_qsv_context * qsv)
++{
++    int is_active = 0;
++    mfxStatus sts = MFX_ERR_NONE;
++#if HAVE_THREADS
++    int mut_ret = 0;
++#endif
++
++    is_active = ff_qsv_atomic_dec(&qsv->is_context_active);
++    // spaces would have to be cleaned on the own,
++    // here we care about the rest, common stuff
++    if (is_active == 0) {
++        if (qsv->dts_seq) {
++            while (av_qsv_list_count(qsv->dts_seq))
++                av_qsv_dts_pop(qsv);
++            av_qsv_list_close(&qsv->dts_seq);
++        }
++#if HAVE_THREADS
++        if (qsv->qts_seq_mutex) {
++            mut_ret = pthread_mutex_destroy(qsv->qts_seq_mutex);
++            if(mut_ret)
++                av_log(NULL, AV_LOG_ERROR, "pthread_mutex_destroy issue[%d] at %s\n", mut_ret,__FUNCTION__);
++            // the mutex storage was obtained with av_mallocz(): release it as well
++            av_free(qsv->qts_seq_mutex);
++#endif
++            qsv->qts_seq_mutex = 0;
++#if HAVE_THREADS
++        }
++#endif
++        if (qsv->pipes)
++            av_qsv_pipe_list_clean(&qsv->pipes);
++        if (qsv->mfx_session) {
++            sts = MFXClose(qsv->mfx_session);
++            AV_QSV_CHECK_RESULT(sts, MFX_ERR_NONE, sts);
++            qsv->mfx_session = 0;
++        }
++    }
++    return 0;
++}
++
++void av_qsv_pipe_list_create(av_qsv_list ** list, int is_threaded)
++{
++    /* an existing list is left as is; only an empty slot gets a fresh list */
++    if (*list == NULL) *list = av_qsv_list_init(is_threaded);
++}
++
++/* Flushes every pipe held in *list, back to front, then closes the list itself. */
++void av_qsv_pipe_list_clean(av_qsv_list ** list)
++{
++    int remaining;
++    if (!*list)
++        return;
++    for (remaining = av_qsv_list_count(*list); remaining > 0; remaining--) {
++        av_qsv_list *pipe = av_qsv_list_item(*list, remaining - 1);
++        av_qsv_flush_stages(*list, &pipe);
++    }
++    av_qsv_list_close(list);
++}
++
++/* Appends stage to the pipe *list, creating the pipe first when *list is NULL. */
++void av_qsv_add_stagee(av_qsv_list ** list, av_qsv_stage * stage, int is_threaded)
++{
++    if (*list == NULL) *list = av_qsv_list_init(is_threaded);
++    av_qsv_list_add(*list, stage);
++}
++
++/* Returns the stage stored last in the pipe `list`, or 0 when the pipe is empty. */
++av_qsv_stage *av_qsv_get_last_stage(av_qsv_list * list)
++{
++    av_qsv_stage *last = 0;
++    int n;
++    /* count and lookup both happen while the list lock is held */
++    av_qsv_list_lock(list);
++    n = av_qsv_list_count(list);
++    if (n > 0)
++        last = av_qsv_list_item(list, n - 1);
++    av_qsv_list_unlock(list);
++    return last;
++}
++
++/*
++ * Releases the pipe *item: every stage is cleaned (freed), the pipe is removed
++ * from `list` and closed.  Pending lists referenced by the stages are
++ * collected first and the pipes they hold are flushed recursively afterwards.
++ */
++void av_qsv_flush_stages(av_qsv_list * list, av_qsv_list ** item)
++{
++    av_qsv_list *deferred = 0;      /* stage->pending lists gathered below */
++    int n, k;
++
++    for (n = 0; n < av_qsv_list_count(*item); n++) {
++        av_qsv_stage *cur = av_qsv_list_item(*item, n);
++        if (cur->pending) {
++            if (!deferred)
++                deferred = av_qsv_list_init(0);
++            av_qsv_list_add(deferred, cur->pending);
++        }
++        /* the freed stage is not removed from *item; the pipe is closed right after the loop */
++        av_qsv_stage_clean(&cur);
++    }
++    av_qsv_list_rem(list, *item);
++    av_qsv_list_close(item);
++    if (!deferred)
++        return;
++    for (n = av_qsv_list_count(deferred); n > 0; n--) {
++        av_qsv_list *pending = av_qsv_list_item(deferred, n - 1);
++        for (k = av_qsv_list_count(pending); k > 0; k--) {
++            av_qsv_list *pipe = av_qsv_list_item(pending, k - 1);
++            av_qsv_flush_stages(list, &pipe);
++        }
++    }
++    av_qsv_list_close(&deferred);
++}
++
++/* Returns the pipe in `list` that contains `stage`, or 0 when no pipe does. */
++av_qsv_list *av_qsv_pipe_by_stage(av_qsv_list * list, av_qsv_stage * stage)
++{
++    int p, s;
++
++    for (p = 0; p < av_qsv_list_count(list); p++) {
++        av_qsv_list *pipe = av_qsv_list_item(list, p);
++        for (s = 0; s < av_qsv_list_count(pipe); s++) {
++            /* stages are matched by pointer identity */
++            if (av_qsv_list_item(pipe, s) == stage)
++                return pipe;
++        }
++    }
++
++    return 0;
++}
++
++// Ordered insert of dts into qsv->dts_seq: no duplicate of the same value, if end == 0 : working over full length
++void av_qsv_dts_ordered_insert(av_qsv_context * qsv, int start, int end,
++                            int64_t dts, int iter)
++{
++    av_qsv_dts *cur_dts = 0;
++    av_qsv_dts *new_dts = 0;
++    int i = 0;
++#if HAVE_THREADS
++    int mut_ret = 0;
++#endif
++    // Entries [start, end) are scanned from the back; dts goes right after the
++    // last entry that is smaller.  The DTS mutex is only taken when iter == 0.
++#if HAVE_THREADS
++    if (iter == 0 && qsv->qts_seq_mutex){
++        mut_ret = pthread_mutex_lock(qsv->qts_seq_mutex);
++        if(mut_ret)
++            av_log(NULL, AV_LOG_ERROR, "pthread_mutex_lock issue[%d] at %s\n",mut_ret, __FUNCTION__);
++    }
++#endif
++
++    if (end == 0)
++        end = av_qsv_list_count(qsv->dts_seq);
++    // NOTE(review): a dts smaller than every scanned entry is not inserted at all - confirm intended
++    if (end <= start) {   // empty range: the value is appended to the list
++        new_dts = av_mallocz(sizeof(av_qsv_dts));
++        if( new_dts ) {
++            new_dts->dts = dts;
++            av_qsv_list_add(qsv->dts_seq, new_dts);
++        }
++    } else
++        for (i = end; i > start; i--) {
++            cur_dts = av_qsv_list_item(qsv->dts_seq, i - 1);
++            if (cur_dts->dts < dts) {
++                new_dts = av_mallocz(sizeof(av_qsv_dts));
++                if( new_dts ) {
++                    new_dts->dts = dts;
++                    av_qsv_list_insert(qsv->dts_seq, i, new_dts);
++                }
++                break;
++            } else if (cur_dts->dts == dts)   // value already present: nothing to add
++                break;
++        }
++#if HAVE_THREADS
++    if (iter == 0 && qsv->qts_seq_mutex){
++        mut_ret = pthread_mutex_unlock(qsv->qts_seq_mutex);
++        if(mut_ret)
++            av_log(NULL, AV_LOG_ERROR, "pthread_mutex_unlock issue[%d] at %s\n",mut_ret, __FUNCTION__);
++    }
++#endif
++}
++
++/* Removes and frees the first entry of qsv->dts_seq, if any; no-op for a NULL context. */
++void av_qsv_dts_pop(av_qsv_context * qsv)
++{
++#if HAVE_THREADS
++    int mut_ret = 0;
++#endif
++    if (!qsv)           /* guard once: the list access below needs qsv as well */
++        return;
++#if HAVE_THREADS
++    if (qsv->qts_seq_mutex){
++        mut_ret = pthread_mutex_lock(qsv->qts_seq_mutex);
++        if(mut_ret)
++            av_log(NULL, AV_LOG_ERROR, "pthread_mutex_lock issue[%d] at %s\n",mut_ret, __FUNCTION__);
++    }
++#endif
++    if (av_qsv_list_count(qsv->dts_seq)) {
++        av_qsv_dts *item = av_qsv_list_item(qsv->dts_seq, 0);
++        av_qsv_list_rem(qsv->dts_seq, item);
++        av_free(item);
++    }
++#if HAVE_THREADS
++    if (qsv->qts_seq_mutex){
++        mut_ret = pthread_mutex_unlock(qsv->qts_seq_mutex);
++        if(mut_ret)
++            av_log(NULL, AV_LOG_ERROR, "pthread_mutex_unlock issue[%d] at %s\n",mut_ret, __FUNCTION__);
++    }
++#endif
++}
++
++
++/* New empty list (recursive mutex when threaded); returns 0 if an allocation fails. */
++av_qsv_list *av_qsv_list_init(int is_threaded)
++{
++    av_qsv_list *l;
++#if HAVE_THREADS
++    int mut_ret;
++#endif
++    l = av_mallocz(sizeof(av_qsv_list));
++    if (l)
++        l->items = av_mallocz(AV_QSV_JOB_SIZE_DEFAULT * sizeof(void *));
++    if (!l || !l->items) {
++        av_free(l);     /* av_free(NULL) is a no-op; avoids leaking the list header */
++        return 0;
++    }
++    l->items_alloc = AV_QSV_JOB_SIZE_DEFAULT;
++#if HAVE_THREADS
++    if (is_threaded) {
++        l->mutex = av_mallocz(sizeof(pthread_mutex_t));
++        if (l->mutex){
++            mut_ret = pthread_mutexattr_init(&l->mta);
++            if( mut_ret )
++                av_log(NULL, AV_LOG_ERROR, "pthread_mutexattr_init issue[%d] at %s\n",mut_ret, __FUNCTION__);
++            mut_ret = pthread_mutexattr_settype(&l->mta, PTHREAD_MUTEX_RECURSIVE /*PTHREAD_MUTEX_ERRORCHECK*/);
++            if( mut_ret )
++                av_log(NULL, AV_LOG_ERROR, "pthread_mutexattr_settype issue[%d] at %s\n",mut_ret, __FUNCTION__);
++            mut_ret = pthread_mutex_init(l->mutex, &l->mta);
++            if( mut_ret )
++                av_log(NULL, AV_LOG_ERROR, "pthread_mutex_init issue[%d] at %s\n",mut_ret, __FUNCTION__);
++        }
++    } else
++#endif
++        l->mutex = 0;
++    return l;
++}
++
++/* Number of items currently stored in l, read while holding the list lock. */
++int av_qsv_list_count(av_qsv_list * l)
++{
++    int n;
++    av_qsv_list_lock(l);
++    n = l->items_count;
++    av_qsv_list_unlock(l);
++    return n;
++}
++
++/* Appends p, growing the item array in steps of AV_QSV_JOB_SIZE_DEFAULT.
++ * Returns the index of the new item, or -1 when p is NULL or the array
++ * could not be grown (the list is left unchanged in that case). */
++int av_qsv_list_add(av_qsv_list * l, void *p)
++{
++    int pos = -1;
++    if (!p)
++        return pos;
++    av_qsv_list_lock(l);
++    if (l->items_count == l->items_alloc) {
++        /* grow through a temporary: av_realloc() keeps the old block on failure */
++        void **grown = av_realloc(l->items, (l->items_alloc + AV_QSV_JOB_SIZE_DEFAULT) * sizeof(void *));
++        if (grown) {
++            l->items = grown;
++            l->items_alloc += AV_QSV_JOB_SIZE_DEFAULT;
++        }
++    }
++    if (l->items_count < l->items_alloc) {
++        pos = l->items_count++;
++        l->items[pos] = p;
++    }
++    av_qsv_list_unlock(l);
++    return pos;
++}
++
++/* Removes the first occurrence of p from the list; does nothing if p is absent. */
++void av_qsv_list_rem(av_qsv_list * l, void *p)
++{
++    int idx = 0;
++
++    av_qsv_list_lock(l);
++
++    /* linear search for the first slot holding p */
++    while (idx < l->items_count && l->items[idx] != p)
++        idx++;
++
++    if (idx < l->items_count) {
++        /* close the gap: move the tail one slot towards the front */
++        memmove(l->items + idx, l->items + idx + 1,
++                (l->items_count - idx - 1) * sizeof(void *));
++        l->items_count--;
++    }
++
++    av_qsv_list_unlock(l);
++}
++
++/* Returns the item stored at index i, or NULL when i is outside [0, items_count). */
++void *av_qsv_list_item(av_qsv_list * l, int i)
++{
++    void *found;
++
++    if (i < 0)
++        return NULL;
++
++    av_qsv_list_lock(l);
++    found = (i < l->items_count) ? l->items[i] : NULL;
++    av_qsv_list_unlock(l);
++    return found;
++}
++
++/* Inserts p at index pos, shifting items from pos onwards one slot up; does
++ * nothing when p is NULL or the item array cannot be grown. */
++void av_qsv_list_insert(av_qsv_list * l, int pos, void *p)
++{
++    if (!p)
++        return;
++    av_qsv_list_lock(l);
++    if (l->items_count == l->items_alloc) {
++        void **grown = av_realloc(l->items, (l->items_alloc + AV_QSV_JOB_SIZE_DEFAULT) * sizeof(void *));
++        if (grown) {
++            l->items = grown;
++            l->items_alloc += AV_QSV_JOB_SIZE_DEFAULT;
++        }
++    }
++    if (l->items_count < l->items_alloc) {
++        if (l->items_count != pos)
++            memmove(&l->items[pos + 1], &l->items[pos],
++                    (l->items_count - pos) * sizeof(void *));
++        l->items[pos] = p;
++        l->items_count++;   /* an insert adds one item, so the count goes up */
++    }
++    av_qsv_list_unlock(l);
++}
++
++/* Frees the item array (not the items), the mutex if any, and the list; *_l becomes NULL. */
++void av_qsv_list_close(av_qsv_list ** _l)
++{
++    av_qsv_list *l = *_l;
++#if HAVE_THREADS
++    int mut_ret;
++#endif
++    av_qsv_list_lock(l);
++    av_free(l->items);
++#if HAVE_THREADS
++    if (l->mutex){
++        mut_ret = pthread_mutex_unlock(l->mutex);
++        if( mut_ret )
++            av_log(NULL, AV_LOG_ERROR, "pthread_mutex_unlock issue[%d] at %s\n",mut_ret, __FUNCTION__);
++        /* l->mutex already is the mutex pointer: pass it, not the address of the field */
++        mut_ret = pthread_mutex_destroy(l->mutex);
++        mut_ret = pthread_mutexattr_destroy(&l->mta);
++        av_freep(&l->mutex);    /* the mutex storage was obtained with av_mallocz() */
++    }
++#endif
++    av_freep(_l);
++}
++
++/* Locks the list mutex when there is one; returns the pthread result (0 if nothing to lock). */
++int av_qsv_list_lock(av_qsv_list *l){
++    int rc = 0;
++#if HAVE_THREADS
++    if (l->mutex)
++        rc = pthread_mutex_lock(l->mutex);
++    if (rc)
++        av_log(NULL, AV_LOG_ERROR, "pthread_mutex_lock issue[%d] at %s\n",rc, __FUNCTION__);
++#endif
++    return rc;
++}
++
++/* Unlocks the list mutex when there is one; returns the pthread result (0 if nothing to unlock). */
++int av_qsv_list_unlock(av_qsv_list *l){
++    int rc = 0;
++#if HAVE_THREADS
++    if (l->mutex)
++        rc = pthread_mutex_unlock(l->mutex);
++    if (rc)
++        av_log(NULL, AV_LOG_ERROR, "pthread_mutex_unlock issue[%d] at %s\n",rc, __FUNCTION__);
++#endif
++    return rc;
++}
++
++/* Probes the runtime: opens a throw-away session and returns the MFXInit() status. */
++int av_is_qsv_available(mfxIMPL impl, mfxVersion * ver)
++{
++    mfxSession probe;
++    mfxStatus status;
++    memset(&probe, 0, sizeof(mfxSession));
++    status = MFXInit(impl, ver, &probe);
++    if (status >= 0)    /* a session is only closed when MFXInit did not report an error */
++        MFXClose(probe);
++    return status;
++}
++
++/* Waits for the stage's output sync point, retrying every 10 ms while the SDK reports
++ * MFX_WRN_IN_EXECUTION; an error status is logged (nothing is returned: void function). */
++void av_qsv_wait_on_sync(av_qsv_context *qsv, av_qsv_stage *stage)
++{
++    int iter = 0;
++    mfxStatus sts = MFX_ERR_NONE;
++    if (!stage || !stage->out.sync || !*stage->out.sync->p_sync)
++        return;
++    while(1){
++        iter++;
++        sts = MFXVideoCORE_SyncOperation(qsv->mfx_session,*stage->out.sync->p_sync, AV_QSV_SYNC_TIME_DEFAULT);
++        if(MFX_WRN_IN_EXECUTION == sts){
++            if(iter>20)
++                AV_QSV_DEBUG_ASSERT(1, "Sync failed");
++            av_qsv_sleep(10);
++            continue;
++        }
++        if (sts < MFX_ERR_NONE)
++            AV_QSV_PRINT_RET_MSG(sts);
++        break;
++    }
++}
+\ No newline at end of file
+diff -Naur ../../libav-v9.6/libavcodec/qsv.h ./libavcodec/qsv.h
+--- ../../libav-v9.6/libavcodec/qsv.h	1970-01-01 01:00:00.000000000 +0100
++++ ./libavcodec/qsv.h	2013-08-19 21:32:01.709244686 +0200
+@@ -0,0 +1,494 @@
++/* ********************************************************************* *\
++
++Copyright (C) 2013 Intel Corporation.  All rights reserved.
++
++Redistribution and use in source and binary forms, with or without
++modification, are permitted provided that the following conditions are met:
++- Redistributions of source code must retain the above copyright notice,
++this list of conditions and the following disclaimer.
++- Redistributions in binary form must reproduce the above copyright notice,
++this list of conditions and the following disclaimer in the documentation
++and/or other materials provided with the distribution.
++- Neither the name of Intel Corporation nor the names of its contributors
++may be used to endorse or promote products derived from this software
++without specific prior written permission.
++
++THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION "AS IS" AND ANY EXPRESS OR
++IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
++OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
++IN NO EVENT SHALL INTEL CORPORATION BE LIABLE FOR ANY DIRECT, INDIRECT,
++INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
++NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
++DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
++THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
++THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++
++\* ********************************************************************* */
++
++#ifndef AVCODEC_QSV_H
++#define AVCODEC_QSV_H
++
++/**
++ * @file
++ * @ingroup lavc_codec_hwaccel_qsv
++ * Common header for QSV/MediaSDK acceleration
++ */
++
++/**
++ * @defgroup lavc_codec_hwaccel_qsv QSV/MediaSDK based Decode/Encode and VPP
++ * @ingroup lavc_codec_hwaccel
++ *
++ *  Intel Quick Sync Video (QSV) can decode/preprocess/encode with HW
++ *  acceleration.
++ *
++ *  Supported features:
++ *    - access:
++ *      - format AV_PIX_FMT_QSV_H264, AVCodec decoder based implementation
++ *      - name "h264_qsv", avcodec_find_decoder_by_name( "h264_qsv")
++ *    - IO Pattern:
++ *      - Opaque memory: MFX_IOPATTERN_OUT_OPAQUE_MEMORY // Video memory is
++ *                       MFX_IMPL_HARDWARE or MFX_IMPL_AUTO and runtime support,
++ *                       otherwise: System Memory
++ *      - System memory: MFX_IOPATTERN_OUT_SYSTEM_MEMORY
++ *    - Allocators:
++ *      - default allocator for System memory: MFX_MEMTYPE_SYSTEM_MEMORY
++ *    - details:
++ *      implementation as "per frame"
++ *
++ *  TODO list:
++ *    - access:
++ *      - format AV_PIX_FMT_QSV_MPEG2
++ *      - format AV_PIX_FMT_QSV_VC1
++ *      - format AV_PIX_FMT_QSV, see "details" below
++ *    - IO Pattern:
++ *      - VIDEO_MEMORY  // MFX_IOPATTERN_OUT_VIDEO_MEMORY
++ *    - Allocators:
++ *      - Video memory: MFX_MEMTYPE_VIDEO_MEMORY_DECODER_TARGET /
++ *                      MFX_MEMTYPE_VIDEO_MEMORY_PROCESSOR_TARGET
++ *    - details:
++ *      "per slice" support: AV_PIX_FMT_QSV with AVHWAccel based implementation
++ *
++ *  Note av_qsv_config struct required to fill in via
++ *  AVCodecContext.hwaccel_context
++ *
++ *  As per frame, note AVFrame.data[2] (qsv_atom) used for frame atom id,
++ *  data/linesize should be used together with SYSTEM_MEMORY and tested
++ *
++ *  Note: Compilation would require:
++ *   - Intel MediaSDK headers, Full SDK is available from the original web site:
++ *                     http://software.intel.com/en-us/vcsource/tools/media-SDK
++ *     Will be referenced as msdk/*.h (mfxdefs.h, mfxstructures.h, ... )
++ *  and
++ *  - Final application has to link against Intel MediaSDK dispatcher, available
++ *     at MediaSDK as well
++ *
++ *  Target OS: as per available dispatcher and driver support
++ *
++ *  Implementation details:
++ *   The provided struct av_qsv_context contains several struct av_qsv_space(s) for decode,
++ *   VPP and encode.
++ *   An av_qsv_space just contains the environment needed for the appropriate action.
++ *   Based on this, a pipeline (see pipes) will be built to pass details such as
++ *   mfxFrameSurface1* and mfxSyncPoint* from one action to the next.
++ *
++ *  Resources re-usage (av_qsv_flush_stages):
++ *     av_qsv_context *qsv = (av_qsv_context *)video_codec_ctx->priv_data;
++ *     av_qsv_list *pipe = (av_qsv_list *)video_frame->data[2];
++ *     av_qsv_flush_stages( qsv->pipes, &pipe );
++ *
++ *  DTS re-usage:
++ *     av_qsv_dts_pop(qsv);
++ *
++ *   for video,DX9/11 memory it has to be Unlock'ed as well
++ *
++ *  Implementation is thread aware and uses synchronization point(s) from MediaSDK
++ *  as per configuration.
++ *
++ *  For the details of MediaSDK usage and options available - please refer to the
++ *  available documentation at MediaSDK.
++ *
++ *  Feature set used from MSDK is defined by AV_QSV_MSDK_VERSION_MAJOR and
++ *  AV_QSV_MSDK_VERSION_MINOR
++ *
++ * @{
++ */
++
++#include <stdint.h>
++#include <string.h>
++#include "msdk/mfxvideo.h"
++#include "libavutil/mem.h"
++#include "libavutil/time.h"
++
++#ifdef HAVE_AV_CONFIG_H
++#include "config.h"
++#endif
++
++#if HAVE_THREADS
++#if defined (__GNUC__)
++#include <pthread.h>
++#define ff_qsv_atomic_inc(ptr) __sync_add_and_fetch(ptr,1)
++#define ff_qsv_atomic_dec(ptr) __sync_sub_and_fetch (ptr,1)
++#elif HAVE_WINDOWS_H            // MSVC case
++#include <windows.h>
++#if HAVE_PTHREADS
++#include <pthread.h>
++#elif HAVE_W32THREADS
++#include "w32pthreads.h"
++#endif
++#define ff_qsv_atomic_inc(ptr) InterlockedIncrement(ptr)
++#define ff_qsv_atomic_dec(ptr) InterlockedDecrement (ptr)
++#else
++// targeting only for MinGW or MSVC
++#endif
++// no threads: plain counters that, like the variants above, yield the NEW value
++#else
++#define ff_qsv_atomic_inc(ptr) (++(*(ptr)))
++#define ff_qsv_atomic_dec(ptr) (--(*(ptr)))
++#endif
++
++
++// sleep is defined in milliseconds
++#define av_qsv_sleep(x) av_usleep((x)*1000)
++
++#define AV_QSV_ZERO_MEMORY(VAR)                    {memset(&VAR, 0, sizeof(VAR));}
++#define AV_QSV_ALIGN32(X)                      (((mfxU32)((X)+31)) & (~ (mfxU32)31))
++#define AV_QSV_ALIGN16(value)                  (((value + 15) >> 4) << 4)
++#ifndef AV_QSV_PRINT_RET_MSG
++#define AV_QSV_PRINT_RET_MSG(ERR)              { av_log(NULL, AV_LOG_FATAL,"Error code %d,\t%s\t%d\n", ERR, __FUNCTION__, __LINE__); }
++#endif
++
++#ifndef AV_QSV_DEBUG_ASSERT
++#define AV_QSV_DEBUG_ASSERT(x,y)               {if ((x)) {av_log(NULL, AV_LOG_FATAL,"\nASSERT: %s\n",y);};}
++#endif
++
++#define AV_QSV_CHECK_RESULT(P, X, ERR)             {if ((X) > (P)) {AV_QSV_PRINT_RET_MSG(ERR); return ERR;}}
++#define AV_QSV_CHECK_POINTER(P, ERR)               {if (!(P)) {AV_QSV_PRINT_RET_MSG(ERR); return ERR;}}
++#define AV_QSV_IGNORE_MFX_STS(P, X)                {if ((X) == (P)) {P = MFX_ERR_NONE;}}
++
++#define AV_QSV_ID_BUFFER MFX_MAKEFOURCC('B','U','F','F')
++#define AV_QSV_ID_FRAME  MFX_MAKEFOURCC('F','R','M','E')
++
++#define AV_QSV_SURFACE_NUM              80
++#define AV_QSV_SYNC_NUM                 (AV_QSV_SURFACE_NUM*3/4)
++#define AV_QSV_BUF_SIZE_DEFAULT         (4096*2160*10)
++#define AV_QSV_JOB_SIZE_DEFAULT         10
++#define AV_QSV_SYNC_TIME_DEFAULT        10000
++// see av_qsv_get_free_sync, av_qsv_get_free_surface: with threads, up to 100 scans spaced by av_qsv_sleep(5) (5ms) ~ 0.5 sec
++#define AV_QSV_REPEAT_NUM_DEFAULT      100
++#define AV_QSV_ASYNC_DEPTH_DEFAULT     4
++
++// version of MSDK/QSV API currently used
++#define AV_QSV_MSDK_VERSION_MAJOR  1
++#define AV_QSV_MSDK_VERSION_MINOR  3
++
++typedef enum AV_QSV_STAGE_TYPE {
++    // stage kinds; a *_MASK macro is defined next to each group of values
++#define AV_QSV_DECODE_MASK   0x001
++    AV_QSV_DECODE   = 0x001,
++
++#define AV_QSV_VPP_MASK      0x0F0
++    // "Mandatory VPP filter" , might be with "Hint-based VPP filters"
++    AV_QSV_VPP_DEFAULT = 0x010,
++    // "User Modules" etc
++    AV_QSV_VPP_USER = 0x020,
++    // NOTE(review): the encode mask below is spelled av_QSV_... unlike its siblings
++#define av_QSV_ENCODE_MASK   0x100
++    AV_QSV_ENCODE   = 0x100
++#define AV_QSV_ANY_MASK      0xFFF
++} AV_QSV_STAGE_TYPE;
++
++
++typedef struct av_qsv_list {
++    // practically pthread_mutex_t; 0 when the list was created without locking
++    void *mutex;
++#if HAVE_THREADS
++    pthread_mutexattr_t   mta;      // attributes the mutex is initialised with
++#endif
++    // growable array of item pointers
++    void **items;
++    int items_alloc;                // number of slots allocated in items
++    // number of slots currently in use
++    int items_count;
++} av_qsv_list;
++
++typedef struct av_qsv_sync {
++    mfxSyncPoint*   p_sync;     // sync point storage; av_qsv_get_free_sync treats a zero value as idle
++    int             in_use;     // claim counter: raised by av_qsv_get_free_sync, lowered by av_qsv_stage_clean
++} av_qsv_sync;
++
++typedef struct av_qsv_stage {
++    AV_QSV_STAGE_TYPE type;             // kind of stage, one of the AV_QSV_STAGE_TYPE values
++    struct {
++        mfxBitstream *p_bs;
++        mfxFrameSurface1 *p_surface;
++    } in;                               // input side of the stage
++    struct {
++        mfxBitstream *p_bs;
++        mfxFrameSurface1 *p_surface;
++        av_qsv_sync *sync;              // sync object of the stage; reset by av_qsv_stage_clean
++    } out;                              // output side of the stage
++    av_qsv_list *pending;               // list of pipes that av_qsv_flush_stages flushes along with this stage
++} av_qsv_stage;
++
++typedef struct av_qsv_task {
++    mfxBitstream *bs;       // NOTE(review): presumably the bitstream buffer of the task - confirm
++    av_qsv_stage *stage;    // its out.sync tells av_qsv_get_free_encode_task whether the task is free
++} av_qsv_task;
++
++
++typedef struct av_qsv_space {
++    // environment of one action (decode, VPP or encode): parameters, surfaces, sync points, buffers
++    uint8_t is_init_done;
++
++    AV_QSV_STAGE_TYPE type;
++
++    mfxVideoParam m_mfxVideoParam;
++
++    mfxFrameAllocResponse response;
++    mfxFrameAllocRequest request[2];    // [0] - in, [1] - out, if needed
++
++    mfxExtOpaqueSurfaceAlloc ext_opaque_alloc;
++    mfxExtBuffer **p_ext_params;
++    uint16_t p_ext_param_num;
++
++    uint16_t surface_num_max_used;      // highest index handed out by av_qsv_get_free_surface so far
++    uint16_t surface_num;               // number of p_surfaces entries scanned by av_qsv_get_free_surface
++    mfxFrameSurface1 *p_surfaces[AV_QSV_SURFACE_NUM];
++
++    uint16_t sync_num_max_used;         // highest index handed out by av_qsv_get_free_sync so far
++    uint16_t sync_num;                  // number of p_syncp entries scanned by av_qsv_get_free_sync
++    av_qsv_sync *p_syncp[AV_QSV_SYNC_NUM];
++
++    mfxBitstream bs;
++    uint8_t *p_buf;
++    size_t p_buf_max_size;
++
++    // only for encode and tasks
++    av_qsv_list *tasks;
++
++    av_qsv_list *pending;
++
++    // storage for allocations/mfxMemId*
++    mfxMemId *mids;
++} av_qsv_space;
++
++typedef struct av_qsv_context { // per-codec QSV state; the decoder stores it in AVCodecContext.priv_data
++    volatile int is_context_active;
++
++    mfxIMPL impl;           // decoder: copied from av_qsv_config.impl_requested
++    mfxSession mfx_session; // opened by MFXInit in ff_qsv_dec_init
++    mfxVersion ver;         // decoder: AV_QSV_MSDK_VERSION_MAJOR / AV_QSV_MSDK_VERSION_MINOR
++
++    // decode
++    av_qsv_space *dec_space;
++    // encode
++    av_qsv_space *enc_space;
++    // vpp
++    av_qsv_list *vpp_space;
++
++    av_qsv_list *pipes; // the decoder appends one single-stage pipe per decoded frame
++
++    // MediaSDK starting from API version 1.6 includes DecodeTimeStamp
++    // in addition to TimeStamp
++    // see also AV_QSV_MSDK_VERSION_MINOR , AV_QSV_MSDK_VERSION_MAJOR
++    av_qsv_list *dts_seq;
++
++    // practically pthread_mutex_t
++    void *qts_seq_mutex; // NOTE(review): presumably guards dts_seq ("qts" looks like a typo of "dts") - confirm
++
++    int is_anex; // one of ANEX_UNKNOWN / ANEX_PREFIX / ANEX_NO_PREFIX
++
++    void *qsv_config; // decoder: points at the av_qsv_config taken from AVCodecContext.hwaccel_context
++
++} av_qsv_context;
++
++typedef enum { // last argument of av_qsv_get_free_surface; the decoder passes QSV_PART_ANY
++    QSV_PART_ANY = 0,
++    QSV_PART_LOWER,
++    QSV_PART_UPPER
++} av_qsv_split;
++
++typedef struct { // presumably the element type of av_qsv_context.dts_seq - confirm in av_qsv_dts_ordered_insert
++    int64_t dts;
++} av_qsv_dts;
++
++typedef struct av_qsv_alloc_frame { // NOTE(review): presumably the header of a frame made by ff_qsv_mem_frame_alloc - confirm
++    mfxU32 id; // presumably AV_QSV_ID_FRAME - confirm
++    mfxFrameInfo info;
++} av_qsv_alloc_frame;
++
++typedef struct av_qsv_alloc_buffer { // NOTE(review): presumably the header of a buffer made by ff_qsv_mem_buffer_alloc - confirm
++    mfxU32 id; // presumably AV_QSV_ID_BUFFER - confirm
++    mfxU32 nbytes;
++    mfxU16 type;
++} av_qsv_alloc_buffer;
++
++typedef struct av_qsv_allocators_space { // external allocator callbacks referenced by av_qsv_config.allocators
++    av_qsv_space *space;             // ff_qsv_dec_init points this at the decode space
++    mfxFrameAllocator frame_alloc;   // the decoder calls its Alloc, Lock and Free in system-memory mode
++    mfxBufferAllocator buffer_alloc;
++} av_qsv_allocators_space;
++
++typedef struct av_qsv_config {
++    /**
++     * Set async depth of processing with QSV
++     * Format: 0 and more
++     *
++     * - encoding: Set by user.
++     * - decoding: Set by user.
++     */
++    int async_depth;
++
++    /**
++     * Range of numbers that indicate trade-offs between quality and speed.
++     * Format: from 1/MFX_TARGETUSAGE_BEST_QUALITY to 7/MFX_TARGETUSAGE_BEST_SPEED inclusive
++     *
++     * - encoding: Set by user.
++     * - decoding: unused
++     */
++    int target_usage;
++
++    /**
++     * Number of reference frames; if NumRefFrame = 0, this parameter is not specified.
++     * Format: 0 and more
++     *
++     * - encoding: Set by user.
++     * - decoding: unused
++     */
++    int num_ref_frame;
++
++    /**
++     * Distance between I- or P- key frames; if it is zero, the GOP structure is unspecified.
++     * Note: If GopRefDist = 1, there are no B-frames used.
++     *
++     * - encoding: Set by user.
++     * - decoding: unused
++     */
++     int gop_ref_dist;
++
++    /**
++     * Number of pictures within the current GOP (Group of Pictures); if GopPicSize=0,
++     * then the GOP size is unspecified. If GopPicSize=1, only I-frames are used.
++     *
++     * - encoding: Set by user.
++     * - decoding: unused
++     */
++     int gop_pic_size;
++
++    /**
++     * Set type of surfaces used with QSV
++     * Format: "IOPattern enum" of Media SDK
++     *
++     * - encoding: Set by user.
++     * - decoding: Set by user.
++     */
++    int io_pattern;
++
++    /**
++     * Set the amount of additional surfaces that might be needed
++     * Format: amount of additional buffers(surfaces+syncs)
++     * to allocate in advance
++     *
++     * - encoding: Set by user.
++     * - decoding: Set by user.
++     */
++    int additional_buffers;
++
++    /**
++     * Whether the pipeline should be synchronized; 0 means no forced sync.
++     * Format: wait time in milliseconds, passed to MFXVideoCORE_SyncOperation;
++     *         AV_QSV_SYNC_TIME_DEFAULT/10000 might be a good value
++     *
++     * - encoding: Set by user.
++     * - decoding: Set by user.
++     */
++    int sync_need;
++
++    /**
++     * Type of implementation needed
++     *
++     * - encoding: Set by user.
++     * - decoding: Set by user.
++     */
++    int impl_requested;
++
++    /**
++     * if QSV usage is multithreaded.
++     * Format: Yes/No, 1/0
++     *
++     * - encoding: Set by user.
++     * - decoding: Set by user.
++     */
++    int usage_threaded;
++
++    /**
++     * if QSV uses an external allocation (valid per session/mfxSession)
++     * Format: pointer to allocators, if default: 0
++     *
++     * note that:
++     * System Memory:   can be used without provided and external allocator,
++     *  meaning MediaSDK will use an internal one
++     * Video Memory:    in this case - we must provide an external allocator
++     * Also, Media SDK session doesn't require external allocator if the application
++     *  uses opaque memory
++     *
++     * Calls SetFrameAllocator/SetBufferAllocator
++     * (MFXVideoCORE_SetFrameAllocator/MFXVideoCORE_SetBufferAllocator)
++     * are to pass allocators to Media SDK
++     *
++     * - encoding: Set by user.
++     * - decoding: Set by user.
++     */
++    av_qsv_allocators_space *allocators;
++
++} av_qsv_config;
++
++#define ANEX_UNKNOWN    0 // values of av_qsv_context.is_anex: input format not probed yet
++#define ANEX_PREFIX     1 // packets already begin with a 00 00 00 01 start code and are copied as is
++#define ANEX_NO_PREFIX  2 // packets carry length-prefixed NAL units; the decoder inserts ff_prefix_code itself
++
++static const uint8_t ff_prefix_code[] = { 0x00, 0x00, 0x00, 0x01 }; // 4-byte start code; static, so each including file gets its own copy
++
++int av_qsv_get_free_sync(av_qsv_space *, av_qsv_context *);
++int av_qsv_get_free_surface(av_qsv_space *, av_qsv_context *, mfxFrameInfo *,
++                     av_qsv_split);
++int av_qsv_get_free_encode_task(av_qsv_list *);
++
++int av_is_qsv_available(mfxIMPL, mfxVersion *);
++void av_qsv_wait_on_sync(av_qsv_context *, av_qsv_stage *);
++
++void av_qsv_add_context_usage(av_qsv_context *, int);
++
++void av_qsv_pipe_list_create(av_qsv_list **, int);
++void av_qsv_pipe_list_clean(av_qsv_list **);
++
++void av_qsv_add_stagee(av_qsv_list **, av_qsv_stage *, int);
++av_qsv_stage *av_qsv_get_last_stage(av_qsv_list *);
++av_qsv_list *av_qsv_pipe_by_stage(av_qsv_list *, av_qsv_stage *);
++void av_qsv_flush_stages(av_qsv_list *, av_qsv_list **);
++
++void av_qsv_dts_ordered_insert(av_qsv_context *, int, int, int64_t, int);
++void av_qsv_dts_pop(av_qsv_context *);
++
++av_qsv_stage *av_qsv_stage_init(void);
++void av_qsv_stage_clean(av_qsv_stage **);
++int av_qsv_context_clean(av_qsv_context *);
++
++int ff_qsv_is_sync_in_pipe(mfxSyncPoint *, av_qsv_context *);
++int ff_qsv_is_surface_in_pipe(mfxFrameSurface1 *, av_qsv_context *);
++
++av_qsv_list *av_qsv_list_init(int);
++int av_qsv_list_lock(av_qsv_list *);
++int av_qsv_list_unlock(av_qsv_list *);
++int av_qsv_list_add(av_qsv_list *, void *);
++void av_qsv_list_rem(av_qsv_list *, void *);
++void av_qsv_list_insert(av_qsv_list *, int, void *);
++void av_qsv_list_close(av_qsv_list **);
++
++int av_qsv_list_count(av_qsv_list *);
++void *av_qsv_list_item(av_qsv_list *, int);
++
++/* @} */
++
++#endif                          //AVCODEC_QSV_H
+diff -Naur ../../libav-v9.6/libavcodec/qsv_h264.c ./libavcodec/qsv_h264.c
+--- ../../libav-v9.6/libavcodec/qsv_h264.c	1970-01-01 01:00:00.000000000 +0100
++++ ./libavcodec/qsv_h264.c	2013-08-19 21:32:01.705244194 +0200
+@@ -0,0 +1,974 @@
++/* ********************************************************************* *\
++
++Copyright (C) 2013 Intel Corporation.  All rights reserved.
++
++Redistribution and use in source and binary forms, with or without
++modification, are permitted provided that the following conditions are met:
++- Redistributions of source code must retain the above copyright notice,
++this list of conditions and the following disclaimer.
++- Redistributions in binary form must reproduce the above copyright notice,
++this list of conditions and the following disclaimer in the documentation
++and/or other materials provided with the distribution.
++- Neither the name of Intel Corporation nor the names of its contributors
++may be used to endorse or promote products derived from this software
++without specific prior written permission.
++
++THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION "AS IS" AND ANY EXPRESS OR
++IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
++OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
++IN NO EVENT SHALL INTEL CORPORATION BE LIABLE FOR ANY DIRECT, INDIRECT,
++INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
++NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
++DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
++THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
++THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++
++\* ********************************************************************* */
++
++#include "h264.h"
++#include "h264data.h"
++#include "qsv_h264.h"
++
++static av_qsv_config av_qsv_default_config = { // installed as hwaccel_context when the caller supplied none
++    .async_depth        = AV_QSV_ASYNC_DEPTH_DEFAULT,
++    .target_usage       = MFX_TARGETUSAGE_BALANCED,
++    .num_ref_frame      = 0,
++    .gop_ref_dist       = 0,
++    .gop_pic_size       = 0,
++    .io_pattern         = MFX_IOPATTERN_OUT_OPAQUE_MEMORY,
++    .additional_buffers = 0,
++    .sync_need          = 0, // 0: the decoder does not force a sync after each frame
++    .impl_requested     = MFX_IMPL_HARDWARE,
++    .usage_threaded     = 0,
++    .allocators         = 0, // ff_qsv_dec_init stores default allocators into the active config when this is 0 in system-memory mode
++};
++
++static av_qsv_allocators_space av_qsv_default_system_allocators = { // fallback used by ff_qsv_dec_init when the config has no allocators
++    // fill to access mids
++    .space = 0, // ff_qsv_dec_init overwrites this with the decode space
++
++    .frame_alloc = {
++                    .pthis      = &av_qsv_default_system_allocators, // the callbacks get this object back as their context
++                    .Alloc      = ff_qsv_mem_frame_alloc,
++                    .Lock       = ff_qsv_mem_frame_lock,
++                    .Unlock     = ff_qsv_mem_frame_unlock,
++                    .GetHDL     = ff_qsv_mem_frame_getHDL,
++                    .Free       = ff_qsv_mem_frame_free,
++                    },
++    .buffer_alloc = {
++                     .pthis     = &av_qsv_default_system_allocators,
++                     .Alloc     = ff_qsv_mem_buffer_alloc,
++                     .Lock      = ff_qsv_mem_buffer_lock,
++                     .Unlock    = ff_qsv_mem_buffer_unlock,
++                     .Free      = ff_qsv_mem_buffer_free,
++                     },
++};
++
++static const uint8_t ff_slice_code[] = { 0x00, 0x00, 0x01, 0x65 }; // 00 00 01 + NAL header 0x65 (type 5); appended so DecodeHeader finds a slice
++
++// Returns 1 if the 4-byte start code 00 00 00 01 occurs anywhere in pb[0..size), otherwise 0.
++int ff_qsv_nal_find_start_code(uint8_t * pb, size_t size)
++{
++    size_t pos;
++
++    if ((int) size < 4)
++        return 0;
++
++    for (pos = 0; pos + 4 <= size; pos++) {
++        if (!pb[pos] && !pb[pos + 1] && !pb[pos + 2] && pb[pos + 3] == 1)
++            return 1;
++    }
++
++    return 0;
++}
++
++// Undo a failed ff_qsv_decode_init(): clean the QSV context and free priv_data. Always returns 0.
++int ff_qsv_dec_init_clean(AVCodecContext *avctx)
++{
++     if (avctx->priv_data) // stays NULL when init failed before the context was stored
++         av_qsv_context_clean(avctx->priv_data);
++     av_freep(&avctx->priv_data); // NOTE(review): dec_space and its buffers are not freed here - confirm av_qsv_context_clean does it
++     return 0;
++}
++// Open the Media SDK session, pass SPS/PPS from extradata to DecodeHeader, allocate surfaces and sync
++// points, then init the decoder. Returns 0 on success; failures leave via AV_QSV_CHECK_* or return -1.
++int ff_qsv_dec_init(AVCodecContext * avctx)
++{
++    int ret = 0;
++    mfxStatus sts = MFX_ERR_NONE;
++    size_t current_offset = 6;
++    int header_size = 0;
++    unsigned char *current_position;
++    size_t current_size;
++
++    av_qsv_context *qsv = avctx->priv_data;
++    av_qsv_space *qsv_decode = qsv->dec_space;
++    av_qsv_config *qsv_config_context = avctx->hwaccel_context;
++
++    qsv->impl = qsv_config_context->impl_requested;
++
++    memset(&qsv->mfx_session, 0, sizeof(mfxSession));
++    qsv->ver.Major = AV_QSV_MSDK_VERSION_MAJOR;
++    qsv->ver.Minor = AV_QSV_MSDK_VERSION_MINOR;
++
++    sts = MFXInit(qsv->impl, &qsv->ver, &qsv->mfx_session);
++    AV_QSV_CHECK_RESULT(sts, MFX_ERR_NONE, sts);
++
++    AV_QSV_ZERO_MEMORY(qsv_decode->m_mfxVideoParam);
++    AV_QSV_ZERO_MEMORY(qsv_decode->m_mfxVideoParam.mfx);
++    qsv_decode->m_mfxVideoParam.mfx.CodecId = MFX_CODEC_AVC;
++    qsv_decode->m_mfxVideoParam.IOPattern   = qsv_config_context->io_pattern;
++    qsv_decode->m_mfxVideoParam.AsyncDepth  = qsv_config_context->async_depth;
++
++    AV_QSV_ZERO_MEMORY(qsv_decode->bs);
++    {
++        current_position    = avctx->extradata;
++        current_size        = avctx->extradata_size;
++
++        if (!ff_qsv_nal_find_start_code(current_position, current_size)) {
++
++            while (current_offset + 2 < current_size) { // 2 length bytes + NAL header byte must be readable
++                int current_nal_size =
++                    (unsigned char) current_position[current_offset] << 8 |
++                    (unsigned char) current_position[current_offset + 1];
++                unsigned char nal_type = current_position[current_offset + 2] & 0x1F;
++
++                // a NAL unit that claims to run past the extradata would make memcpy over-read
++                if (current_offset + 2 + current_nal_size > current_size)
++                    break;
++                if (nal_type == NAL_SPS || nal_type == NAL_PPS) {
++                    memcpy(&qsv_decode->p_buf[header_size], ff_prefix_code,
++                           sizeof(ff_prefix_code));
++                    header_size += sizeof(ff_prefix_code);
++                    memcpy(&qsv_decode->p_buf[header_size],
++                           &current_position[current_offset + 2],
++                           current_nal_size);
++
++                    // fix for PPS as it comes after SPS, so - last
++                    if (nal_type == NAL_PPS) {
++                        // fix of MFXVideoDECODE_DecodeHeader: needs one SLICE to find, any SLICE
++                        memcpy(&qsv_decode->p_buf
++                               [header_size + current_nal_size],
++                               ff_slice_code, sizeof(ff_slice_code));
++                        header_size += sizeof(ff_slice_code);
++                    }
++                    header_size += current_nal_size; // only NAL units that were copied occupy header space
++                }
++
++                current_offset += current_nal_size + 3;
++            }
++        } else {
++            memcpy(&qsv_decode->p_buf[0], avctx->extradata,
++                   avctx->extradata_size);
++            header_size = avctx->extradata_size;
++            memcpy(&qsv_decode->p_buf
++                   [header_size], ff_slice_code, sizeof(ff_slice_code));
++            header_size += sizeof(ff_slice_code);
++        }
++    }
++
++    qsv_decode->bs.Data         = qsv_decode->p_buf;
++    qsv_decode->bs.DataLength   = header_size;
++    qsv_decode->bs.MaxLength    = qsv_decode->p_buf_max_size;
++
++    if (qsv_decode->bs.DataLength > qsv_decode->bs.MaxLength) {
++        av_log(avctx, AV_LOG_FATAL, "DataLength > MaxLength\n");
++        return -1;
++    }
++
++    sts = MFXVideoDECODE_DecodeHeader(qsv->mfx_session, &qsv_decode->bs,
++                                    &qsv_decode->m_mfxVideoParam);
++
++    AV_QSV_CHECK_RESULT(sts, MFX_ERR_NONE, sts);
++
++    qsv_decode->bs.DataLength   -= sizeof(ff_slice_code);
++
++    memset(qsv_decode->request, 0, sizeof(qsv_decode->request));
++    sts = MFXVideoDECODE_QueryIOSurf(qsv->mfx_session,
++                                   &qsv_decode->m_mfxVideoParam,
++                                   qsv_decode->request);
++
++    AV_QSV_IGNORE_MFX_STS(sts, MFX_WRN_PARTIAL_ACCELERATION);
++    AV_QSV_CHECK_RESULT(sts, MFX_ERR_NONE, sts);
++
++    qsv_decode->surface_num =
++        FFMIN(qsv_decode->request[0].NumFrameSuggested +
++            qsv_config_context->async_depth +
++            qsv_config_context->additional_buffers, AV_QSV_SURFACE_NUM);
++
++    if (qsv_decode->surface_num <= 0)
++        qsv_decode->surface_num = AV_QSV_SURFACE_NUM;
++
++    if (qsv_decode->m_mfxVideoParam.IOPattern ==
++        MFX_IOPATTERN_OUT_SYSTEM_MEMORY) {
++
++        // as per non-opaque memory:
++        if (!qsv_config_context->allocators) {
++            av_log(avctx, AV_LOG_INFO,
++                   "Using default allocators for QSV decode\n");
++            ((av_qsv_config *) avctx->hwaccel_context)->allocators =
++                &av_qsv_default_system_allocators;
++        }
++
++        qsv_config_context->allocators->space = qsv_decode;
++
++        qsv_decode->request[0].NumFrameMin       = qsv_decode->surface_num;
++        qsv_decode->request[0].NumFrameSuggested = qsv_decode->surface_num;
++
++        qsv_decode->request[0].Type = MFX_MEMTYPE_EXTERNAL_FRAME | MFX_MEMTYPE_FROM_DECODE;
++        qsv_decode->request[0].Type |= MFX_MEMTYPE_SYSTEM_MEMORY;
++
++        sts = qsv_config_context->allocators->
++            frame_alloc.Alloc(qsv_config_context->allocators,
++                              &qsv_decode->request[0],
++                              &qsv_decode->response);
++        AV_QSV_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // response.mids is read below, so a failed Alloc must stop here
++    }
++
++    for (int i = 0; i < qsv_decode->surface_num; i++) {
++        qsv_decode->p_surfaces[i] = av_mallocz(sizeof(mfxFrameSurface1));
++        AV_QSV_CHECK_POINTER(qsv_decode->p_surfaces[i],
++                           AVERROR(ENOMEM));
++        memcpy(&(qsv_decode->p_surfaces[i]->Info),
++               &(qsv_decode->request[0].Info), sizeof(mfxFrameInfo));
++
++        // for an external(like DX9/11) based allocation:
++        // we bind:
++        //    m_pmfxSurfaces[i].Data.MemId = m_mfxResponse.mids[i];
++        // else, System memory:
++        if (qsv_decode->m_mfxVideoParam.IOPattern ==
++            MFX_IOPATTERN_OUT_SYSTEM_MEMORY) {
++            sts =
++                qsv_config_context->allocators->
++                frame_alloc.Lock(qsv_config_context->allocators,
++                                 qsv_decode->response.mids[i],
++                                 &(qsv_decode->p_surfaces[i]->Data));
++            AV_QSV_CHECK_RESULT(sts, MFX_ERR_NONE, sts);
++        }
++    }
++
++    qsv_decode->sync_num = FFMIN(qsv_decode->surface_num, AV_QSV_SYNC_NUM);
++    for (int i = 0; i < qsv_decode->sync_num; i++) {
++        qsv_decode->p_syncp[i] = av_mallocz(sizeof(av_qsv_sync));
++        AV_QSV_CHECK_POINTER(qsv_decode->p_syncp[i], AVERROR(ENOMEM));
++        qsv_decode->p_syncp[i]->p_sync = av_mallocz(sizeof(mfxSyncPoint));
++        AV_QSV_CHECK_POINTER(qsv_decode->p_syncp[i]->p_sync, AVERROR(ENOMEM));
++    }
++
++    memset(&qsv_decode->ext_opaque_alloc, 0, sizeof(mfxExtOpaqueSurfaceAlloc));
++
++    if (qsv_decode->m_mfxVideoParam.IOPattern ==
++        MFX_IOPATTERN_OUT_OPAQUE_MEMORY) {
++        qsv_decode->m_mfxVideoParam.NumExtParam     = qsv_decode->p_ext_param_num = 1;
++
++        qsv_decode->p_ext_params = av_mallocz(sizeof(mfxExtBuffer *)*qsv_decode->p_ext_param_num);
++        AV_QSV_CHECK_POINTER(qsv_decode->p_ext_params, AVERROR(ENOMEM));
++
++        qsv_decode->m_mfxVideoParam.ExtParam        = qsv_decode->p_ext_params;
++
++        qsv_decode->ext_opaque_alloc.Out.Surfaces   = qsv_decode->p_surfaces;
++        qsv_decode->ext_opaque_alloc.Out.NumSurface = qsv_decode->surface_num;
++        qsv_decode->ext_opaque_alloc.Out.Type       = qsv_decode->request[0].Type;
++
++        qsv_decode->ext_opaque_alloc.Header.BufferId    = MFX_EXTBUFF_OPAQUE_SURFACE_ALLOCATION;
++        qsv_decode->ext_opaque_alloc.Header.BufferSz    = sizeof(mfxExtOpaqueSurfaceAlloc);
++        qsv_decode->p_ext_params[0]                     = (mfxExtBuffer *) &qsv_decode->ext_opaque_alloc;
++    }
++
++    sts =
++        MFXVideoDECODE_Init(qsv->mfx_session,
++                            &qsv_decode->m_mfxVideoParam);
++
++    AV_QSV_CHECK_RESULT(sts, MFX_ERR_NONE, sts);
++
++    qsv_decode->is_init_done = 1;
++    return ret;
++}
++
++// Allocate (or reuse) the decoder context and its bitstream buffer, select the QSV config and run
++// ff_qsv_dec_init(). Returns 0 without doing anything if already initialised or if extradata is empty.
++av_cold int ff_qsv_decode_init(AVCodecContext * avctx)
++{
++    av_qsv_context *qsv = avctx->priv_data;
++    av_qsv_space *qsv_decode = qsv ? qsv->dec_space : NULL; // NULL until a decode space exists
++    av_qsv_config **qsv_config_context = (av_qsv_config **) & avctx->hwaccel_context;
++
++    if ((qsv_decode && qsv_decode->is_init_done) || !avctx->extradata_size)
++        return 0;
++
++    if(!qsv)
++        qsv = av_mallocz(sizeof(av_qsv_context));
++    if (!qsv)
++        return AVERROR(ENOMEM);
++
++    if(!qsv_decode)
++        qsv_decode = av_mallocz(sizeof(av_qsv_space));
++    if (!qsv_decode){
++        if (qsv != avctx->priv_data) // release only a context allocated above, never one owned by avctx
++            av_free(qsv);
++        return AVERROR(ENOMEM);
++    }
++    avctx->priv_data = qsv;
++    qsv->dec_space = qsv_decode;
++
++    qsv_decode->p_buf_max_size = AV_QSV_BUF_SIZE_DEFAULT;
++    if(!qsv_decode->p_buf)
++        qsv_decode->p_buf = av_malloc(qsv_decode->p_buf_max_size * sizeof(uint8_t));
++    if (!qsv_decode->p_buf)
++        return AVERROR(ENOMEM);
++
++    if (!(*qsv_config_context)) {
++        av_log(avctx, AV_LOG_INFO,
++               "Using default config for QSV decode\n");
++        avctx->hwaccel_context = &av_qsv_default_config;
++    } else {
++        if ((*qsv_config_context)->io_pattern !=
++            MFX_IOPATTERN_OUT_OPAQUE_MEMORY
++            && (*qsv_config_context)->io_pattern !=
++            MFX_IOPATTERN_OUT_SYSTEM_MEMORY) {
++            av_log_missing_feature( avctx,"Only MFX_IOPATTERN_OUT_OPAQUE_MEMORY and MFX_IOPATTERN_OUT_SYSTEM_MEMORY are currently supported\n",0);
++            return AVERROR_PATCHWELCOME;
++        }
++    }
++
++    qsv->qsv_config = avctx->hwaccel_context;
++
++    av_qsv_add_context_usage(qsv,
++                          HAVE_THREADS
++                          ? (*qsv_config_context)->usage_threaded :
++                          HAVE_THREADS);
++
++    // allocation of p_syncp and p_surfaces inside of ff_qsv_dec_init
++    return ff_qsv_dec_init(avctx);
++}
++
++// Tear down the decoder: log usage, free what the decode space owns, clean the context. NULL-tolerant; returns 0.
++static av_cold int qsv_decode_end(AVCodecContext * avctx)
++{
++    mfxStatus sts       = MFX_ERR_NONE;
++    av_qsv_context *qsv    = avctx->priv_data;
++    av_qsv_config *qsv_config_context = avctx->hwaccel_context;
++    av_qsv_space *qsv_decode = qsv ? qsv->dec_space : NULL;
++
++    if (qsv_decode) { // a context without a decode space has nothing decoder-specific to release
++        if (qsv_decode->is_init_done) {
++            // todo: change to AV_LOG_INFO
++            av_log(avctx, AV_LOG_QUIET,
++                   "qsv_decode report done, max_surfaces: %u/%u , max_syncs: %u/%u\n",
++                   qsv_decode->surface_num_max_used,
++                   qsv_decode->surface_num, qsv_decode->sync_num_max_used,
++                   qsv_decode->sync_num);
++        }
++
++        if (qsv_config_context &&
++            qsv_config_context->io_pattern == MFX_IOPATTERN_OUT_SYSTEM_MEMORY) {
++            if (qsv_config_context->allocators) {
++                sts = qsv_config_context->allocators->frame_alloc.Free(qsv_config_context->allocators,
++                                                                       &qsv_decode->response);
++                if (sts < MFX_ERR_NONE) // log and keep going: the remaining resources must still be released
++                    av_log(avctx, AV_LOG_ERROR, "QSV frame allocator Free failed: %d\n", (int) sts);
++            } else {
++                av_log(avctx, AV_LOG_FATAL,
++                       "No QSV allocators found for clean up\n");
++            }
++        }
++        // closing the own resources
++        av_freep(&qsv_decode->p_buf);
++
++        for (int i = 0; i < qsv_decode->surface_num; i++) {
++            av_freep(&qsv_decode->p_surfaces[i]);
++        }
++        qsv_decode->surface_num = 0;
++
++        if( qsv_decode->p_ext_param_num || qsv_decode->p_ext_params )
++            av_freep(&qsv_decode->p_ext_params);
++        qsv_decode->p_ext_param_num = 0;
++
++        for (int i = 0; i < qsv_decode->sync_num; i++) {
++            if (qsv_decode->p_syncp[i]) // a slot is NULL if its allocation failed during init
++                av_freep(&qsv_decode->p_syncp[i]->p_sync);
++            av_freep(&qsv_decode->p_syncp[i]);
++        }
++        qsv_decode->sync_num = 0;
++        qsv_decode->is_init_done = 0;
++
++        av_freep(&qsv->dec_space);
++    }
++
++    if (qsv) // closing common stuff
++        av_qsv_context_clean(qsv);
++
++    return 0;
++}
++
++static int qsv_decode_frame(AVCodecContext * avctx, void *data,
++                            int *data_size, AVPacket * avpkt)
++{
++    mfxStatus sts = MFX_ERR_NONE;
++    av_qsv_context *qsv = avctx->priv_data;
++    av_qsv_space *qsv_decode;
++    av_qsv_config *qsv_config_context = avctx->hwaccel_context;
++    int *got_picture_ptr = data_size;
++    int ret_value = 1;
++    uint8_t *current_position = avpkt->data;
++    int current_size = avpkt->size;
++    int frame_processed = 0;
++    size_t frame_length = 0;
++    int surface_idx = 0;
++    int extra_data_workaround = 0;
++
++    int sync_idx = 0;
++    int current_nal_size;
++    unsigned char nal_type;
++    av_qsv_stage *new_stage = 0;
++    mfxBitstream *input_bs = NULL;
++    size_t current_offset = 2;
++    av_qsv_list *qsv_atom = 0;
++    av_qsv_list *pipe = 0;
++
++    AVFrame *picture = (AVFrame *) data;
++
++    *got_picture_ptr = 0;
++
++    qsv = avctx->priv_data;
++    if(!qsv){
++        extra_data_workaround = !avctx->extradata_size;
++        if(extra_data_workaround){
++            avctx->extradata = avpkt->data;
++            avctx->extradata_size = avpkt->size;
++        }
++        sts = ff_qsv_decode_init(avctx);
++        qsv = avctx->priv_data;
++        if(extra_data_workaround){
++            avctx->extradata = 0;
++            avctx->extradata_size = 0;
++        }
++        if(sts<0){
++             ff_qsv_dec_init_clean(avctx);
++             *got_picture_ptr = 0;
++             return sts;
++   	    }
++    }
++    qsv_decode = qsv->dec_space;
++
++    if (qsv_decode->bs.DataOffset + qsv_decode->bs.DataLength +
++        current_size > qsv_decode->bs.MaxLength) {
++        memmove(&qsv_decode->bs.Data[0],
++                qsv_decode->bs.Data + qsv_decode->bs.DataOffset,
++                qsv_decode->bs.DataLength);
++        qsv_decode->bs.DataOffset = 0;
++    }
++
++    if (current_size) {
++        if(qsv->is_anex == ANEX_UNKNOWN){
++            if (ff_qsv_nal_find_start_code(current_position, current_size) && current_position == avpkt->data)
++               qsv->is_anex = ANEX_PREFIX;
++            else
++               qsv->is_anex = ANEX_NO_PREFIX;
++        }
++        if (qsv->is_anex == ANEX_PREFIX){
++            memcpy(&qsv_decode->bs.Data[0] +
++                   qsv_decode->bs.DataLength +
++                   qsv_decode->bs.DataOffset,
++                   avpkt->data,
++                   avpkt->size);
++            qsv_decode->bs.DataLength += avpkt->size;
++            frame_length += avpkt->size;
++        }
++        else
++            while (current_offset <= current_size) {
++                current_nal_size =
++                    ((unsigned char) current_position[current_offset - 2] << 24 |
++                     (unsigned char) current_position[current_offset -  1] << 16 |
++                     (unsigned char) current_position[current_offset] << 8 |
++                     (unsigned char) current_position[current_offset + 1]) - 1;
++                nal_type =
++                    (unsigned char) current_position[current_offset + 2] & 0x1F;
++                {
++                    frame_length += current_nal_size;
++                    memcpy(&qsv_decode->bs.Data[0] +
++                           qsv_decode->bs.DataLength +
++                           qsv_decode->bs.DataOffset, ff_prefix_code,
++                           sizeof(ff_prefix_code));
++                    qsv_decode->bs.DataLength += sizeof(ff_prefix_code);
++                    memcpy(&qsv_decode->bs.Data[0] +
++                           qsv_decode->bs.DataLength +
++                           qsv_decode->bs.DataOffset,
++                           &current_position[current_offset + 2],
++                           current_nal_size + 1);
++                    qsv_decode->bs.DataLength += current_nal_size + 1;
++                }
++                current_offset += current_nal_size + 5;
++            }
++
++        if (qsv_decode->bs.DataLength > qsv_decode->bs.MaxLength) {
++            av_log(avctx, AV_LOG_FATAL, "DataLength > MaxLength\n");
++            return -1;
++        }
++    }
++
++    if (frame_length || current_size == 0) {
++
++        qsv_decode->bs.TimeStamp = avpkt->pts;
++
++        //not a drain
++        if ((current_size || qsv_decode->bs.DataLength))
++            av_qsv_dts_ordered_insert(qsv, 0, 0, qsv_decode->bs.TimeStamp, 0);
++
++        sts = MFX_ERR_NONE;
++        // ignore warnings, where warnings >0 , and not error codes <0
++        while (MFX_ERR_NONE <= sts || MFX_ERR_MORE_SURFACE == sts
++               || MFX_WRN_DEVICE_BUSY == sts) {
++
++            if (MFX_ERR_MORE_SURFACE == sts || MFX_ERR_NONE == sts) {
++                surface_idx =
++                    av_qsv_get_free_surface(qsv_decode, qsv,
++                                     &qsv_decode->request[0].Info,
++                                     QSV_PART_ANY);
++
++                if (surface_idx == -1) {
++                    *got_picture_ptr = 0;
++                    return 0;
++                }
++            }
++
++            if (MFX_WRN_DEVICE_BUSY == sts)
++                av_qsv_sleep(10);
++
++            sync_idx = av_qsv_get_free_sync(qsv_decode, qsv);
++
++            if (sync_idx == -1) {
++                *got_picture_ptr = 0;
++                return 0;
++            }
++            new_stage = av_qsv_stage_init();
++            input_bs = NULL;
++            // if to drain last ones
++            if (current_size || qsv_decode->bs.DataLength)
++                input_bs = &qsv_decode->bs;
++            // Decode a frame asynchronously (returns immediately)
++            // very first IDR / SLICE should be with SPS/PPS
++            sts = MFXVideoDECODE_DecodeFrameAsync(qsv->mfx_session, input_bs,
++                                                qsv_decode->p_surfaces
++                                                [surface_idx],
++                                                &new_stage->out.p_surface,
++                                                qsv_decode->p_syncp[sync_idx]->p_sync);
++
++            new_stage->out.sync = qsv_decode->p_syncp[sync_idx];
++            // have some results
++            if (MFX_ERR_NONE <= sts && MFX_WRN_DEVICE_BUSY != sts &&
++                MFX_WRN_VIDEO_PARAM_CHANGED != sts) {
++
++                ff_qsv_atomic_inc(&(new_stage->out.p_surface->Data.Locked));
++
++                new_stage->type         = AV_QSV_DECODE;
++                new_stage->in.p_bs      = input_bs;
++                new_stage->in.p_surface = qsv_decode->p_surfaces[surface_idx];
++
++                pipe = av_qsv_list_init(HAVE_THREADS ? qsv_config_context->usage_threaded : HAVE_THREADS);
++                av_qsv_add_stagee(&pipe, new_stage,
++                              HAVE_THREADS ?
++                              qsv_config_context->usage_threaded :
++                              HAVE_THREADS);
++
++                av_qsv_list_add(qsv->pipes, pipe);
++                qsv_atom = pipe;
++
++                // usage for forced decode sync and results, can be avoided if sync done by next stage
++                // also note wait time for Sync and possible usage with MFX_WRN_IN_EXECUTION check
++                if (qsv_config_context->sync_need) {
++                    sts =
++                        MFXVideoCORE_SyncOperation(qsv->mfx_session,
++                                                   qsv_decode->p_syncp[sync_idx]->p_sync,
++                                                   qsv_config_context->sync_need);
++                    AV_QSV_CHECK_RESULT(sts, MFX_ERR_NONE, sts);
++
++                    // no need to wait more -> force off
++                    ff_qsv_atomic_dec(&qsv_decode->p_syncp[sync_idx]->in_use);
++                    new_stage->out.sync   = 0;
++                }
++
++                sts = MFX_ERR_NONE;
++                break;
++            }
++            av_qsv_stage_clean(&new_stage);
++
++            /*
++               MFX_ERR_MORE_DATA can show up here in two situations:
++               - runtime situation: input is still pending (current_size != 0),
++               - drain procedure:
++               At the end of the bitstream, the application continuously calls the MFXVideoDECODE_DecodeFrameAsync function with a
++               NULL bitstream pointer to drain any remaining frames cached within the Intel
++               Media SDK decoder, until the function returns MFX_ERR_MORE_DATA.
++             */
++            if (MFX_ERR_MORE_DATA == sts) {
++                // not a drain
++                if (current_size) {
++                    *got_picture_ptr = 0;
++                    return avpkt->size;
++                }
++                // drain
++                break;
++            }
++            if (MFX_ERR_MORE_SURFACE == sts  ){
++                continue;
++            }
++
++            AV_QSV_CHECK_RESULT(sts, MFX_ERR_NONE, sts);
++        }
++
++        frame_processed = 1;
++    }
++
++    if (frame_processed) {
++
++        if (current_size) {
++            *got_picture_ptr    = 1;
++            ret_value           = avpkt->size;
++        } else {
++            if (MFX_ERR_MORE_DATA != sts) {
++                *got_picture_ptr    = 1;
++                ret_value           = avpkt->size;
++            } else {
++                *got_picture_ptr = 0;
++                return 0;
++            }
++        }
++
++        picture->pkt_pts            = new_stage->out.p_surface->Data.TimeStamp;
++        picture->pts                = new_stage->out.p_surface->Data.TimeStamp;
++
++        picture->repeat_pict        = (qsv_decode->m_mfxVideoParam.mfx.FrameInfo.PicStruct & MFX_PICSTRUCT_FIELD_REPEATED);
++        picture->interlaced_frame   = !(qsv_decode->m_mfxVideoParam.mfx.FrameInfo.PicStruct & MFX_PICSTRUCT_PROGRESSIVE);
++        picture->top_field_first    = (qsv_decode->m_mfxVideoParam.mfx.FrameInfo.PicStruct & MFX_PICSTRUCT_FIELD_TFF);
++
++        // since we do not know it yet from MSDK, let's do just a simple way for now
++        picture->key_frame          = (avctx->frame_number == 0) ? 1 : 0;
++
++        if (qsv_decode->m_mfxVideoParam.IOPattern == MFX_IOPATTERN_OUT_SYSTEM_MEMORY) {
++            picture->data[0]        = new_stage->out.p_surface->Data.Y;
++            picture->data[1]        = new_stage->out.p_surface->Data.VU;
++            picture->linesize[0]    = new_stage->out.p_surface->Info.Width;
++            picture->linesize[1]    = new_stage->out.p_surface->Info.Width;
++        } else {
++            picture->data[0]        = 0;
++            picture->data[1]        = 0;
++            picture->linesize[0]    = 0;
++            picture->linesize[1]    = 0;
++        }
++
++        picture->data[2]            = qsv_atom;
++        picture->linesize[2]        = 0;
++    }
++
++    return ret_value;
++}
++
++// Flush callback, called when seeking: forget any buffered bitstream data
++static void qsv_flush_dpb(AVCodecContext * avctx)
++{
++    av_qsv_context *qsv_ctx = avctx->priv_data;
++    av_qsv_space *dec = qsv_ctx->dec_space;
++
++    dec->bs.MaxLength  = dec->p_buf_max_size;
++    dec->bs.DataLength = 0;
++    dec->bs.DataOffset = 0;
++}
++
++
++// Frame allocator Alloc callback for system memory: allocates up to
++// request->NumFrameSuggested buffers through buffer_alloc, each sized for an
++// aligned av_qsv_alloc_frame header plus the pixel data of one frame.
++// Returns MFX_ERR_NOT_INITIALIZED without an allocator space,
++// MFX_ERR_UNSUPPORTED for an unhandled FourCC and MFX_ERR_MEMORY_ALLOC if the
++// id table or NumFrameMin frames cannot be had (partial work is released).
++mfxStatus ff_qsv_mem_frame_alloc(mfxHDL pthis,
++                                 mfxFrameAllocRequest * request,
++                                 mfxFrameAllocResponse * response)
++{
++    mfxStatus sts = MFX_ERR_NONE;
++    mfxU32 numAllocated = 0;
++    mfxU32 width = AV_QSV_ALIGN32(request->Info.Width);
++    mfxU32 height = AV_QSV_ALIGN32(request->Info.Height);
++    mfxU32 nbytes;
++    av_qsv_allocators_space *this_alloc = (av_qsv_allocators_space *) pthis;
++    av_qsv_alloc_frame *fs;
++
++    if (!this_alloc->space)
++        return MFX_ERR_NOT_INITIALIZED;
++
++    // bytes of pixel data per frame, computed from the aligned dimensions
++    switch (request->Info.FourCC) {
++    case MFX_FOURCC_YV12:
++    case MFX_FOURCC_NV12:
++        nbytes = width * height + 2 * ((width >> 1) * (height >> 1));
++        break;
++    case MFX_FOURCC_RGB3:
++        nbytes = 3 * (width * height);
++        break;
++    case MFX_FOURCC_RGB4:
++        nbytes = 4 * (width * height);
++        break;
++    case MFX_FOURCC_YUY2:
++        nbytes = width * height + 2 * ((width >> 1) * height);
++        break;
++    default:
++        return MFX_ERR_UNSUPPORTED;
++    }
++
++    this_alloc->space->mids =
++        av_malloc(sizeof(mfxMemId) * request->NumFrameSuggested);
++    if (!this_alloc->space->mids)
++        return MFX_ERR_MEMORY_ALLOC;
++
++    // allocate frames
++    for (numAllocated = 0; numAllocated < request->NumFrameSuggested;
++         numAllocated++) {
++        mfxMemId *mid = &this_alloc->space->mids[numAllocated];
++
++        sts = this_alloc->buffer_alloc.Alloc(this_alloc->buffer_alloc.pthis,
++                                             nbytes + AV_QSV_ALIGN32(sizeof
++                                                         (av_qsv_alloc_frame)),
++                                             request->Type, mid);
++        if (MFX_ERR_NONE != sts)
++            break;
++
++        sts = this_alloc->buffer_alloc.Lock(this_alloc->buffer_alloc.pthis,
++                                            *mid, (mfxU8 **) & fs);
++        if (MFX_ERR_NONE != sts) {
++            // not counted in numAllocated, so it must be released right here
++            this_alloc->buffer_alloc.Free(this_alloc->buffer_alloc.pthis,
++                                          *mid);
++            break;
++        }
++
++        // tag the header: ff_qsv_mem_frame_lock rejects buffers without this id
++        fs->id = AV_QSV_ID_FRAME;
++        fs->info = request->Info;
++        this_alloc->buffer_alloc.Unlock(this_alloc->buffer_alloc.pthis, *mid);
++    }
++
++    // check the number of allocated frames
++    if (numAllocated < request->NumFrameMin) {
++        // response is not filled in on failure, so roll everything back here
++        while (numAllocated--)
++            this_alloc->buffer_alloc.Free(this_alloc->buffer_alloc.pthis,
++                                          this_alloc->space->mids[numAllocated]);
++        av_freep(&this_alloc->space->mids);
++        return MFX_ERR_MEMORY_ALLOC;
++    }
++
++    response->NumFrameActual = (mfxU16) numAllocated;
++    response->mids = this_alloc->space->mids;
++
++    return MFX_ERR_NONE;
++}
++
++// Frame allocator Lock callback: locks the buffer behind mid and fills ptr with
++// plane pointers and pitch for the FourCC recorded in its frame header.
++// On INVALID_HANDLE / UNSUPPORTED errors the buffer is unlocked again.
++mfxStatus ff_qsv_mem_frame_lock(mfxHDL pthis, mfxMemId mid,
++                                mfxFrameData * ptr)
++{
++    mfxStatus sts = MFX_ERR_NONE;
++    av_qsv_alloc_frame *fs = 0;
++    mfxU16 width, height;
++    av_qsv_allocators_space *this_alloc = (av_qsv_allocators_space *) pthis;
++
++    if (!this_alloc->space)
++        return MFX_ERR_NOT_INITIALIZED;
++    if (!ptr)
++        return MFX_ERR_NULL_PTR;
++
++    sts = this_alloc->buffer_alloc.Lock(this_alloc->buffer_alloc.pthis, mid,
++                                        (mfxU8 **) & fs);
++    if (MFX_ERR_NONE != sts)
++        return sts;
++
++    if (AV_QSV_ID_FRAME != fs->id) {
++        this_alloc->buffer_alloc.Unlock(this_alloc->buffer_alloc.pthis, mid);
++        return MFX_ERR_INVALID_HANDLE;
++    }
++
++    width = (mfxU16) AV_QSV_ALIGN32(fs->info.Width);
++    height = (mfxU16) AV_QSV_ALIGN32(fs->info.Height);
++    // pixel data starts after the header reserved by ff_qsv_mem_frame_alloc,
++    // which is sized from av_qsv_alloc_frame (not av_qsv_allocators_space)
++    ptr->B = ptr->Y =
++        (mfxU8 *) fs + AV_QSV_ALIGN32(sizeof(av_qsv_alloc_frame));
++
++    switch (fs->info.FourCC) {
++    case MFX_FOURCC_NV12:
++        ptr->U = ptr->Y + width * height;
++        ptr->V = ptr->U + 1;
++        ptr->Pitch = width;
++        break;
++    case MFX_FOURCC_YV12:
++        ptr->V = ptr->Y + width * height;
++        ptr->U = ptr->V + (width >> 1) * (height >> 1);
++        ptr->Pitch = width;
++        break;
++    case MFX_FOURCC_YUY2:
++        ptr->U = ptr->Y + 1;
++        ptr->V = ptr->Y + 3;
++        ptr->Pitch = 2 * width;
++        break;
++    case MFX_FOURCC_RGB3:
++        ptr->G = ptr->B + 1;
++        ptr->R = ptr->B + 2;
++        ptr->Pitch = 3 * width;
++        break;
++    case MFX_FOURCC_RGB4:
++        ptr->G = ptr->B + 1;
++        ptr->R = ptr->B + 2;
++        ptr->A = ptr->B + 3;
++        ptr->Pitch = 4 * width;
++        break;
++    default:
++        this_alloc->buffer_alloc.Unlock(this_alloc->buffer_alloc.pthis, mid);
++        return MFX_ERR_UNSUPPORTED;
++    }
++
++    return MFX_ERR_NONE;
++}
++
++// Frame allocator Unlock callback: unlocks the buffer behind mid and, when
++// that succeeds and ptr is given, clears the pitch and the Y/U/V pointers.
++mfxStatus ff_qsv_mem_frame_unlock(mfxHDL pthis, mfxMemId mid,
++                                  mfxFrameData * ptr)
++{
++    av_qsv_allocators_space *allocators = (av_qsv_allocators_space *) pthis;
++    mfxStatus unlock_sts =
++        allocators->buffer_alloc.Unlock(allocators->buffer_alloc.pthis, mid);
++
++    if (unlock_sts != MFX_ERR_NONE)
++        return unlock_sts;
++
++    if (!ptr)
++        return MFX_ERR_NONE;
++
++    ptr->V = 0;
++    ptr->U = 0;
++    ptr->Y = 0;
++    ptr->Pitch = 0;
++
++    return MFX_ERR_NONE;
++}
++
++mfxStatus ff_qsv_mem_frame_getHDL(mfxHDL pthis, mfxMemId mid,
++                                  mfxHDL * handle)
++{
++    return MFX_ERR_UNSUPPORTED; // stub: no handle is ever written to *handle
++}
++
++// Frame allocator Free callback: releases every frame listed in response and
++// the id table; a failing Free does not stop the rest, first error is returned.
++mfxStatus ff_qsv_mem_frame_free(mfxHDL pthis,
++                                mfxFrameAllocResponse * response)
++{
++    mfxStatus sts = MFX_ERR_NONE;
++    av_qsv_allocators_space *this_alloc = (av_qsv_allocators_space *) pthis;
++    mfxU32 i;
++
++    if (!response)
++        return MFX_ERR_NULL_PTR;
++    if (!this_alloc->space)
++        return MFX_ERR_NOT_INITIALIZED;
++
++    for (i = 0; response->mids && i < response->NumFrameActual; i++) {
++        mfxStatus one = MFX_ERR_NONE;
++        if (response->mids[i])
++            one = this_alloc->buffer_alloc.Free(this_alloc->buffer_alloc.pthis,
++                                                response->mids[i]);
++        if (MFX_ERR_NONE == sts)
++            sts = one;          // remember the first failure, keep freeing
++    }
++
++    // ff_qsv_mem_frame_alloc stored this same table in space->mids
++    if (this_alloc->space->mids == response->mids)
++        this_alloc->space->mids = NULL;
++    av_freep(&response->mids);
++    return sts;
++}
++
++
++// Buffer allocator Alloc callback: allocates nbytes behind an aligned
++// av_qsv_alloc_buffer header and hands the header back through *mid.
++mfxStatus ff_qsv_mem_buffer_alloc(mfxHDL pthis, mfxU32 nbytes, mfxU16 type,
++                                  mfxMemId * mid)
++{
++    const mfxU32 header_size = AV_QSV_ALIGN32(sizeof(av_qsv_alloc_buffer));
++    av_qsv_alloc_buffer *bs;
++
++    if (!mid)
++        return MFX_ERR_NULL_PTR;
++    if (0 == (type & MFX_MEMTYPE_SYSTEM_MEMORY))
++        return MFX_ERR_UNSUPPORTED;
++
++    // header_size + nbytes is 32-bit arithmetic: refuse sizes that would wrap
++    if (nbytes > ~(mfxU32) 0 - header_size)
++        return MFX_ERR_MEMORY_ALLOC;
++    bs = av_malloc(header_size + nbytes);
++    if (!bs)
++        return MFX_ERR_MEMORY_ALLOC;
++
++    bs->id = AV_QSV_ID_BUFFER;
++    bs->type = type;
++    bs->nbytes = nbytes;
++    *mid = (mfxHDL) bs;
++    return MFX_ERR_NONE;
++}
++
++// Buffer allocator Lock callback: validates mid and stores in *ptr the address
++// of the data that follows the aligned av_qsv_alloc_buffer header.
++mfxStatus ff_qsv_mem_buffer_lock(mfxHDL pthis, mfxMemId mid, mfxU8 ** ptr)
++{
++    av_qsv_alloc_buffer *header = (av_qsv_alloc_buffer *) mid;
++    mfxU8 *payload;
++
++    if (!ptr)
++        return MFX_ERR_NULL_PTR;
++    // a handle is accepted only if it is non-NULL and carries the buffer id
++    if (!header || header->id != AV_QSV_ID_BUFFER)
++        return MFX_ERR_INVALID_HANDLE;
++
++    payload = (mfxU8 *) header + AV_QSV_ALIGN32(sizeof(av_qsv_alloc_buffer));
++    *ptr = payload;
++    return MFX_ERR_NONE;
++}
++
++// Buffer allocator Unlock callback: releases nothing, it only validates mid.
++mfxStatus ff_qsv_mem_buffer_unlock(mfxHDL pthis, mfxMemId mid)
++{
++    const av_qsv_alloc_buffer *header = (const av_qsv_alloc_buffer *) mid;
++
++    if (header && header->id == AV_QSV_ID_BUFFER)
++        return MFX_ERR_NONE;
++    return MFX_ERR_INVALID_HANDLE;
++}
++
++// Buffer allocator Free callback: validates mid, then frees header and data.
++mfxStatus ff_qsv_mem_buffer_free(mfxHDL pthis, mfxMemId mid)
++{
++    av_qsv_alloc_buffer *header = (av_qsv_alloc_buffer *) mid;
++    if (header == NULL || header->id != AV_QSV_ID_BUFFER)
++        return MFX_ERR_INVALID_HANDLE;
++    av_freep(&header);
++    return MFX_ERR_NONE;
++}
++
++
++AVCodec ff_h264_qsv_decoder = {    // AVCodec entry for the QSV H.264 decoder
++    .name           = "h264_qsv",
++    .type           = AVMEDIA_TYPE_VIDEO,
++    .id             = AV_CODEC_ID_H264,
++    .init           = ff_qsv_decode_init,
++    .close          = qsv_decode_end,
++    .decode         = qsv_decode_frame,
++    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_DELAY,
++    .flush          = qsv_flush_dpb,    // called when seeking
++    .long_name      = NULL_IF_CONFIG_SMALL("H.264 / AVC / Intel QSV"),
++    .pix_fmts       = (const enum PixelFormat[]) {AV_PIX_FMT_QSV_H264,
++                                                  AV_PIX_FMT_NONE},    // list terminator
++};
+diff -Naur ../../libav-v9.6/libavcodec/qsv_h264.h ./libavcodec/qsv_h264.h
+--- ../../libav-v9.6/libavcodec/qsv_h264.h	1970-01-01 01:00:00.000000000 +0100
++++ ./libavcodec/qsv_h264.h	2013-08-19 21:32:01.710244809 +0200
+@@ -0,0 +1,65 @@
++/* ********************************************************************* *\
++
++Copyright (C) 2013 Intel Corporation.  All rights reserved.
++
++Redistribution and use in source and binary forms, with or without
++modification, are permitted provided that the following conditions are met:
++- Redistributions of source code must retain the above copyright notice,
++this list of conditions and the following disclaimer.
++- Redistributions in binary form must reproduce the above copyright notice,
++this list of conditions and the following disclaimer in the documentation
++and/or other materials provided with the distribution.
++- Neither the name of Intel Corporation nor the names of its contributors
++may be used to endorse or promote products derived from this software
++without specific prior written permission.
++
++THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION "AS IS" AND ANY EXPRESS OR
++IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
++OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
++IN NO EVENT SHALL INTEL CORPORATION BE LIABLE FOR ANY DIRECT, INDIRECT,
++INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
++NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
++DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
++THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
++THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++
++\* ********************************************************************* */
++
++#ifndef AVCODEC_QSV_H264_H
++#define AVCODEC_QSV_H264_H
++
++#include "qsv.h"
++// Decoder entry points; the static ones are local to the including source file.
++int ff_qsv_dec_init(AVCodecContext *);
++int ff_qsv_nal_find_start_code(uint8_t * pb, size_t size);
++
++int ff_qsv_dec_init_clean(AVCodecContext *avctx);
++av_cold int ff_qsv_decode_init(AVCodecContext * avctx);
++static av_cold int qsv_decode_end(AVCodecContext * avctx);
++static int qsv_decode_frame(AVCodecContext * avctx, void *data,
++                            int *data_size, AVPacket * avpkt);
++static void qsv_flush_dpb(AVCodecContext * avctx);
++
++
++// Default allocator callbacks operating on SYSTEM MEMORY.
++// Frame callbacks, following the MFXFrameAllocator interface:
++mfxStatus ff_qsv_mem_frame_alloc(mfxHDL pthis,
++                                 mfxFrameAllocRequest * request,
++                                 mfxFrameAllocResponse * response);
++mfxStatus ff_qsv_mem_frame_lock(mfxHDL pthis, mfxMemId mid,
++                                mfxFrameData * ptr);
++mfxStatus ff_qsv_mem_frame_unlock(mfxHDL pthis, mfxMemId mid,
++                                  mfxFrameData * ptr);
++mfxStatus ff_qsv_mem_frame_getHDL(mfxHDL pthis, mfxMemId mid,
++                                  mfxHDL * handle);
++mfxStatus ff_qsv_mem_frame_free(mfxHDL pthis,
++                                mfxFrameAllocResponse * response);
++// Buffer callbacks, following the mfxBufferAllocator interface:
++mfxStatus ff_qsv_mem_buffer_alloc(mfxHDL pthis, mfxU32 nbytes, mfxU16 type,
++                                  mfxMemId * mid);
++mfxStatus ff_qsv_mem_buffer_lock(mfxHDL pthis, mfxMemId mid, mfxU8 ** ptr);
++mfxStatus ff_qsv_mem_buffer_unlock(mfxHDL pthis, mfxMemId mid);
++mfxStatus ff_qsv_mem_buffer_free(mfxHDL pthis, mfxMemId mid);
++
++#endif                          /* AVCODEC_QSV_H264_H */
+diff -Naur ../../libav-v9.6/libavutil/pixfmt.h ./libavutil/pixfmt.h
+--- ../../libav-v9.6/libavutil/pixfmt.h	2013-05-12 08:39:07.000000000 +0200
++++ ./libavutil/pixfmt.h	2013-08-14 10:48:00.522497405 +0200
+@@ -178,6 +178,7 @@
+     AV_PIX_FMT_YUVA422P16LE, ///< planar YUV 4:2:2 48bpp, (1 Cr & Cb sample per 2x1 Y & A samples, little-endian)
+     AV_PIX_FMT_YUVA444P16BE, ///< planar YUV 4:4:4 64bpp, (1 Cr & Cb sample per 1x1 Y & A samples, big-endian)
+     AV_PIX_FMT_YUVA444P16LE, ///< planar YUV 4:4:4 64bpp, (1 Cr & Cb sample per 1x1 Y & A samples, little-endian)
++    AV_PIX_FMT_QSV_H264,  ///< H.264 HW decoding with QSV, data[2] contains qsv_atom information for MFX_IOPATTERN_OUT_OPAQUE_MEMORY, MFX_IOPATTERN_OUT_VIDEO_MEMORY
+     AV_PIX_FMT_NB,        ///< number of pixel formats, DO NOT USE THIS if you want to link with shared libav* because the number of formats might differ between versions
+ 
+ #if FF_API_PIX_FMT
diff --git a/contrib/libmfx/module.defs b/contrib/libmfx/module.defs
new file mode 100644
index 000000000..4377123b9
--- /dev/null
+++ b/contrib/libmfx/module.defs
@@ -0,0 +1,6 @@
+$(eval $(call import.MODULE.defs,LIBMFX,libmfx))
+$(eval $(call import.CONTRIB.defs,LIBMFX))
+# Location of the libmfx source tarball
+LIBMFX.FETCH.url = http://download.handbrake.fr/contrib/libmfx_intel_msdk_2013r2.tar.bz2
+# Bootstrap: remove stale autotools output, then regenerate it with autoreconf
+LIBMFX.CONFIGURE.bootstrap = rm -fr aclocal.m4 autom4te.cache; autoreconf -fiv;
diff --git a/contrib/libmfx/module.rules b/contrib/libmfx/module.rules
new file mode 100644
index 000000000..ffa581f04
--- /dev/null
+++ b/contrib/libmfx/module.rules
@@ -0,0 +1,2 @@
+$(eval $(call import.MODULE.rules,LIBMFX)) # expand the MODULE rules macro for LIBMFX
+$(eval $(call import.CONTRIB.rules,LIBMFX)) # expand the CONTRIB rules macro for LIBMFX