diff --git a/configure b/configure index 5085333..7283fb1 100755 --- a/configure +++ b/configure @@ -134,6 +134,7 @@ Component options: Hardware accelerators: --enable-dxva2 enable DXVA2 code + --enable-qsv enable QSV code --enable-vaapi enable VAAPI code --enable-vda enable VDA code --enable-vdpau enable VDPAU code @@ -1198,6 +1199,7 @@ FEATURE_LIST=" HWACCEL_LIST=" dxva2 + qsv vaapi vda vdpau @@ -1939,6 +1941,7 @@ zmbv_encoder_deps="zlib" # hardware accelerators dxva2_deps="dxva2api_h" +qsv_deps="mfx_mfxvideo_h" vaapi_deps="va_va_h" vda_deps="VideoDecodeAcceleration_VDADecoder_h pthreads" vda_extralibs="-framework CoreFoundation -framework VideoDecodeAcceleration -framework QuartzCore" @@ -1950,6 +1953,8 @@ h263_vdpau_hwaccel_deps="vdpau" h263_vdpau_hwaccel_select="h263_decoder" h264_dxva2_hwaccel_deps="dxva2" h264_dxva2_hwaccel_select="h264_decoder" +h264_qsv_decoder_deps="qsv" +h264_qsv_decoder_select="h264_decoder" h264_vaapi_hwaccel_deps="vaapi" h264_vaapi_hwaccel_select="h264_decoder" h264_vda_hwaccel_deps="vda" @@ -4115,6 +4120,7 @@ check_header dxva2api.h check_header io.h check_header mach/mach_time.h check_header malloc.h +check_header mfx/mfxvideo.h check_header poll.h check_header sys/mman.h check_header sys/param.h diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 419dcb6..5208d5b 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -4,6 +4,7 @@ HEADERS = avcodec.h \ avfft.h \ dv_profile.h \ dxva2.h \ + qsv.h \ vaapi.h \ vda.h \ vdpau.h \ @@ -75,6 +76,7 @@ OBJS-$(CONFIG_MPEGVIDEO) += mpegvideo.o mpegvideodsp.o \ OBJS-$(CONFIG_MPEGVIDEOENC) += mpegvideo_enc.o mpeg12data.o \ motion_est.o ratecontrol.o \ mpegvideoencdsp.o +OBJS-$(CONFIG_QSV) += qsv.o OBJS-$(CONFIG_PIXBLOCKDSP) += pixblockdsp.o OBJS-$(CONFIG_QPELDSP) += qpeldsp.o OBJS-$(CONFIG_RANGECODER) += rangecoder.o @@ -216,6 +218,7 @@ OBJS-$(CONFIG_H264_DECODER) += h264.o h264_cabac.o h264_cavlc.o \ h264_direct.o h264_loopfilter.o \ h264_mb.o h264_picture.o h264_ps.o \ h264_refs.o h264_sei.o h264_slice.o +OBJS-$(CONFIG_H264_QSV_DECODER) += qsv_h264.o OBJS-$(CONFIG_HEVC_DECODER) += hevc.o hevc_mvs.o hevc_ps.o hevc_sei.o \ hevc_cabac.o hevc_refs.o hevcpred.o \ hevcdsp.o hevc_filter.o @@ -712,6 +715,7 @@ SKIPHEADERS += %_tablegen.h \ SKIPHEADERS-$(CONFIG_DXVA2) += dxva2.h dxva2_internal.h SKIPHEADERS-$(CONFIG_LIBSCHROEDINGER) += libschroedinger.h SKIPHEADERS-$(CONFIG_MPEG_XVMC_DECODER) += xvmc.h +SKIPHEADERS-$(CONFIG_QSV) += qsv.h SKIPHEADERS-$(CONFIG_VAAPI) += vaapi_internal.h SKIPHEADERS-$(CONFIG_VDA) += vda.h vda_internal.h SKIPHEADERS-$(CONFIG_VDPAU) += vdpau.h vdpau_internal.h diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c index 0d46afe..b3a5dd0 100644 --- a/libavcodec/allcodecs.c +++ b/libavcodec/allcodecs.c @@ -159,6 +159,7 @@ void avcodec_register_all(void) REGISTER_DECODER(H263I, h263i); REGISTER_ENCODER(H263P, h263p); REGISTER_DECODER(H264, h264); + REGISTER_DECODER(H264_QSV, h264_qsv); REGISTER_DECODER(HEVC, hevc); REGISTER_DECODER(HNM4_VIDEO, hnm4_video); REGISTER_ENCDEC (HUFFYUV, huffyuv); diff --git a/libavcodec/qsv.c b/libavcodec/qsv.c new file mode 100644 index 0000000..2c46111 --- /dev/null +++ b/libavcodec/qsv.c @@ -0,0 +1,646 @@ +/* ********************************************************************* *\ + +Copyright (C) 2013 Intel Corporation. All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. +- Neither the name of Intel Corporation nor the names of its contributors +may be used to endorse or promote products derived from this software +without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION "AS IS" AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL INTEL CORPORATION BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +\* ********************************************************************* */ + +#include "qsv.h" + +#include "avcodec.h" +#include "internal.h" + +int av_qsv_get_free_encode_task(av_qsv_list * tasks) +{ + int ret = MFX_ERR_NOT_FOUND; + int i = 0; + if (tasks) + for (i = 0; i < av_qsv_list_count(tasks); i++) { + av_qsv_task *task = av_qsv_list_item(tasks, i); + if (task->stage && task->stage->out.sync) + if (!(*task->stage->out.sync->p_sync)) { + ret = i; + break; + } + } + return ret; +} + +int av_qsv_get_free_sync(av_qsv_space * space, av_qsv_context * qsv) +{ + int ret = -1; + int counter = 0; + + while (1) { + for (int i = 0; i < space->sync_num; i++) { + if (!(*(space->p_syncp[i]->p_sync)) && + 0 == space->p_syncp[i]->in_use ) { + if (i > space->sync_num_max_used) + space->sync_num_max_used = i; + ff_qsv_atomic_inc(&space->p_syncp[i]->in_use); + return i; + } + } +#if HAVE_THREADS + if (++counter >= AV_QSV_REPEAT_NUM_DEFAULT) { +#endif + av_log(NULL, AV_LOG_FATAL, "not enough to have %d sync point(s) allocated\n", + space->sync_num); + break; +#if HAVE_THREADS + } + av_qsv_sleep(5); +#endif + } + return ret; +} + +int av_qsv_get_free_surface(av_qsv_space * space, av_qsv_context * qsv, + mfxFrameInfo * info, av_qsv_split part) +{ + int ret = -1; + int from = 0; + int up = space->surface_num; + int counter = 0; + + while (1) { + from = 0; + up = space->surface_num; + if (part == QSV_PART_LOWER) + up /= 2; + if (part == QSV_PART_UPPER) + from = up / 2; + + for (int i = from; i < up; i++) { + if (0 == space->p_surfaces[i]->Data.Locked) { + memcpy(&(space->p_surfaces[i]->Info), info, + sizeof(mfxFrameInfo)); + if (i > space->surface_num_max_used) + space->surface_num_max_used = i; + return i; + } + } +#if HAVE_THREADS + if (++counter >= AV_QSV_REPEAT_NUM_DEFAULT) { +#endif + av_log(NULL, AV_LOG_FATAL, + "not enough to have %d surface(s) allocated\n", up); + break; +#if HAVE_THREADS + } + av_qsv_sleep(5); +#endif + } + return ret; +} + +int ff_qsv_is_surface_in_pipe(mfxFrameSurface1 * p_surface, av_qsv_context * qsv) +{ + int ret = 0; + int a, b,i; + av_qsv_list *list = 0; + av_qsv_stage *stage = 0; + + if 
(!p_surface) + return ret; + if (!qsv->pipes) + return ret; + + for (a = 0; a < av_qsv_list_count(qsv->pipes); a++) { + list = av_qsv_list_item(qsv->pipes, a); + for (b = 0; b < av_qsv_list_count(list); b++) { + stage = av_qsv_list_item(list, b); + if (p_surface == stage->out.p_surface) + return (stage->type << 16) | 2; + if (p_surface == stage->in.p_surface) + return (stage->type << 16) | 1; + } + } + return ret; +} + +int ff_qsv_is_sync_in_pipe(mfxSyncPoint * sync, av_qsv_context * qsv) +{ + int ret = 0; + int a, b; + av_qsv_list *list = 0; + av_qsv_stage *stage = 0; + + if (!sync) + return ret; + if (!qsv->pipes) + return ret; + + for (a = 0; a < av_qsv_list_count(qsv->pipes); a++) { + list = av_qsv_list_item(qsv->pipes, a); + for (b = 0; b < av_qsv_list_count(list); b++) { + stage = av_qsv_list_item(list, b); + if (sync == stage->out.sync->p_sync) { + return 1; + } + } + } + return ret; +} + +av_qsv_stage *av_qsv_stage_init(void) +{ + av_qsv_stage *stage = av_mallocz(sizeof(av_qsv_stage)); + return stage; +} + +void av_qsv_stage_clean(av_qsv_stage ** stage) +{ + if ((*stage)->out.sync) { + if ((*stage)->out.sync->p_sync) + *(*stage)->out.sync->p_sync = 0; + if ((*stage)->out.sync->in_use > 0) + ff_qsv_atomic_dec(&(*stage)->out.sync->in_use); + (*stage)->out.sync = 0; + } + if ((*stage)->out.p_surface) { + (*stage)->out.p_surface = 0; + + } + if ((*stage)->in.p_surface) { + (*stage)->in.p_surface = 0; + } + + av_freep(stage); +} + +void av_qsv_add_context_usage(av_qsv_context * qsv, int is_threaded) +{ + int is_active = 0; +#if HAVE_THREADS + int mut_ret = 0; +#endif + + is_active = ff_qsv_atomic_inc(&qsv->is_context_active); + if (is_active == 1) { + memset(&qsv->mfx_session, 0, sizeof(mfxSession)); + av_qsv_pipe_list_create(&qsv->pipes, is_threaded); + + qsv->dts_seq = av_qsv_list_init(is_threaded); + +#if HAVE_THREADS + if (is_threaded) { + qsv->qts_seq_mutex = av_mallocz(sizeof(pthread_mutex_t)); + if (qsv->qts_seq_mutex){ + mut_ret = pthread_mutex_init(qsv->qts_seq_mutex, NULL); + if(mut_ret) + av_log(NULL, AV_LOG_ERROR, "pthread_mutex_init issue[%d] at %s\n",mut_ret,__FUNCTION__); + } + + } else +#endif + qsv->qts_seq_mutex = 0; + } +} + +int av_qsv_context_clean(av_qsv_context * qsv) +{ + int is_active = 0; + mfxStatus sts = MFX_ERR_NONE; +#if HAVE_THREADS + int mut_ret = 0; +#endif + + is_active = ff_qsv_atomic_dec(&qsv->is_context_active); + + // spaces would have to be cleaned on the own, + // here we care about the rest, common stuff + if (is_active == 0) { + + if (qsv->dts_seq) { + while (av_qsv_list_count(qsv->dts_seq)) + av_qsv_dts_pop(qsv); + + av_qsv_list_close(&qsv->dts_seq); + } +#if HAVE_THREADS + if (qsv->qts_seq_mutex) { + mut_ret = pthread_mutex_destroy(qsv->qts_seq_mutex); + if(mut_ret) + av_log(NULL, AV_LOG_ERROR, "pthread_mutex_destroy issue[%d] at %s\n", mut_ret,__FUNCTION__); +#endif + qsv->qts_seq_mutex = 0; +#if HAVE_THREADS + } +#endif + + if (qsv->pipes) + av_qsv_pipe_list_clean(&qsv->pipes); + + if (qsv->mfx_session) { + sts = MFXClose(qsv->mfx_session); + AV_QSV_CHECK_RESULT(sts, MFX_ERR_NONE, sts); + qsv->mfx_session = 0; + } + } + return 0; +} + +void av_qsv_pipe_list_create(av_qsv_list ** list, int is_threaded) +{ + if (!*list) + *list = av_qsv_list_init(is_threaded); +} + +void av_qsv_pipe_list_clean(av_qsv_list ** list) +{ + av_qsv_list *stage; + int i = 0; + if (*list) { + for (i = av_qsv_list_count(*list); i > 0; i--) { + stage = av_qsv_list_item(*list, i - 1); + av_qsv_flush_stages(*list, &stage); + } + av_qsv_list_close(list); + } +} + +void 
av_qsv_add_stagee(av_qsv_list ** list, av_qsv_stage * stage, int is_threaded) +{ + if (!*list) + *list = av_qsv_list_init(is_threaded); + av_qsv_list_add(*list, stage); +} + +av_qsv_stage *av_qsv_get_last_stage(av_qsv_list * list) +{ + av_qsv_stage *stage = 0; + int size = 0; + + av_qsv_list_lock(list); + size = av_qsv_list_count(list); + if (size > 0) + stage = av_qsv_list_item(list, size - 1); + av_qsv_list_unlock(list); + + return stage; +} + +void av_qsv_flush_stages(av_qsv_list * list, av_qsv_list ** item) +{ + int i = 0; + int x = 0; + av_qsv_stage *stage = 0; + av_qsv_list *to_remove_list = 0; + av_qsv_list *to_remove_atom_list = 0; + av_qsv_list *to_remove_atom = 0; + + for (i = 0; i < av_qsv_list_count(*item); i++) { + stage = av_qsv_list_item(*item, i); + if(stage->pending){ + if(!to_remove_list) + to_remove_list = av_qsv_list_init(0); + av_qsv_list_add(to_remove_list, stage->pending); + } + av_qsv_stage_clean(&stage); + // should actually remove from the list but ok... + } + av_qsv_list_rem(list, *item); + av_qsv_list_close(item); + + if(to_remove_list){ + for (i = av_qsv_list_count(to_remove_list); i > 0; i--){ + to_remove_atom_list = av_qsv_list_item(to_remove_list, i-1); + for (x = av_qsv_list_count(to_remove_atom_list); x > 0; x--){ + to_remove_atom = av_qsv_list_item(to_remove_atom_list, x-1); + av_qsv_flush_stages(list,&to_remove_atom); + } + } + av_qsv_list_close(&to_remove_list); + } +} + +av_qsv_list *av_qsv_pipe_by_stage(av_qsv_list * list, av_qsv_stage * stage) +{ + av_qsv_list *item = 0; + av_qsv_stage *cur_stage = 0; + int i = 0; + int a = 0; + for (i = 0; i < av_qsv_list_count(list); i++) { + item = av_qsv_list_item(list, i); + for (a = 0; a < av_qsv_list_count(item); a++) { + cur_stage = av_qsv_list_item(item, a); + if (cur_stage == stage) + return item; + } + } + return 0; +} + +// no duplicate of the same value, if end == 0 : working over full length +void av_qsv_dts_ordered_insert(av_qsv_context * qsv, int start, int end, + int64_t dts, int iter) +{ + av_qsv_dts *cur_dts = 0; + av_qsv_dts *new_dts = 0; + int i = 0; +#if HAVE_THREADS + int mut_ret = 0; +#endif + + +#if HAVE_THREADS + if (iter == 0 && qsv->qts_seq_mutex){ + mut_ret = pthread_mutex_lock(qsv->qts_seq_mutex); + if(mut_ret) + av_log(NULL, AV_LOG_ERROR, "pthread_mutex_lock issue[%d] at %s\n",mut_ret, __FUNCTION__); + } +#endif + + if (end == 0) + end = av_qsv_list_count(qsv->dts_seq); + + if (end <= start) { + new_dts = av_mallocz(sizeof(av_qsv_dts)); + if( new_dts ) { + new_dts->dts = dts; + av_qsv_list_add(qsv->dts_seq, new_dts); + } + } else + for (i = end; i > start; i--) { + cur_dts = av_qsv_list_item(qsv->dts_seq, i - 1); + if (cur_dts->dts < dts) { + new_dts = av_mallocz(sizeof(av_qsv_dts)); + if( new_dts ) { + new_dts->dts = dts; + av_qsv_list_insert(qsv->dts_seq, i, new_dts); + } + break; + } else if (cur_dts->dts == dts) + break; + } +#if HAVE_THREADS + if (iter == 0 && qsv->qts_seq_mutex){ + mut_ret = pthread_mutex_unlock(qsv->qts_seq_mutex); + if(mut_ret) + av_log(NULL, AV_LOG_ERROR, "pthread_mutex_unlock issue[%d] at %s\n",mut_ret, __FUNCTION__); + } +#endif +} + +void av_qsv_dts_pop(av_qsv_context * qsv) +{ + av_qsv_dts *item = 0; +#if HAVE_THREADS + int mut_ret = 0; +#endif + +#if HAVE_THREADS + if (qsv && qsv->qts_seq_mutex){ + mut_ret = pthread_mutex_lock(qsv->qts_seq_mutex); + if(mut_ret) + av_log(NULL, AV_LOG_ERROR, "pthread_mutex_lock issue[%d] at %s\n",mut_ret, __FUNCTION__); + } +#endif + + if (av_qsv_list_count(qsv->dts_seq)) { + item = av_qsv_list_item(qsv->dts_seq, 0); + 
av_qsv_list_rem(qsv->dts_seq, item); + av_free(item); + } +#if HAVE_THREADS + if (qsv && qsv->qts_seq_mutex){ + mut_ret = pthread_mutex_unlock(qsv->qts_seq_mutex); + if(mut_ret) + av_log(NULL, AV_LOG_ERROR, "pthread_mutex_lock issue[%d] at %s\n",mut_ret, __FUNCTION__); + } +#endif +} + + +av_qsv_list *av_qsv_list_init(int is_threaded) +{ + av_qsv_list *l; +#if HAVE_THREADS + int mut_ret; +#endif + + l = av_mallocz(sizeof(av_qsv_list)); + if (!l) + return 0; + l->items = av_mallocz(AV_QSV_JOB_SIZE_DEFAULT * sizeof(void *)); + if (!l->items) + return 0; + l->items_alloc = AV_QSV_JOB_SIZE_DEFAULT; + +#if HAVE_THREADS + if (is_threaded) { + l->mutex = av_mallocz(sizeof(pthread_mutex_t)); + if (l->mutex){ + mut_ret = pthread_mutexattr_init(&l->mta); + if( mut_ret ) + av_log(NULL, AV_LOG_ERROR, "pthread_mutexattr_init issue[%d] at %s\n",mut_ret, __FUNCTION__); + mut_ret = pthread_mutexattr_settype(&l->mta, PTHREAD_MUTEX_RECURSIVE /*PTHREAD_MUTEX_ERRORCHECK*/); + if( mut_ret ) + av_log(NULL, AV_LOG_ERROR, "pthread_mutexattr_settype issue[%d] at %s\n",mut_ret, __FUNCTION__); + mut_ret = pthread_mutex_init(l->mutex, &l->mta); + if( mut_ret ) + av_log(NULL, AV_LOG_ERROR, "pthread_mutex_init issue[%d] at %s\n",mut_ret, __FUNCTION__); + } + } else +#endif + l->mutex = 0; + return l; +} + +int av_qsv_list_count(av_qsv_list * l) +{ + int count; + + av_qsv_list_lock(l); + count = l->items_count; + av_qsv_list_unlock(l); + return count; +} + +int av_qsv_list_add(av_qsv_list * l, void *p) +{ + int pos = -1; + + if (!p) { + return pos; + } + + av_qsv_list_lock(l); + + if (l->items_count == l->items_alloc) { + /* We need a bigger boat */ + l->items_alloc += AV_QSV_JOB_SIZE_DEFAULT; + l->items = av_realloc(l->items, l->items_alloc * sizeof(void *)); + } + + l->items[l->items_count] = p; + pos = (l->items_count); + l->items_count++; + + av_qsv_list_unlock(l); + + return pos; +} + +void av_qsv_list_rem(av_qsv_list * l, void *p) +{ + int i; + + av_qsv_list_lock(l); + + /* Find the item in the list */ + for (i = 0; i < l->items_count; i++) { + if (l->items[i] == p) { + /* Shift all items after it sizeof( void * ) bytes earlier */ + memmove(&l->items[i], &l->items[i + 1], + (l->items_count - i - 1) * sizeof(void *)); + + l->items_count--; + break; + } + } + + av_qsv_list_unlock(l); +} + +void *av_qsv_list_item(av_qsv_list * l, int i) +{ + void *ret = NULL; + + if (i < 0) + return NULL; + + av_qsv_list_lock(l); + if( i < l->items_count) + ret = l->items[i]; + av_qsv_list_unlock(l); + return ret; +} + +void av_qsv_list_insert(av_qsv_list * l, int pos, void *p) +{ + + if (!p) + return; + + av_qsv_list_lock(l); + + if (l->items_count == l->items_alloc) { + l->items_alloc += AV_QSV_JOB_SIZE_DEFAULT; + l->items = av_realloc(l->items, l->items_alloc * sizeof(void *)); + } + + if (l->items_count != pos) { + memmove(&l->items[pos + 1], &l->items[pos], + (l->items_count - pos) * sizeof(void *)); + } + + l->items[pos] = p; + l->items_count--; + + av_qsv_list_unlock(l); +} + +void av_qsv_list_close(av_qsv_list ** _l) +{ + av_qsv_list *l = *_l; +#if HAVE_THREADS + int mut_ret; +#endif + + av_qsv_list_lock(l); + + av_free(l->items); + +#if HAVE_THREADS + if (l->mutex){ + mut_ret = pthread_mutex_unlock(l->mutex); + if( mut_ret ) + av_log(NULL, AV_LOG_ERROR, "pthread_mutex_unlock issue[%d] at %s\n",mut_ret, __FUNCTION__); + mut_ret = pthread_mutex_destroy(&l->mutex); + mut_ret = pthread_mutexattr_destroy(&l->mta); + } +#endif + av_freep(_l); +} + +int av_qsv_list_lock(av_qsv_list *l){ + int ret = 0; +#if HAVE_THREADS + if 
(l->mutex){ + ret = pthread_mutex_lock(l->mutex); + if( ret ) + av_log(NULL, AV_LOG_ERROR, "pthread_mutex_lock issue[%d] at %s\n",ret, __FUNCTION__); + } +#endif + return ret; +} + +int av_qsv_list_unlock(av_qsv_list *l){ + int ret = 0; +#if HAVE_THREADS + if (l->mutex){ + ret = pthread_mutex_unlock(l->mutex); + if( ret ) + av_log(NULL, AV_LOG_ERROR, "pthread_mutex_unlock issue[%d] at %s\n",ret, __FUNCTION__); + } +#endif + return ret; +} + +int av_is_qsv_available(mfxIMPL impl, mfxVersion * ver) +{ + mfxStatus sts = MFX_ERR_NONE; + mfxSession mfx_session; + + memset(&mfx_session, 0, sizeof(mfxSession)); + sts = MFXInit(impl, ver, &mfx_session); + if (sts >= 0) + MFXClose(mfx_session); + return sts; +} + +void av_qsv_wait_on_sync(av_qsv_context *qsv, av_qsv_stage *stage) +{ + int iter = 0; + mfxStatus sts = MFX_ERR_NONE; + if( stage ) + if(*stage->out.sync->p_sync){ + while(1){ + iter++; + sts = MFXVideoCORE_SyncOperation(qsv->mfx_session,*stage->out.sync->p_sync, AV_QSV_SYNC_TIME_DEFAULT); + if(MFX_WRN_IN_EXECUTION == sts){ + + if(iter>20) + AV_QSV_DEBUG_ASSERT(1, "Sync failed"); + + av_qsv_sleep(10); + continue; + } + AV_QSV_CHECK_RESULT(sts, MFX_ERR_NONE, sts); + break; + } + } +} \ No newline at end of file diff --git a/libavcodec/qsv.h b/libavcodec/qsv.h new file mode 100644 index 0000000..52378cf --- /dev/null +++ b/libavcodec/qsv.h @@ -0,0 +1,494 @@ +/* ********************************************************************* *\ + +Copyright (C) 2013 Intel Corporation. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. +- Neither the name of Intel Corporation nor the names of its contributors +may be used to endorse or promote products derived from this software +without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION "AS IS" AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL INTEL CORPORATION BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +\* ********************************************************************* */ + +#ifndef AVCODEC_QSV_H +#define AVCODEC_QSV_H + +/** + * @file + * @ingroup lavc_codec_hwaccel_qsv + * Common header for QSV/MediaSDK acceleration + */ + +/** + * @defgroup lavc_codec_hwaccel_qsv QSV/MediaSDK based Decode/Encode and VPP + * @ingroup lavc_codec_hwaccel + * + * As Intel Quick Sync Video (QSV) can decode/preprocess/encode with HW + * acceleration. 
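+ *
+ * A minimal application-side sketch of opening this decoder (illustrative
+ * only; assumes the caller owns an av_qsv_config qsv_cfg filled as described
+ * below, error handling omitted):
+ *
+ * AVCodec *codec = avcodec_find_decoder_by_name("h264_qsv");
+ * AVCodecContext *avctx = avcodec_alloc_context3(codec);
+ * avctx->hwaccel_context = &qsv_cfg; // see av_qsv_config at the end of this header
+ * avcodec_open2(avctx, codec, NULL);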
+ * + * Supported features: + * - access: + * - format AV_PIX_FMT_QSV_H264, AVCodec decoder based implementation + * - name "h264_qsv", avcodec_find_decoder_by_name( "h264_qsv") + * - IO Pattern: + * - Opaque memory: MFX_IOPATTERN_OUT_OPAQUE_MEMORY // Video memory is + * MFX_IMPL_HARDWARE or MFX_IMPL_AUTO and runtime support, + * otherwise: System Memory + * - System memory: MFX_IOPATTERN_OUT_SYSTEM_MEMORY + * - Allocators: + * - default allocator for System memory: MFX_MEMTYPE_SYSTEM_MEMORY + * - details: + * implementation as "per frame" + * + * TODO list: + * - access: + * - format AV_PIX_FMT_QSV_MPEG2 + * - format AV_PIX_FMT_QSV_VC1 + * - format AV_PIX_FMT_QSV, see "details" below + * - IO Pattern: + * - VIDEO_MEMORY // MFX_IOPATTERN_OUT_VIDEO_MEMORY + * - Allocators: + * - Video memory: MFX_MEMTYPE_VIDEO_MEMORY_DECODER_TARGET / + * MFX_MEMTYPE_VIDEO_MEMORY_PROCESSOR_TARGET + * - details: + * "per slice" support: AV_PIX_FMT_QSV with AVHWAccel based implementation + * + * Note av_qsv_config struct required to fill in via + * AVCodecContext.hwaccel_context + * + * As per frame, note AVFrame.data[2] (qsv_atom) used for frame atom id, + * data/linesize should be used together with SYSTEM_MEMORY and tested + * + * Note: Compilation would require: + * - Intel MediaSDK headers, Full SDK is avaialble from the original web site: + * http://software.intel.com/en-us/vcsource/tools/media-SDK + * Will be referenced as mfx/*.h (mfxdefs.h, mfxstructures.h, ... ) + * and + * - Final application has to link against Intel MediaSDK dispatcher, available + * at MediaSDK as well + * + * Target OS: as per available dispatcher and driver support + * + * Implementation details: + * Provided struct av_qsv_context contain several struct av_qsv_space(s) for decode, + * VPP and encode. + * av_qsv_space just contain needed environment for the appropriate action. + * Based on this - pipeline (see pipes) will be build to pass details such as + * mfxFrameSurface1* and mfxSyncPoint* from one action to the next. + * + * Resources re-usage (av_qsv_flush_stages): + * av_qsv_context *qsv = (av_qsv_context *)video_codec_ctx->priv_data; + * av_qsv_list *pipe = (av_qsv_list *)video_frame->data[2]; + * av_qsv_flush_stages( qsv->pipes, &pipe ); + * + * DTS re-usage: + * av_qsv_dts_pop(qsv); + * + * for video,DX9/11 memory it has to be Unlock'ed as well + * + * Implementation is thread aware and uses synchronization point(s) from MediaSDK + * as per configuration. + * + * For the details of MediaSDK usage and options available - please refer to the + * available documentation at MediaSDK. 
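+ *
+ * Putting the re-usage calls above together, per-frame handling on the
+ * application side might look like this sketch (assumes "frame" is the
+ * decoded AVFrame; illustrative only, error handling omitted):
+ *
+ * av_qsv_context *qsv = (av_qsv_context *)video_codec_ctx->priv_data;
+ * av_qsv_list *pipe = (av_qsv_list *)frame->data[2]; // per-frame qsv_atom
+ * // ... consume the frame/surface here ...
+ * av_qsv_flush_stages(qsv->pipes, &pipe); // release stages used by this frame
+ * av_qsv_dts_pop(qsv); // drop the matching DTS entry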
+ * + * Feature set used from MSDK is defined by AV_QSV_MSDK_VERSION_MAJOR and + * AV_QSV_MSDK_VERSION_MINOR + * + * @{ + */ + +#include +#include +#include "mfx/mfxvideo.h" +#include "libavutil/mem.h" +#include "libavutil/time.h" + +#ifdef HAVE_AV_CONFIG_H +#include "config.h" +#endif + +#if HAVE_THREADS +#if defined (__GNUC__) +#include +#define ff_qsv_atomic_inc(ptr) __sync_add_and_fetch(ptr,1) +#define ff_qsv_atomic_dec(ptr) __sync_sub_and_fetch (ptr,1) +#elif HAVE_WINDOWS_H // MSVC case +#include +#if HAVE_PTHREADS +#include +#elif HAVE_W32THREADS +#include "w32pthreads.h" +#endif +#define ff_qsv_atomic_inc(ptr) InterlockedIncrement(ptr) +#define ff_qsv_atomic_dec(ptr) InterlockedDecrement (ptr) +#else +// targeting only for MinGW or MSVC +#endif + +#else +#define ff_qsv_atomic_inc(ptr) ((*ptr)++) +#define ff_qsv_atomic_dec(ptr) ((*ptr)--) +#endif + + +// sleep is defined in milliseconds +#define av_qsv_sleep(x) av_usleep((x)*1000) + +#define AV_QSV_ZERO_MEMORY(VAR) {memset(&VAR, 0, sizeof(VAR));} +#define AV_QSV_ALIGN32(X) (((mfxU32)((X)+31)) & (~ (mfxU32)31)) +#define AV_QSV_ALIGN16(value) (((value + 15) >> 4) << 4) +#ifndef AV_QSV_PRINT_RET_MSG +#define AV_QSV_PRINT_RET_MSG(ERR) { av_log(NULL, AV_LOG_FATAL,"Error code %d,\t%s\t%d\n", ERR, __FUNCTION__, __LINE__); } +#endif + +#ifndef AV_QSV_DEBUG_ASSERT +#define AV_QSV_DEBUG_ASSERT(x,y) {if ((x)) {av_log(NULL, AV_LOG_FATAL,"\nASSERT: %s\n",y);};} +#endif + +#define AV_QSV_CHECK_RESULT(P, X, ERR) {if ((X) > (P)) {AV_QSV_PRINT_RET_MSG(ERR); return ERR;}} +#define AV_QSV_CHECK_POINTER(P, ERR) {if (!(P)) {AV_QSV_PRINT_RET_MSG(ERR); return ERR;}} +#define AV_QSV_IGNORE_MFX_STS(P, X) {if ((X) == (P)) {P = MFX_ERR_NONE;}} + +#define AV_QSV_ID_BUFFER MFX_MAKEFOURCC('B','U','F','F') +#define AV_QSV_ID_FRAME MFX_MAKEFOURCC('F','R','M','E') + +#define AV_QSV_SURFACE_NUM 80 +#define AV_QSV_SYNC_NUM AV_QSV_SURFACE_NUM*3/4 +#define AV_QSV_BUF_SIZE_DEFAULT 4096*2160*10 +#define AV_QSV_JOB_SIZE_DEFAULT 10 +#define AV_QSV_SYNC_TIME_DEFAULT 10000 +// see av_qsv_get_free_sync, av_qsv_get_free_surface , 100 if usleep(10*1000)(10ms) == 1 sec +#define AV_QSV_REPEAT_NUM_DEFAULT 100 +#define AV_QSV_ASYNC_DEPTH_DEFAULT 4 + +// version of MSDK/QSV API currently used +#define AV_QSV_MSDK_VERSION_MAJOR 1 +#define AV_QSV_MSDK_VERSION_MINOR 3 + +typedef enum AV_QSV_STAGE_TYPE { + +#define AV_QSV_DECODE_MASK 0x001 + AV_QSV_DECODE = 0x001, + +#define AV_QSV_VPP_MASK 0x0F0 + // "Mandatory VPP filter" , might be with "Hint-based VPP filters" + AV_QSV_VPP_DEFAULT = 0x010, + // "User Modules" etc + AV_QSV_VPP_USER = 0x020, + +#define av_QSV_ENCODE_MASK 0x100 + AV_QSV_ENCODE = 0x100 +#define AV_QSV_ANY_MASK 0xFFF +} AV_QSV_STAGE_TYPE; + + +typedef struct av_qsv_list { + // practically pthread_mutex_t + void *mutex; +#if HAVE_THREADS + pthread_mutexattr_t mta; +#endif + + void **items; + int items_alloc; + + int items_count; +} av_qsv_list; + +typedef struct av_qsv_sync { + mfxSyncPoint* p_sync; + int in_use; +} av_qsv_sync; + +typedef struct av_qsv_stage { + AV_QSV_STAGE_TYPE type; + struct { + mfxBitstream *p_bs; + mfxFrameSurface1 *p_surface; + } in; + struct { + mfxBitstream *p_bs; + mfxFrameSurface1 *p_surface; + av_qsv_sync *sync; + } out; + av_qsv_list *pending; +} av_qsv_stage; + +typedef struct av_qsv_task { + mfxBitstream *bs; + av_qsv_stage *stage; +} av_qsv_task; + + +typedef struct av_qsv_space { + + uint8_t is_init_done; + + AV_QSV_STAGE_TYPE type; + + mfxVideoParam m_mfxVideoParam; + + mfxFrameAllocResponse response; + mfxFrameAllocRequest request[2]; // 
[0] - in, [1] - out, if needed + + mfxExtOpaqueSurfaceAlloc ext_opaque_alloc; + mfxExtBuffer **p_ext_params; + uint16_t p_ext_param_num; + + uint16_t surface_num_max_used; + uint16_t surface_num; + mfxFrameSurface1 *p_surfaces[AV_QSV_SURFACE_NUM]; + + uint16_t sync_num_max_used; + uint16_t sync_num; + av_qsv_sync *p_syncp[AV_QSV_SYNC_NUM]; + + mfxBitstream bs; + uint8_t *p_buf; + size_t p_buf_max_size; + + // only for encode and tasks + av_qsv_list *tasks; + + av_qsv_list *pending; + + // storage for allocations/mfxMemId* + mfxMemId *mids; +} av_qsv_space; + +typedef struct av_qsv_context { + volatile int is_context_active; + + mfxIMPL impl; + mfxSession mfx_session; + mfxVersion ver; + + // decode + av_qsv_space *dec_space; + // encode + av_qsv_space *enc_space; + // vpp + av_qsv_list *vpp_space; + + av_qsv_list *pipes; + + // MediaSDK starting from API version 1.6 includes DecodeTimeStamp + // in addition to TimeStamp + // see also AV_QSV_MSDK_VERSION_MINOR , AV_QSV_MSDK_VERSION_MAJOR + av_qsv_list *dts_seq; + + // practically pthread_mutex_t + void *qts_seq_mutex; + + int is_anex; + + void *qsv_config; + +} av_qsv_context; + +typedef enum { + QSV_PART_ANY = 0, + QSV_PART_LOWER, + QSV_PART_UPPER +} av_qsv_split; + +typedef struct { + int64_t dts; +} av_qsv_dts; + +typedef struct av_qsv_alloc_frame { + mfxU32 id; + mfxFrameInfo info; +} av_qsv_alloc_frame; + +typedef struct av_qsv_alloc_buffer { + mfxU32 id; + mfxU32 nbytes; + mfxU16 type; +} av_qsv_alloc_buffer; + +typedef struct av_qsv_allocators_space { + av_qsv_space *space; + mfxFrameAllocator frame_alloc; + mfxBufferAllocator buffer_alloc; +} av_qsv_allocators_space; + +typedef struct av_qsv_config { + /** + * Set asynch depth of processing with QSV + * Format: 0 and more + * + * - encoding: Set by user. + * - decoding: Set by user. + */ + int async_depth; + + /** + * Range of numbers that indicate trade-offs between quality and speed. + * Format: from 1/MFX_TARGETUSAGE_BEST_QUALITY to 7/MFX_TARGETUSAGE_BEST_SPEED inclusive + * + * - encoding: Set by user. + * - decoding: unused + */ + int target_usage; + + /** + * Number of reference frames; if NumRefFrame = 0, this parameter is not specified. + * Format: 0 and more + * + * - encoding: Set by user. + * - decoding: unused + */ + int num_ref_frame; + + /** + * Distance between I- or P- key frames; if it is zero, the GOP structure is unspecified. + * Note: If GopRefDist = 1, there are no B-frames used. + * + * - encoding: Set by user. + * - decoding: unused + */ + int gop_ref_dist; + + /** + * Number of pictures within the current GOP (Group of Pictures); if GopPicSize=0, + * then the GOP size is unspecified. If GopPicSize=1, only I-frames are used. + * + * - encoding: Set by user. + * - decoding: unused + */ + int gop_pic_size; + + /** + * Set type of surfaces used with QSV + * Format: "IOPattern enum" of Media SDK + * + * - encoding: Set by user. + * - decoding: Set by user. + */ + int io_pattern; + + /** + * Set amount of additional surfaces might be needed + * Format: ammount of additional buffers(surfaces+syncs) + * to allocate in advance + * + * - encoding: Set by user. + * - decoding: Set by user. + */ + int additional_buffers; + + /** + * If pipeline should be sync. + * Format: wait time in milliseconds, + * AV_QSV_SYNC_TIME_DEFAULT/10000 might be a good value + * + * - encoding: Set by user. + * - decoding: Set by user. + */ + int sync_need; + + /** + * Type of implementation needed + * + * - encoding: Set by user. + * - decoding: Set by user. 
+ */ + int impl_requested; + + /** + * if QSV usage is multithreaded. + * Format: Yes/No, 1/0 + * + * - encoding: Set by user. + * - decoding: Set by user. + */ + int usage_threaded; + + /** + * if QSV use an external allocation (valid per session/mfxSession) + * Format: pointer to allocators, if default: 0 + * + * note that: + * System Memory: can be used without provided and external allocator, + * meaning MediaSDK will use an internal one + * Video Memory: in this case - we must provide an external allocator + * Also, Media SDK session doesn't require external allocator if the application + * uses opaque memory + * + * Calls SetFrameAllocator/SetBufferAllocator + * (MFXVideoCORE_SetFrameAllocator/MFXVideoCORE_SetBufferAllocator) + * are to pass allocators to Media SDK + * + * - encoding: Set by user. + * - decoding: Set by user. + */ + av_qsv_allocators_space *allocators; + +} av_qsv_config; + +#define ANEX_UNKNOWN 0 +#define ANEX_PREFIX 1 +#define ANEX_NO_PREFIX 2 + +static const uint8_t ff_prefix_code[] = { 0x00, 0x00, 0x00, 0x01 }; + +int av_qsv_get_free_sync(av_qsv_space *, av_qsv_context *); +int av_qsv_get_free_surface(av_qsv_space *, av_qsv_context *, mfxFrameInfo *, + av_qsv_split); +int av_qsv_get_free_encode_task(av_qsv_list *); + +int av_is_qsv_available(mfxIMPL, mfxVersion *); +void av_qsv_wait_on_sync(av_qsv_context *, av_qsv_stage *); + +void av_qsv_add_context_usage(av_qsv_context *, int); + +void av_qsv_pipe_list_create(av_qsv_list **, int); +void av_qsv_pipe_list_clean(av_qsv_list **); + +void av_qsv_add_stagee(av_qsv_list **, av_qsv_stage *, int); +av_qsv_stage *av_qsv_get_last_stage(av_qsv_list *); +av_qsv_list *av_qsv_pipe_by_stage(av_qsv_list *, av_qsv_stage *); +void av_qsv_flush_stages(av_qsv_list *, av_qsv_list **); + +void av_qsv_dts_ordered_insert(av_qsv_context *, int, int, int64_t, int); +void av_qsv_dts_pop(av_qsv_context *); + +av_qsv_stage *av_qsv_stage_init(void); +void av_qsv_stage_clean(av_qsv_stage **); +int av_qsv_context_clean(av_qsv_context *); + +int ff_qsv_is_sync_in_pipe(mfxSyncPoint *, av_qsv_context *); +int ff_qsv_is_surface_in_pipe(mfxFrameSurface1 *, av_qsv_context *); + +av_qsv_list *av_qsv_list_init(int); +int av_qsv_list_lock(av_qsv_list *); +int av_qsv_list_unlock(av_qsv_list *); +int av_qsv_list_add(av_qsv_list *, void *); +void av_qsv_list_rem(av_qsv_list *, void *); +void av_qsv_list_insert(av_qsv_list *, int, void *); +void av_qsv_list_close(av_qsv_list **); + +int av_qsv_list_count(av_qsv_list *); +void *av_qsv_list_item(av_qsv_list *, int); + +/* @} */ + +#endif //AVCODEC_QSV_H diff --git a/libavcodec/qsv_h264.c b/libavcodec/qsv_h264.c new file mode 100644 index 0000000..53a9f16 --- /dev/null +++ b/libavcodec/qsv_h264.c @@ -0,0 +1,982 @@ +/* ********************************************************************* *\ + +Copyright (C) 2013 Intel Corporation. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. 
+- Neither the name of Intel Corporation nor the names of its contributors +may be used to endorse or promote products derived from this software +without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION "AS IS" AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL INTEL CORPORATION BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +\* ********************************************************************* */ + +#include "h264.h" +#include "h264data.h" +#include "qsv_h264.h" + +static av_qsv_config av_qsv_default_config = { + .async_depth = AV_QSV_ASYNC_DEPTH_DEFAULT, + .target_usage = MFX_TARGETUSAGE_BALANCED, + .num_ref_frame = 0, + .gop_ref_dist = 0, + .gop_pic_size = 0, + .io_pattern = MFX_IOPATTERN_OUT_OPAQUE_MEMORY, + .additional_buffers = 0, + .sync_need = 0, + .impl_requested = MFX_IMPL_HARDWARE, + .usage_threaded = 0, + .allocators = 0, +}; + +static av_qsv_allocators_space av_qsv_default_system_allocators = { + // fill to access mids + .space = 0, + + .frame_alloc = { + .pthis = &av_qsv_default_system_allocators, + .Alloc = ff_qsv_mem_frame_alloc, + .Lock = ff_qsv_mem_frame_lock, + .Unlock = ff_qsv_mem_frame_unlock, + .GetHDL = ff_qsv_mem_frame_getHDL, + .Free = ff_qsv_mem_frame_free, + }, + .buffer_alloc = { + .pthis = &av_qsv_default_system_allocators, + .Alloc = ff_qsv_mem_buffer_alloc, + .Lock = ff_qsv_mem_buffer_lock, + .Unlock = ff_qsv_mem_buffer_unlock, + .Free = ff_qsv_mem_buffer_free, + }, +}; + +static const uint8_t ff_slice_code[] = { 0x00, 0x00, 0x01, 0x65 }; + +int ff_qsv_nal_find_start_code(uint8_t * pb, size_t size) +{ + if ((int) size < 4) + return 0; + + while ((4 <= size) && ((0 != pb[0]) || (0 != pb[1]) || (0 != pb[2]) || (1 != pb[3]))) { + pb += 1; + size -= 1; + } + + if (4 <= size) + return 1; + + return 0; +} + +int ff_qsv_dec_init_clean(AVCodecContext *avctx) +{ + av_qsv_context *qsv = avctx->priv_data; + av_qsv_context_clean(qsv); + av_freep(&avctx->priv_data); + return 0; +} + +int ff_qsv_dec_init(AVCodecContext * avctx) +{ + int ret = 0; + mfxStatus sts = MFX_ERR_NONE; + size_t current_offset = 6; + int header_size = 0; + unsigned char *current_position; + size_t current_size; + + av_qsv_context *qsv = avctx->priv_data; + av_qsv_space *qsv_decode = qsv->dec_space; + av_qsv_config *qsv_config_context = avctx->hwaccel_context; + + qsv->impl = qsv_config_context->impl_requested; + + memset(&qsv->mfx_session, 0, sizeof(mfxSession)); + qsv->ver.Major = AV_QSV_MSDK_VERSION_MAJOR; + qsv->ver.Minor = AV_QSV_MSDK_VERSION_MINOR; + + sts = MFXInit(qsv->impl, &qsv->ver, &qsv->mfx_session); + AV_QSV_CHECK_RESULT(sts, MFX_ERR_NONE, sts); + + AV_QSV_ZERO_MEMORY(qsv_decode->m_mfxVideoParam); + AV_QSV_ZERO_MEMORY(qsv_decode->m_mfxVideoParam.mfx); + qsv_decode->m_mfxVideoParam.mfx.CodecId = MFX_CODEC_AVC; + qsv_decode->m_mfxVideoParam.IOPattern = + qsv_config_context->io_pattern; + + qsv_decode->m_mfxVideoParam.AsyncDepth = + qsv_config_context->async_depth; + + AV_QSV_ZERO_MEMORY(qsv_decode->bs); 
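+
+    // The block below builds an Annex B style header in qsv_decode->p_buf for
+    // MFXVideoDECODE_DecodeHeader: if the extradata carries no start codes
+    // (length-prefixed NAL units), SPS/PPS are copied with start codes
+    // prepended and a dummy slice NAL is appended after the PPS, since
+    // DecodeHeader needs to see a slice; otherwise the extradata is copied
+    // as-is and only the dummy slice is appended.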
+ { + current_position = avctx->extradata; + current_size = avctx->extradata_size; + + if (!ff_qsv_nal_find_start_code(current_position, current_size)) { + + while (current_offset <= current_size) { + int current_nal_size = + (unsigned char) current_position[current_offset] << 8 | + (unsigned char) current_position[current_offset + 1]; + unsigned char nal_type = + (unsigned char) current_position[current_offset + 2] & 0x1F; + + if (nal_type == NAL_SPS || nal_type == NAL_PPS) { + memcpy(&qsv_decode->p_buf[header_size], ff_prefix_code, + sizeof(ff_prefix_code)); + header_size += sizeof(ff_prefix_code); + memcpy(&qsv_decode->p_buf[header_size], + ¤t_position[current_offset + 2], + current_nal_size); + + // fix for PPS as it comes after SPS, so - last + if (nal_type == NAL_PPS) { + // fix of MFXVideoDECODE_DecodeHeader: needs one SLICE to find, any SLICE + memcpy(&qsv_decode->p_buf + [header_size + current_nal_size], + ff_slice_code, current_nal_size); + header_size += sizeof(ff_slice_code); + } + } + + header_size += current_nal_size; + current_offset += current_nal_size + 3; + } + } else { + memcpy(&qsv_decode->p_buf[0], avctx->extradata, + avctx->extradata_size); + header_size = avctx->extradata_size; + memcpy(&qsv_decode->p_buf + [header_size], ff_slice_code, sizeof(ff_slice_code)); + header_size += sizeof(ff_slice_code); + } + } + + qsv_decode->bs.Data = qsv_decode->p_buf; + qsv_decode->bs.DataLength = header_size; + qsv_decode->bs.MaxLength = qsv_decode->p_buf_max_size; + + if (qsv_decode->bs.DataLength > qsv_decode->bs.MaxLength) { + av_log(avctx, AV_LOG_FATAL, "DataLength > MaxLength\n"); + return -1; + } + + sts = MFXVideoDECODE_DecodeHeader(qsv->mfx_session, &qsv_decode->bs, + &qsv_decode->m_mfxVideoParam); + if (sts < MFX_ERR_NONE && avctx->height && avctx->width) + { + av_log(avctx, AV_LOG_QUIET,"DecodeHeader failed with result:%d\n",sts); + sts = MFX_ERR_NONE; + + // to cover absents of SPS details + qsv_decode->m_mfxVideoParam.mfx.FrameInfo.FourCC = MFX_FOURCC_NV12; + qsv_decode->m_mfxVideoParam.mfx.FrameInfo.Width = AV_QSV_ALIGN16(avctx->width); + qsv_decode->m_mfxVideoParam.mfx.FrameInfo.Height = avctx->field_order > AV_FIELD_PROGRESSIVE ? 
AV_QSV_ALIGN32(avctx->height) : AV_QSV_ALIGN16(avctx->height); + qsv_decode->m_mfxVideoParam.mfx.FrameInfo.ChromaFormat = MFX_CHROMAFORMAT_YUV420; + } + AV_QSV_CHECK_RESULT(sts, MFX_ERR_NONE, sts); + + qsv_decode->bs.DataLength -= sizeof(ff_slice_code); + + memset(&qsv_decode->request, 0, sizeof(mfxFrameAllocRequest) * 2); + sts = MFXVideoDECODE_QueryIOSurf(qsv->mfx_session, + &qsv_decode->m_mfxVideoParam, + &qsv_decode->request); + + AV_QSV_IGNORE_MFX_STS(sts, MFX_WRN_PARTIAL_ACCELERATION); + AV_QSV_CHECK_RESULT(sts, MFX_ERR_NONE, sts); + + qsv_decode->surface_num = + FFMIN(qsv_decode->request[0].NumFrameSuggested + + qsv_config_context->async_depth + + qsv_config_context->additional_buffers, AV_QSV_SURFACE_NUM); + + if (qsv_decode->surface_num <= 0) + qsv_decode->surface_num = AV_QSV_SURFACE_NUM; + + if (qsv_decode->m_mfxVideoParam.IOPattern == + MFX_IOPATTERN_OUT_SYSTEM_MEMORY) { + + // as per non-opaque memory: + if (!qsv_config_context->allocators) { + av_log(avctx, AV_LOG_INFO, + "Using default allocators for QSV decode\n"); + ((av_qsv_config *) avctx->hwaccel_context)->allocators = + &av_qsv_default_system_allocators; + } + + qsv_config_context->allocators->space = qsv_decode; + + qsv_decode->request[0].NumFrameMin = qsv_decode->surface_num; + qsv_decode->request[0].NumFrameSuggested = qsv_decode->surface_num; + + qsv_decode->request[0].Type = MFX_MEMTYPE_EXTERNAL_FRAME | MFX_MEMTYPE_FROM_DECODE; + // qsv_decode->request[0].Type |= m_bd3dAlloc ? MFX_MEMTYPE_VIDEO_MEMORY_DECODER_TARGET : MFX_MEMTYPE_SYSTEM_MEMORY; + qsv_decode->request[0].Type |= MFX_MEMTYPE_SYSTEM_MEMORY; + + qsv_config_context->allocators-> + frame_alloc.Alloc(qsv_config_context->allocators, + &qsv_decode->request[0], + &qsv_decode->response); + } + + for (int i = 0; i < qsv_decode->surface_num; i++) { + qsv_decode->p_surfaces[i] = av_mallocz(sizeof(mfxFrameSurface1)); + AV_QSV_CHECK_POINTER(qsv_decode->p_surfaces[i], + AVERROR(ENOMEM)); + memcpy(&(qsv_decode->p_surfaces[i]->Info), + &(qsv_decode->request[0].Info), sizeof(mfxFrameInfo)); + + // for an external(like DX9/11) based allocation: + // we bind: + // m_pmfxSurfaces[i].Data.MemId = m_mfxResponse.mids[i]; + // else, System memory: + if (qsv_decode->m_mfxVideoParam.IOPattern == + MFX_IOPATTERN_OUT_SYSTEM_MEMORY) { + sts = + qsv_config_context->allocators-> + frame_alloc.Lock(qsv_config_context->allocators, + qsv_decode->response.mids[i], + &(qsv_decode->p_surfaces[i]->Data)); + AV_QSV_CHECK_RESULT(sts, MFX_ERR_NONE, sts); + } + } + + qsv_decode->sync_num = FFMIN(qsv_decode->surface_num, AV_QSV_SYNC_NUM); + for (int i = 0; i < qsv_decode->sync_num; i++) { + qsv_decode->p_syncp[i] = av_mallocz(sizeof(av_qsv_sync)); + AV_QSV_CHECK_POINTER(qsv_decode->p_syncp[i], AVERROR(ENOMEM)); + qsv_decode->p_syncp[i]->p_sync = av_mallocz(sizeof(mfxSyncPoint)); + AV_QSV_CHECK_POINTER(qsv_decode->p_syncp[i]->p_sync, AVERROR(ENOMEM)); + } + + memset(&qsv_decode->ext_opaque_alloc, 0, + sizeof(mfxExtOpaqueSurfaceAlloc)); + + if (qsv_decode->m_mfxVideoParam.IOPattern == + MFX_IOPATTERN_OUT_OPAQUE_MEMORY) { + qsv_decode->m_mfxVideoParam.NumExtParam = qsv_decode->p_ext_param_num = 1; + + qsv_decode->p_ext_params = av_mallocz(sizeof(mfxExtBuffer *)*qsv_decode->p_ext_param_num); + AV_QSV_CHECK_POINTER(qsv_decode->p_ext_params, AVERROR(ENOMEM)); + + qsv_decode->m_mfxVideoParam.ExtParam = qsv_decode->p_ext_params; + + qsv_decode->ext_opaque_alloc.Out.Surfaces = qsv_decode->p_surfaces; + qsv_decode->ext_opaque_alloc.Out.NumSurface = qsv_decode->surface_num; + 
qsv_decode->ext_opaque_alloc.Out.Type = qsv_decode->request[0].Type; + + qsv_decode->ext_opaque_alloc.Header.BufferId = MFX_EXTBUFF_OPAQUE_SURFACE_ALLOCATION; + qsv_decode->ext_opaque_alloc.Header.BufferSz = sizeof(mfxExtOpaqueSurfaceAlloc); + qsv_decode->p_ext_params[0] = (mfxExtBuffer *) &qsv_decode->ext_opaque_alloc; + } + + sts = + MFXVideoDECODE_Init(qsv->mfx_session, + &qsv_decode->m_mfxVideoParam); + + AV_QSV_CHECK_RESULT(sts, MFX_ERR_NONE, sts); + + qsv_decode->is_init_done = 1; + return ret; +} + +av_cold int ff_qsv_decode_init(AVCodecContext * avctx) +{ + av_qsv_context *qsv; + av_qsv_space *qsv_decode; + av_qsv_config **qsv_config_context = + (av_qsv_config **) & avctx->hwaccel_context; + + qsv = avctx->priv_data; + + if (qsv && qsv->dec_space && qsv->dec_space->is_init_done || !avctx->extradata_size) + return 0; + + if(!qsv) + qsv = av_mallocz(sizeof(av_qsv_context)); + if (!qsv) + return AVERROR(ENOMEM); + + if(!qsv_decode) + qsv_decode = av_mallocz(sizeof(av_qsv_space)); + if (!qsv_decode){ + free(qsv); + return AVERROR(ENOMEM); + } + avctx->priv_data = qsv; + qsv->dec_space = qsv_decode; + + qsv_decode->p_buf_max_size = AV_QSV_BUF_SIZE_DEFAULT; + if(!qsv_decode->p_buf) + qsv_decode->p_buf = av_malloc(qsv_decode->p_buf_max_size * sizeof(uint8_t)); + if (!qsv_decode->p_buf) + return AVERROR(ENOMEM); + + if (!(*qsv_config_context)) { + av_log(avctx, AV_LOG_INFO, "Using default config for QSV decode\n"); + avctx->hwaccel_context = &av_qsv_default_config; + } else if ((*qsv_config_context)->io_pattern != MFX_IOPATTERN_OUT_OPAQUE_MEMORY && + (*qsv_config_context)->io_pattern != MFX_IOPATTERN_OUT_SYSTEM_MEMORY) { + avpriv_report_missing_feature(avctx, + "Only MFX_IOPATTERN_OUT_OPAQUE_MEMORY" + " and MFX_IOPATTERN_OUT_SYSTEM_MEMORY" + " are currently supported\n"); + return AVERROR_PATCHWELCOME; + } + + qsv->qsv_config = avctx->hwaccel_context; + + av_qsv_add_context_usage(qsv, + HAVE_THREADS + ? 
(*qsv_config_context)->usage_threaded : + HAVE_THREADS); + + // allocation of p_syncp and p_surfaces inside of ff_qsv_dec_init + return ff_qsv_dec_init(avctx); +} + +static av_cold int qsv_decode_end(AVCodecContext * avctx) +{ + mfxStatus sts = MFX_ERR_NONE; + av_qsv_context *qsv = avctx->priv_data; + av_qsv_config *qsv_config_context = avctx->hwaccel_context; + + if (qsv) { + av_qsv_space *qsv_decode = qsv->dec_space; + if (qsv_decode && qsv_decode->is_init_done) { + // todo: change to AV_LOG_INFO + av_log(avctx, AV_LOG_QUIET, + "qsv_decode report done, max_surfaces: %u/%u , max_syncs: %u/%u\n", + qsv_decode->surface_num_max_used, + qsv_decode->surface_num, qsv_decode->sync_num_max_used, + qsv_decode->sync_num); + } + + if (qsv_config_context + && qsv_config_context->io_pattern == + MFX_IOPATTERN_OUT_SYSTEM_MEMORY) { + if (qsv_config_context->allocators) { + sts = + qsv_config_context->allocators-> + frame_alloc.Free(qsv_config_context->allocators, + &qsv_decode->response); + AV_QSV_CHECK_RESULT(sts, MFX_ERR_NONE, sts); + } else { + av_log(avctx, AV_LOG_FATAL, + "No QSV allocators found for clean up\n"); + } + } + // closing the own resources + av_freep(&qsv_decode->p_buf); + + for (int i = 0; i < qsv_decode->surface_num; i++) { + av_freep(&qsv_decode->p_surfaces[i]); + } + qsv_decode->surface_num = 0; + + if( qsv_decode->p_ext_param_num || qsv_decode->p_ext_params ) + av_freep(&qsv_decode->p_ext_params); + qsv_decode->p_ext_param_num = 0; + + for (int i = 0; i < qsv_decode->sync_num; i++) { + av_freep(&qsv_decode->p_syncp[i]->p_sync); + av_freep(&qsv_decode->p_syncp[i]); + } + qsv_decode->sync_num = 0; + qsv_decode->is_init_done = 0; + + av_freep(&qsv->dec_space); + + // closing commong stuff + av_qsv_context_clean(qsv); + } + + return 0; +} + +static int qsv_decode_frame(AVCodecContext * avctx, void *data, + int *data_size, AVPacket * avpkt) +{ + mfxStatus sts = MFX_ERR_NONE; + av_qsv_context *qsv = avctx->priv_data; + av_qsv_space *qsv_decode; + av_qsv_config *qsv_config_context = avctx->hwaccel_context; + int *got_picture_ptr = data_size; + int ret_value = 1; + uint8_t *current_position = avpkt->data; + int current_size = avpkt->size; + int frame_processed = 0; + size_t frame_length = 0; + int surface_idx = 0; + int extra_data_workaround = 0; + + int sync_idx = 0; + int current_nal_size; + unsigned char nal_type; + av_qsv_stage *new_stage = 0; + mfxBitstream *input_bs = NULL; + size_t current_offset = 2; + av_qsv_list *qsv_atom = 0; + av_qsv_list *pipe = 0; + + AVFrame *picture = (AVFrame *) data; + + *got_picture_ptr = 0; + + qsv = avctx->priv_data; + if(!qsv){ + extra_data_workaround = !avctx->extradata_size; + if(extra_data_workaround){ + avctx->extradata = avpkt->data; + avctx->extradata_size = avpkt->size; + } + sts = ff_qsv_decode_init(avctx); + qsv = avctx->priv_data; + if(extra_data_workaround){ + avctx->extradata = 0; + avctx->extradata_size = 0; + } + if(sts<0){ + ff_qsv_dec_init_clean(avctx); + *got_picture_ptr = 0; + return sts; + } + } + qsv_decode = qsv->dec_space; + + if (qsv_decode->bs.DataOffset + qsv_decode->bs.DataLength + + current_size > qsv_decode->bs.MaxLength) { + memmove(&qsv_decode->bs.Data[0], + qsv_decode->bs.Data + qsv_decode->bs.DataOffset, + qsv_decode->bs.DataLength); + qsv_decode->bs.DataOffset = 0; + } + + if (current_size) { + if(qsv->is_anex == ANEX_UNKNOWN){ + if (ff_qsv_nal_find_start_code(current_position, current_size) && current_position == avpkt->data) + qsv->is_anex = ANEX_PREFIX; + else + qsv->is_anex = ANEX_NO_PREFIX; + } + if (qsv->is_anex 
== ANEX_PREFIX){ + memcpy(&qsv_decode->bs.Data[0] + + qsv_decode->bs.DataLength + + qsv_decode->bs.DataOffset, + avpkt->data, + avpkt->size); + qsv_decode->bs.DataLength += avpkt->size; + frame_length += avpkt->size; + } + else + while (current_offset <= current_size) { + current_nal_size = + ((unsigned char) current_position[current_offset - 2] << 24 | + (unsigned char) current_position[current_offset - 1] << 16 | + (unsigned char) current_position[current_offset] << 8 | + (unsigned char) current_position[current_offset + 1]) - 1; + nal_type = + (unsigned char) current_position[current_offset + 2] & 0x1F; + { + frame_length += current_nal_size; + memcpy(&qsv_decode->bs.Data[0] + + qsv_decode->bs.DataLength + + qsv_decode->bs.DataOffset, ff_prefix_code, + sizeof(ff_prefix_code)); + qsv_decode->bs.DataLength += sizeof(ff_prefix_code); + memcpy(&qsv_decode->bs.Data[0] + + qsv_decode->bs.DataLength + + qsv_decode->bs.DataOffset, + ¤t_position[current_offset + 2], + current_nal_size + 1); + qsv_decode->bs.DataLength += current_nal_size + 1; + } + current_offset += current_nal_size + 5; + } + + if (qsv_decode->bs.DataLength > qsv_decode->bs.MaxLength) { + av_log(avctx, AV_LOG_FATAL, "DataLength > MaxLength\n"); + return -1; + } + } + + if (frame_length || current_size == 0) { + + qsv_decode->bs.TimeStamp = avpkt->pts; + + //not a drain + if ((current_size || qsv_decode->bs.DataLength)) + av_qsv_dts_ordered_insert(qsv, 0, 0, qsv_decode->bs.TimeStamp, 0); + + sts = MFX_ERR_NONE; + // ignore warnings, where warnings >0 , and not error codes <0 + while (MFX_ERR_NONE <= sts || MFX_ERR_MORE_SURFACE == sts + || MFX_WRN_DEVICE_BUSY == sts) { + + if (MFX_ERR_MORE_SURFACE == sts || MFX_ERR_NONE == sts) { + surface_idx = + av_qsv_get_free_surface(qsv_decode, qsv, + &qsv_decode->request[0].Info, + QSV_PART_ANY); + + if (surface_idx == -1) { + *got_picture_ptr = 0; + return 0; + } + } + + if (MFX_WRN_DEVICE_BUSY == sts) + av_qsv_sleep(10); + + sync_idx = av_qsv_get_free_sync(qsv_decode, qsv); + + if (sync_idx == -1) { + *got_picture_ptr = 0; + return 0; + } + new_stage = av_qsv_stage_init(); + input_bs = NULL; + // if to drain last ones + if (current_size || qsv_decode->bs.DataLength) + input_bs = &qsv_decode->bs; + // Decode a frame asynchronously (returns immediately) + // very first IDR / SLICE should be with SPS/PPS + sts = MFXVideoDECODE_DecodeFrameAsync(qsv->mfx_session, input_bs, + qsv_decode->p_surfaces + [surface_idx], + &new_stage->out.p_surface, + qsv_decode->p_syncp[sync_idx]->p_sync); + + new_stage->out.sync = qsv_decode->p_syncp[sync_idx]; + // have some results + if (MFX_ERR_NONE <= sts && MFX_WRN_DEVICE_BUSY != sts && + MFX_WRN_VIDEO_PARAM_CHANGED != sts) { + + ff_qsv_atomic_inc(&(new_stage->out.p_surface->Data.Locked)); + + new_stage->type = AV_QSV_DECODE; + new_stage->in.p_bs = input_bs; + new_stage->in.p_surface = qsv_decode->p_surfaces[surface_idx]; + + pipe = av_qsv_list_init(HAVE_THREADS ? qsv_config_context->usage_threaded : HAVE_THREADS); + av_qsv_add_stagee(&pipe, new_stage, + HAVE_THREADS ? 
+ qsv_config_context->usage_threaded : + HAVE_THREADS); + + av_qsv_list_add(qsv->pipes, pipe); + qsv_atom = pipe; + + // usage for forced decode sync and results, can be avoided if sync done by next stage + // also note wait time for Sync and possible usage with MFX_WRN_IN_EXECUTION check + if (qsv_config_context->sync_need) { + sts = + MFXVideoCORE_SyncOperation(qsv->mfx_session, + qsv_decode->p_syncp[sync_idx]->p_sync, + qsv_config_context->sync_need); + AV_QSV_CHECK_RESULT(sts, MFX_ERR_NONE, sts); + + // no need to wait more -> force off + ff_qsv_atomic_dec(&qsv_decode->p_syncp[sync_idx]->in_use); + new_stage->out.sync = 0; + } + + sts = MFX_ERR_NONE; + break; + } + av_qsv_stage_clean(&new_stage); + + /* + Can be because of: + - runtime situation: + - drain procedure: + At the end of the bitstream, the application continuously calls the MFXVideoDECODE_DecodeFrameAsync function with a + NULL bitstream pointer to drain any remaining frames cached within the Intel + Media SDK decoder, until the function returns MFX_ERR_MORE_DATA. + */ + if (MFX_ERR_MORE_DATA == sts) { + // not a drain + if (current_size) { + *got_picture_ptr = 0; + return avpkt->size; + } + // drain + break; + } + if (MFX_ERR_MORE_SURFACE == sts ){ + continue; + } + + AV_QSV_CHECK_RESULT(sts, MFX_ERR_NONE, sts); + } + + frame_processed = 1; + } + + if (frame_processed) { + + if (current_size) { + *got_picture_ptr = 1; + ret_value = avpkt->size; + } else { + if (MFX_ERR_MORE_DATA != sts) { + *got_picture_ptr = 1; + ret_value = avpkt->size; + } else { + *got_picture_ptr = 0; + return 0; + } + } + + picture->pkt_pts = new_stage->out.p_surface->Data.TimeStamp; + picture->pts = new_stage->out.p_surface->Data.TimeStamp; + + picture->repeat_pict = (qsv_decode->m_mfxVideoParam.mfx.FrameInfo.PicStruct & MFX_PICSTRUCT_FIELD_REPEATED); + picture->interlaced_frame = !(qsv_decode->m_mfxVideoParam.mfx.FrameInfo.PicStruct & MFX_PICSTRUCT_PROGRESSIVE); + picture->top_field_first = (qsv_decode->m_mfxVideoParam.mfx.FrameInfo.PicStruct & MFX_PICSTRUCT_FIELD_TFF); + + // since we do not know it yet from MSDK, let's do just a simple way for now + picture->key_frame = (avctx->frame_number == 0) ? 
1 : 0; + + if (qsv_decode->m_mfxVideoParam.IOPattern == MFX_IOPATTERN_OUT_SYSTEM_MEMORY) { + picture->data[0] = new_stage->out.p_surface->Data.Y; + picture->data[1] = new_stage->out.p_surface->Data.VU; + picture->linesize[0] = new_stage->out.p_surface->Info.Width; + picture->linesize[1] = new_stage->out.p_surface->Info.Width; + } else { + picture->data[0] = 0; + picture->data[1] = 0; + picture->linesize[0] = 0; + picture->linesize[1] = 0; + } + + picture->data[2] = qsv_atom; + picture->linesize[2] = 0; + } + + return ret_value; +} + +// Will be called when seeking +static void qsv_flush_dpb(AVCodecContext * avctx) +{ + av_qsv_context *qsv = avctx->priv_data; + av_qsv_space *qsv_decode = qsv->dec_space; + + qsv_decode->bs.DataOffset = 0; + qsv_decode->bs.DataLength = 0; + qsv_decode->bs.MaxLength = qsv_decode->p_buf_max_size; +} + + +mfxStatus ff_qsv_mem_frame_alloc(mfxHDL pthis, + mfxFrameAllocRequest * request, + mfxFrameAllocResponse * response) +{ + mfxStatus sts = MFX_ERR_NONE; + + mfxU32 numAllocated = 0; + + mfxU32 width = AV_QSV_ALIGN32(request->Info.Width); + mfxU32 height = AV_QSV_ALIGN32(request->Info.Height); + mfxU32 nbytes; + + av_qsv_allocators_space *this_alloc = (av_qsv_allocators_space *) pthis; + av_qsv_alloc_frame *fs; + + if (!this_alloc->space) + return MFX_ERR_NOT_INITIALIZED; + + switch (request->Info.FourCC) { + case MFX_FOURCC_YV12: + case MFX_FOURCC_NV12: + nbytes = + width * height + (width >> 1) * (height >> 1) + + (width >> 1) * (height >> 1); + break; + case MFX_FOURCC_RGB3: + nbytes = width * height + width * height + width * height; + break; + case MFX_FOURCC_RGB4: + nbytes = + width * height + width * height + width * height + + width * height; + break; + case MFX_FOURCC_YUY2: + nbytes = + width * height + (width >> 1) * (height) + + (width >> 1) * (height); + break; + default: + return MFX_ERR_UNSUPPORTED; + } + + this_alloc->space->mids = + av_malloc(sizeof(mfxMemId) * request->NumFrameSuggested); + if (!this_alloc->space->mids) + return MFX_ERR_MEMORY_ALLOC; + + // allocate frames + for (numAllocated = 0; numAllocated < request->NumFrameSuggested; + numAllocated++) { + sts = + this_alloc->buffer_alloc.Alloc(this_alloc->buffer_alloc.pthis, + nbytes + + AV_QSV_ALIGN32(sizeof + (av_qsv_alloc_frame)), + request->Type, + &(this_alloc-> + space->mids[numAllocated])); + + if (MFX_ERR_NONE != sts) + break; + + sts = + this_alloc->buffer_alloc.Lock(this_alloc->buffer_alloc.pthis, + this_alloc-> + space->mids[numAllocated], + (mfxU8 **) & fs); + + if (MFX_ERR_NONE != sts) + break; + + fs->id = AV_QSV_ID_FRAME; + fs->info = request->Info; + this_alloc->buffer_alloc.Unlock(this_alloc->buffer_alloc.pthis, + this_alloc-> + space->mids[numAllocated]); + } + + // check the number of allocated frames + if (numAllocated < request->NumFrameMin) + return MFX_ERR_MEMORY_ALLOC; + + response->NumFrameActual = (mfxU16) numAllocated; + response->mids = this_alloc->space->mids; + + return MFX_ERR_NONE; +} + +mfxStatus ff_qsv_mem_frame_lock(mfxHDL pthis, mfxMemId mid, + mfxFrameData * ptr) +{ + mfxStatus sts = MFX_ERR_NONE; + av_qsv_alloc_frame *fs = 0; + mfxU16 width; + mfxU16 height; + + av_qsv_allocators_space *this_alloc = (av_qsv_allocators_space *) pthis; + + if (!this_alloc->space) + return MFX_ERR_NOT_INITIALIZED; + if (!ptr) + return MFX_ERR_NULL_PTR; + + + sts = + this_alloc->buffer_alloc.Lock(this_alloc->buffer_alloc.pthis, mid, + (mfxU8 **) & fs); + + if (MFX_ERR_NONE != sts) + return sts; + + if (AV_QSV_ID_FRAME != fs->id) { + 
this_alloc->buffer_alloc.Unlock(this_alloc->buffer_alloc.pthis, + mid); + return MFX_ERR_INVALID_HANDLE; + } + + width = (mfxU16) AV_QSV_ALIGN32(fs->info.Width); + height = (mfxU16) AV_QSV_ALIGN32(fs->info.Height); + ptr->B = ptr->Y = + (mfxU8 *) fs + AV_QSV_ALIGN32(sizeof(av_qsv_allocators_space)); + + switch (fs->info.FourCC) { + case MFX_FOURCC_NV12: + ptr->U = ptr->Y + width * height; + ptr->V = ptr->U + 1; + ptr->Pitch = width; + break; + case MFX_FOURCC_YV12: + ptr->V = ptr->Y + width * height; + ptr->U = ptr->V + (width >> 1) * (height >> 1); + ptr->Pitch = width; + break; + case MFX_FOURCC_YUY2: + ptr->U = ptr->Y + 1; + ptr->V = ptr->Y + 3; + ptr->Pitch = 2 * width; + break; + case MFX_FOURCC_RGB3: + ptr->G = ptr->B + 1; + ptr->R = ptr->B + 2; + ptr->Pitch = 3 * width; + break; + case MFX_FOURCC_RGB4: + ptr->G = ptr->B + 1; + ptr->R = ptr->B + 2; + ptr->A = ptr->B + 3; + ptr->Pitch = 4 * width; + break; + default: + return MFX_ERR_UNSUPPORTED; + } + + return MFX_ERR_NONE; +} + +mfxStatus ff_qsv_mem_frame_unlock(mfxHDL pthis, mfxMemId mid, + mfxFrameData * ptr) +{ + mfxStatus sts = MFX_ERR_NONE; + av_qsv_allocators_space *this_alloc = (av_qsv_allocators_space *) pthis; + + sts = + this_alloc->buffer_alloc.Unlock(this_alloc->buffer_alloc.pthis, + mid); + + if (MFX_ERR_NONE != sts) + return sts; + + if (NULL != ptr) { + ptr->Pitch = 0; + ptr->Y = 0; + ptr->U = 0; + ptr->V = 0; + } + + return MFX_ERR_NONE; +} + +mfxStatus ff_qsv_mem_frame_getHDL(mfxHDL pthis, mfxMemId mid, + mfxHDL * handle) +{ + return MFX_ERR_UNSUPPORTED; +} + +mfxStatus ff_qsv_mem_frame_free(mfxHDL pthis, + mfxFrameAllocResponse * response) +{ + mfxStatus sts = MFX_ERR_NONE; + av_qsv_allocators_space *this_alloc = (av_qsv_allocators_space *) pthis; + mfxU32 i; + + if (!response) + return MFX_ERR_NULL_PTR; + + if (!this_alloc->space) + return MFX_ERR_NOT_INITIALIZED; + + if (response->mids) + for (i = 0; i < response->NumFrameActual; i++) { + if (response->mids[i]) { + sts = + this_alloc->buffer_alloc.Free(this_alloc-> + buffer_alloc.pthis, + response->mids[i]); + if (MFX_ERR_NONE != sts) + return sts; + } + } + + av_freep(&response->mids); + + return sts; +} + + +mfxStatus ff_qsv_mem_buffer_alloc(mfxHDL pthis, mfxU32 nbytes, mfxU16 type, + mfxMemId * mid) +{ + av_qsv_alloc_buffer *bs; + mfxU32 header_size; + mfxU8 *buffer_ptr; + + if (!mid) + return MFX_ERR_NULL_PTR; + + if (0 == (type & MFX_MEMTYPE_SYSTEM_MEMORY)) + return MFX_ERR_UNSUPPORTED; + + header_size = AV_QSV_ALIGN32(sizeof(av_qsv_alloc_buffer)); + buffer_ptr = (mfxU8 *) av_malloc(header_size + nbytes); + + if (!buffer_ptr) + return MFX_ERR_MEMORY_ALLOC; + + bs = (av_qsv_alloc_buffer *) buffer_ptr; + bs->id = AV_QSV_ID_BUFFER; + bs->type = type; + bs->nbytes = nbytes; + *mid = (mfxHDL) bs; + return MFX_ERR_NONE; +} + +mfxStatus ff_qsv_mem_buffer_lock(mfxHDL pthis, mfxMemId mid, mfxU8 ** ptr) +{ + av_qsv_alloc_buffer *bs; + + if (!ptr) + return MFX_ERR_NULL_PTR; + + bs = (av_qsv_alloc_buffer *) mid; + + if (!bs) + return MFX_ERR_INVALID_HANDLE; + if (AV_QSV_ID_BUFFER != bs->id) + return MFX_ERR_INVALID_HANDLE; + + *ptr = (mfxU8 *) bs + AV_QSV_ALIGN32(sizeof(av_qsv_alloc_buffer)); + return MFX_ERR_NONE; +} + +mfxStatus ff_qsv_mem_buffer_unlock(mfxHDL pthis, mfxMemId mid) +{ + av_qsv_alloc_buffer *bs = (av_qsv_alloc_buffer *) mid; + + if (!bs || AV_QSV_ID_BUFFER != bs->id) + return MFX_ERR_INVALID_HANDLE; + + return MFX_ERR_NONE; +} + +mfxStatus ff_qsv_mem_buffer_free(mfxHDL pthis, mfxMemId mid) +{ + av_qsv_alloc_buffer *bs = (av_qsv_alloc_buffer *) mid; + if 
(!bs || AV_QSV_ID_BUFFER != bs->id) + return MFX_ERR_INVALID_HANDLE; + + av_freep(&bs); + return MFX_ERR_NONE; +} + + +AVCodec ff_h264_qsv_decoder = { + .name = "h264_qsv", + .type = AVMEDIA_TYPE_VIDEO, + .id = AV_CODEC_ID_H264, + .init = ff_qsv_decode_init, + .close = qsv_decode_end, + .decode = qsv_decode_frame, + .capabilities = CODEC_CAP_DR1 | CODEC_CAP_DELAY, + .flush = qsv_flush_dpb, + .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / Intel QSV"), + .pix_fmts = (const enum PixelFormat[]) {AV_PIX_FMT_QSV_H264, + AV_PIX_FMT_NONE}, +}; diff --git a/libavcodec/qsv_h264.h b/libavcodec/qsv_h264.h new file mode 100644 index 0000000..3cbdb3f --- /dev/null +++ b/libavcodec/qsv_h264.h @@ -0,0 +1,65 @@ +/* ********************************************************************* *\ + +Copyright (C) 2013 Intel Corporation. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. +- Neither the name of Intel Corporation nor the names of its contributors +may be used to endorse or promote products derived from this software +without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION "AS IS" AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL INTEL CORPORATION BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +\* ********************************************************************* */ + +#ifndef AVCODEC_QSV_H264_H +#define AVCODEC_QSV_H264_H + +#include "qsv.h" + +int ff_qsv_dec_init(AVCodecContext *); +int ff_qsv_nal_find_start_code(uint8_t * pb, size_t size); + +int ff_qsv_dec_init_clean(AVCodecContext *avctx); +av_cold int ff_qsv_decode_init(AVCodecContext * avctx); +static av_cold int qsv_decode_end(AVCodecContext * avctx); +static int qsv_decode_frame(AVCodecContext * avctx, void *data, + int *data_size, AVPacket * avpkt); +static void qsv_flush_dpb(AVCodecContext * avctx); + + +// Default for SYSTEM MEMORY +// as from MFXFrameAllocator +mfxStatus ff_qsv_mem_frame_alloc(mfxHDL pthis, + mfxFrameAllocRequest * request, + mfxFrameAllocResponse * response); +mfxStatus ff_qsv_mem_frame_lock(mfxHDL pthis, mfxMemId mid, + mfxFrameData * ptr); +mfxStatus ff_qsv_mem_frame_unlock(mfxHDL pthis, mfxMemId mid, + mfxFrameData * ptr); +mfxStatus ff_qsv_mem_frame_getHDL(mfxHDL pthis, mfxMemId mid, + mfxHDL * handle); +mfxStatus ff_qsv_mem_frame_free(mfxHDL pthis, + mfxFrameAllocResponse * response); +// as from mfxBufferAllocator +mfxStatus ff_qsv_mem_buffer_alloc(mfxHDL pthis, mfxU32 nbytes, mfxU16 type, + mfxMemId * mid); +mfxStatus ff_qsv_mem_buffer_lock(mfxHDL pthis, mfxMemId mid, mfxU8 ** ptr); +mfxStatus ff_qsv_mem_buffer_unlock(mfxHDL pthis, mfxMemId mid); +mfxStatus ff_qsv_mem_buffer_free(mfxHDL pthis, mfxMemId mid); + +#endif //AVCODEC_QSV_H264_H diff --git a/libavutil/pixfmt.h b/libavutil/pixfmt.h index 6af6596..70fe068 100644 --- a/libavutil/pixfmt.h +++ b/libavutil/pixfmt.h @@ -193,6 +193,7 @@ enum AVPixelFormat { AV_PIX_FMT_NV16, ///< interleaved chroma YUV 4:2:2, 16bpp, (1 Cr & Cb sample per 2x1 Y samples) AV_PIX_FMT_NV20LE, ///< interleaved chroma YUV 4:2:2, 20bpp, (1 Cr & Cb sample per 2x1 Y samples), little-endian AV_PIX_FMT_NV20BE, ///< interleaved chroma YUV 4:2:2, 20bpp, (1 Cr & Cb sample per 2x1 Y samples), big-endian + AV_PIX_FMT_QSV_H264, ///< H.264 HW decoding with QSV, data[2] contains qsv_atom information for MFX_IOPATTERN_OUT_OPAQUE_MEMORY, MFX_IOPATTERN_OUT_VIDEO_MEMORY AV_PIX_FMT_RGBA64BE, ///< packed RGBA 16:16:16:16, 64bpp, 16R, 16G, 16B, 16A, the 2-byte value for each R/G/B/A component is stored as big-endian AV_PIX_FMT_RGBA64LE, ///< packed RGBA 16:16:16:16, 64bpp, 16R, 16G, 16B, 16A, the 2-byte value for each R/G/B/A component is stored as little-endian
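Note (not part of the patch): the drain procedure referenced in the comment inside qsv_decode_frame() can be summarised with a short Media SDK sketch. Only the MFX* calls and constants come from the SDK; the function name and the single reused work surface are illustrative — in the patch a fresh unlocked surface is picked per iteration via av_qsv_get_free_surface().

#include <mfx/mfxvideo.h>

/* Drain sketch: once the input bitstream is exhausted, keep calling
 * MFXVideoDECODE_DecodeFrameAsync() with a NULL bitstream pointer until the
 * decoder returns MFX_ERR_MORE_DATA, syncing each frame it still emits. */
static void qsv_drain_sketch(mfxSession session, mfxFrameSurface1 *work)
{
    mfxFrameSurface1 *out  = NULL;
    mfxSyncPoint      sync = NULL;
    mfxStatus         sts  = MFX_ERR_NONE;

    while (sts != MFX_ERR_MORE_DATA) {
        sts = MFXVideoDECODE_DecodeFrameAsync(session, NULL, work, &out, &sync);
        if (sts == MFX_ERR_NONE)
            MFXVideoCORE_SyncOperation(session, sync, 60000 /* wait, ms */);
        /* MFX_ERR_MORE_SURFACE / MFX_WRN_DEVICE_BUSY handling omitted. */
    }
}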
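Also for reference, a minimal sketch of the NV12 size and plane-layout arithmetic used by the system-memory frame allocator above (ff_qsv_mem_frame_alloc()/ff_qsv_mem_frame_lock()): dimensions are rounded up to a multiple of 32, and the interleaved UV plane directly follows the luma plane at an offset of width*height. QSV_ALIGN32 and nv12_layout are illustrative stand-ins, not names from the patch.

#include <stdint.h>
#include <stddef.h>

#define QSV_ALIGN32(x) (((x) + 31) & ~31)   /* stand-in for AV_QSV_ALIGN32 */

typedef struct {
    uint8_t *y;       /* luma plane                                     */
    uint8_t *uv;      /* interleaved chroma plane (U,V,U,V,...)         */
    size_t   pitch;   /* bytes per row, same for both planes in NV12    */
    size_t   nbytes;  /* total payload size                             */
} nv12_layout;

static nv12_layout nv12_map(uint8_t *base, unsigned w, unsigned h)
{
    nv12_layout l;
    unsigned aw = QSV_ALIGN32(w), ah = QSV_ALIGN32(h);

    /* Y plane: aw*ah bytes; chroma: two quarter-size planes interleaved,
     * i.e. another aw*ah/2 bytes right after the luma plane. */
    l.pitch  = aw;
    l.nbytes = (size_t)aw * ah + ((aw >> 1) * (ah >> 1)) * 2;
    l.y      = base;
    l.uv     = base + (size_t)aw * ah;
    return l;
}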
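Finally, a hedged sketch of how an application could reach the new decoder once this patch is applied. It uses the old-style avcodec_decode_video2() path matching the decode callback above; any QSV-specific session/configuration setup that ff_qsv_decode_init() may expect from the caller is not shown, and error handling is trimmed. With MFX_IOPATTERN_OUT_SYSTEM_MEMORY the NV12 planes arrive in data[0]/data[1]; for video/opaque memory, data[2] carries the qsv_atom handle as documented for AV_PIX_FMT_QSV_H264.

#include "libavcodec/avcodec.h"

/* Usage sketch only: decode one H.264 packet with the h264_qsv decoder.
 * 'pkt' is assumed to already hold a complete access unit. */
static int decode_one_qsv(AVPacket *pkt)
{
    AVCodec        *codec;
    AVCodecContext *avctx;
    AVFrame        *frame;
    int             got_frame = 0, ret;

    avcodec_register_all();                   /* registers h264_qsv as well   */
    codec = avcodec_find_decoder_by_name("h264_qsv");
    if (!codec)
        return -1;                            /* built without --enable-qsv   */

    avctx = avcodec_alloc_context3(codec);
    frame = avcodec_alloc_frame();            /* era-appropriate allocator    */
    if (!avctx || !frame || avcodec_open2(avctx, codec, NULL) < 0)
        return -1;

    ret = avcodec_decode_video2(avctx, frame, &got_frame, pkt);
    if (ret >= 0 && got_frame) {
        /* System memory: NV12 in frame->data[0] / frame->data[1].
         * Video/opaque memory: frame->data[2] holds the qsv_atom handle. */
    }

    avcodec_close(avctx);
    av_free(avctx);
    avcodec_free_frame(&frame);
    return ret;
}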