| author    | Artem Galin <[email protected]>                      | 2020-07-10 02:31:17 +0100 |
|-----------|--------------------------------------------------|---------------------------|
| committer | Scott <[email protected]>                            | 2020-07-20 16:56:43 +0100 |
| commit    | bdee4bef65609aea9f56e0a87ad3d713f62033a7 (patch) |                           |
| tree      | e272feb3b83b7f8b2d3b31f1763541eb0d60c498         |                           |
| parent    | 3fbdd5f5bef30f6180e984f578bbe7e12ba087e4 (diff)  |                           |
qsv: added HW offload of resize filter
Started to close the gap of filters that can be offloaded for HW
acceleration, when possible.
The resize filter is validated; others are in the pipeline, including
DX11.
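
For context on what the offload looks like in practice: when the full QSV path is active, the crop/scale filter now emits an FFmpeg `scale_qsv` node instead of the software `scale` node, so resizing runs through the Media SDK VPP on the GPU. Below is a minimal sketch of that branch, condensed from the `crop_scale_init()` change in libhb/cropscale.c further down; `avfilters`, `width`, `height` and `qsv_filters_are_enabled` come from the surrounding filter setup, and the hardware frame-pool allocation that accompanies the QSV branch is omitted.

```c
// Condensed sketch of the scaler selection added in libhb/cropscale.c.
// Only the dictionary describing the avfilter node is shown here.
hb_dict_t *avfilter   = hb_dict_init();
hb_dict_t *avsettings = hb_dict_init();

if (qsv_filters_are_enabled)
{
    // HW path: resize on the GPU via the QSV/VPP scaler.
    hb_dict_set_int(avsettings, "w", width);
    hb_dict_set_int(avsettings, "h", height);
    hb_dict_set(avfilter, "scale_qsv", avsettings);
}
else
{
    // SW path: keep the existing lanczos swscale configuration.
    hb_dict_set_int(avsettings, "width",  width);
    hb_dict_set_int(avsettings, "height", height);
    hb_dict_set_string(avsettings, "flags", "lanczos+accurate_rnd");
    hb_dict_set(avfilter, "scale", avsettings);
}
hb_value_array_append(avfilters, avfilter);
```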
 libhb/avfilter.c             |  22
 libhb/cropscale.c            | 121
 libhb/decavcodec.c           |  30
 libhb/enc_qsv.c              |  39
 libhb/fifo.c                 |  16
 libhb/handbrake/qsv_common.h |  20
 libhb/hbavfilter.c           |  87
 libhb/qsv_common.c           | 382
 libhb/work.c                 | 171
 9 files changed, 567 insertions(+), 321 deletions(-)
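
The bulk of the change (libhb/qsv_common.c, 382 lines touched) generalizes the single encoder surface pool into a reusable context so that the decoder and the new VPP stage can each own one (`hb_dec_qsv_frames_ctx` and `hb_vpp_qsv_frames_ctx`). Below is a condensed view of the renamed structure from libhb/handbrake/qsv_common.h, plus a hypothetical, trimmed-down release helper in the spirit of the new `hb_qsv_release_surface_from_pool_by_surface_pointer()`; the helper name is illustrative, and the real function retries with `hb_qsv_sleep()` instead of returning when every surface is still locked.

```c
// Condensed view of the renamed pool context from libhb/handbrake/qsv_common.h.
// QSVMid, HB_POOL_SURFACE_SIZE and ff_qsv_atomic_dec() come from the same headers.
typedef struct HBQSVFramesContext {
    AVBufferRef *hw_frames_ctx;            // FFmpeg QSV frames pool backing this stage
    AVBufferRef *mids_buf;                 // refcounted array of QSVMid entries
    QSVMid      *mids;
    int          nb_mids;
    int          pool[HB_POOL_SURFACE_SIZE]; // 0/1 in-use flags, one per surface index
    void        *input_texture;            // D3D11 texture array of the source pool
} HBQSVFramesContext;

// Hypothetical, trimmed-down release helper: clear the in-use flag of the pool
// slot whose mfxFrameSurface1 matches the given pointer.
static int release_by_surface(HBQSVFramesContext *ctx, const mfxFrameSurface1 *surface)
{
    AVHWFramesContext  *frames_ctx   = (AVHWFramesContext *)ctx->hw_frames_ctx->data;
    AVQSVFramesContext *frames_hwctx = frames_ctx->hwctx;

    for (int i = 0; i < ctx->nb_mids; i++)
    {
        if (surface == &frames_hwctx->surfaces[i] &&
            frames_hwctx->surfaces[i].Data.Locked == 0)
        {
            ff_qsv_atomic_dec(&ctx->pool[i]);
            return 0;
        }
    }
    return -1; // not found or still locked; the real code sleeps and retries
}
```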
diff --git a/libhb/avfilter.c b/libhb/avfilter.c index 64dab8371..61de084b4 100644 --- a/libhb/avfilter.c +++ b/libhb/avfilter.c @@ -11,6 +11,12 @@ #include "handbrake/hbavfilter.h" #include "handbrake/avfilter_priv.h" +#if HB_PROJECT_FEATURE_QSV +#include "handbrake/qsv_common.h" +extern int qsv_filters_are_enabled; +extern HBQSVFramesContext hb_dec_qsv_frames_ctx; +#endif + static int avfilter_init(hb_filter_object_t * filter, hb_filter_init_t * init); static int avfilter_post_init( hb_filter_object_t * filter, hb_job_t * job ); static void avfilter_close( hb_filter_object_t * filter ); @@ -232,7 +238,14 @@ static hb_buffer_t* filterFrame( hb_filter_private_t * pv, hb_buffer_t * in ) { hb_buffer_list_t list; hb_buffer_t * buf, * next; - +#if HB_PROJECT_FEATURE_QSV + mfxFrameSurface1 *surface = NULL; + // We need to keep surface pointer because hb_avfilter_add_buf set it to 0 after in ffmpeg call + if (qsv_filters_are_enabled && in && in->qsv_details.frame) + { + surface = (mfxFrameSurface1 *)in->qsv_details.frame->data[3]; + } +#endif hb_avfilter_add_buf(pv->graph, in); buf = hb_avfilter_get_buf(pv->graph); while (buf != NULL) @@ -240,7 +253,12 @@ static hb_buffer_t* filterFrame( hb_filter_private_t * pv, hb_buffer_t * in ) hb_buffer_list_append(&pv->list, buf); buf = hb_avfilter_get_buf(pv->graph); } - +#if HB_PROJECT_FEATURE_QSV + if (qsv_filters_are_enabled && surface) + { + hb_qsv_release_surface_from_pool_by_surface_pointer(&hb_dec_qsv_frames_ctx, surface); + } +#endif // Delay one frame so we can set the stop time of the output buffer hb_buffer_list_clear(&list); while (hb_buffer_list_count(&pv->list) > 1) diff --git a/libhb/cropscale.c b/libhb/cropscale.c index f934aa492..80a35447d 100644 --- a/libhb/cropscale.c +++ b/libhb/cropscale.c @@ -9,6 +9,14 @@ #include "handbrake/common.h" #include "handbrake/avfilter_priv.h" +#if HB_PROJECT_FEATURE_QSV +#include "handbrake/qsv_common.h" +#include "libavutil/hwcontext_qsv.h" +#include "libavutil/hwcontext.h" +extern int qsv_filters_are_enabled; +extern int num_cpu_filters; +HBQSVFramesContext hb_vpp_qsv_frames_ctx; +#endif static int crop_scale_init(hb_filter_object_t * filter, hb_filter_init_t * init); @@ -86,51 +94,92 @@ static int crop_scale_init(hb_filter_object_t * filter, hb_filter_init_t * init) hb_dict_t * avfilter = hb_dict_init(); hb_dict_t * avsettings = hb_dict_init(); - hb_dict_set_int(avsettings, "width", width); - hb_dict_set_int(avsettings, "height", height); - hb_dict_set_string(avsettings, "flags", "lanczos+accurate_rnd"); - switch (init->color_matrix) +#if HB_PROJECT_FEATURE_QSV + if (qsv_filters_are_enabled) { - case HB_COLR_MAT_BT709: - matrix = "bt709"; - break; - case HB_COLR_MAT_FCC: - matrix = "fcc"; - break; - case HB_COLR_MAT_SMPTE240M: - matrix = "smpte240m"; - break; - case HB_COLR_MAT_BT470BG: - case HB_COLR_MAT_SMPTE170M: - matrix = "smpte170m"; - break; - case HB_COLR_MAT_BT2020_NCL: - case HB_COLR_MAT_BT2020_CL: - matrix = "bt2020"; - break; - default: - case HB_COLR_MAT_UNDEF: - matrix = NULL; - break; - + hb_dict_set_int(avsettings, "w", width); + hb_dict_set_int(avsettings, "h", height); + hb_dict_set(avfilter, "scale_qsv", avsettings); + int result = hb_create_ffmpeg_pool(width, height, AV_PIX_FMT_NV12, HB_POOL_SURFACE_SIZE, 0, &hb_vpp_qsv_frames_ctx.hw_frames_ctx); + if (result < 0) + { + hb_error("hb_create_ffmpeg_pool vpp allocation failed"); + return result; + } + + AVHWFramesContext *frames_ctx; + AVQSVFramesContext *frames_hwctx; + AVBufferRef *hw_frames_ctx; + + hw_frames_ctx = 
hb_vpp_qsv_frames_ctx.hw_frames_ctx; + frames_ctx = (AVHWFramesContext*)hw_frames_ctx->data; + frames_hwctx = frames_ctx->hwctx; + hb_vpp_qsv_frames_ctx.input_texture = frames_hwctx->texture; + + /* allocate the memory ids for the external frames */ + av_buffer_unref(&hb_vpp_qsv_frames_ctx.mids_buf); + hb_vpp_qsv_frames_ctx.mids_buf = hb_qsv_create_mids(hb_vpp_qsv_frames_ctx.hw_frames_ctx); + if (!hb_vpp_qsv_frames_ctx.mids_buf) + return AVERROR(ENOMEM); + hb_vpp_qsv_frames_ctx.mids = (QSVMid*)hb_vpp_qsv_frames_ctx.mids_buf->data; + hb_vpp_qsv_frames_ctx.nb_mids = frames_hwctx->nb_surfaces; + memset(hb_vpp_qsv_frames_ctx.pool, 0, hb_vpp_qsv_frames_ctx.nb_mids * sizeof(hb_vpp_qsv_frames_ctx.pool[0])); } - if (matrix != NULL) + else +#endif { - hb_dict_set_string(avsettings, "in_color_matrix", matrix); - hb_dict_set_string(avsettings, "out_color_matrix", matrix); + hb_dict_set_int(avsettings, "width", width); + hb_dict_set_int(avsettings, "height", height); + hb_dict_set_string(avsettings, "flags", "lanczos+accurate_rnd"); + + switch (init->color_matrix) + { + case HB_COLR_MAT_BT709: + matrix = "bt709"; + break; + case HB_COLR_MAT_FCC: + matrix = "fcc"; + break; + case HB_COLR_MAT_SMPTE240M: + matrix = "smpte240m"; + break; + case HB_COLR_MAT_BT470BG: + case HB_COLR_MAT_SMPTE170M: + matrix = "smpte170m"; + break; + case HB_COLR_MAT_BT2020_NCL: + case HB_COLR_MAT_BT2020_CL: + matrix = "bt2020"; + break; + default: + case HB_COLR_MAT_UNDEF: + matrix = NULL; + break; + } + if (matrix != NULL) + { + hb_dict_set_string(avsettings, "in_color_matrix", matrix); + hb_dict_set_string(avsettings, "out_color_matrix", matrix); + } + hb_dict_set_string(avsettings, "out_range", "limited"); + hb_dict_set(avfilter, "scale", avsettings); } - hb_dict_set_string(avsettings, "out_range", "limited"); - hb_dict_set(avfilter, "scale", avsettings); + hb_value_array_append(avfilters, avfilter); avfilter = hb_dict_init(); avsettings = hb_dict_init(); - // TODO: Support other pix formats - // Force output to YUV420P for until other formats are supported - hb_dict_set(avsettings, "pix_fmts", hb_value_string("yuv420p")); - hb_dict_set(avfilter, "format", avsettings); - hb_value_array_append(avfilters, avfilter); +#if HB_PROJECT_FEATURE_QSV + if (!qsv_filters_are_enabled) +#endif + { + // TODO: Support other pix formats + // Force output to YUV420P for until other formats are supported + hb_dict_set(avsettings, "pix_fmts", hb_value_string("yuv420p")); + hb_dict_set(avfilter, "format", avsettings); + hb_value_array_append(avfilters, avfilter); + } init->crop[0] = top; init->crop[1] = bottom; diff --git a/libhb/decavcodec.c b/libhb/decavcodec.c index d562b295e..3dbb4a918 100644 --- a/libhb/decavcodec.c +++ b/libhb/decavcodec.c @@ -52,6 +52,7 @@ #include "libavutil/hwcontext_qsv.h" #include "handbrake/qsv_common.h" #include "handbrake/qsv_libav.h" +extern HBQSVFramesContext hb_dec_qsv_frames_ctx; #endif static void compute_frame_duration( hb_work_private_t *pv ); @@ -954,7 +955,7 @@ static hb_buffer_t *copy_frame( hb_work_private_t *pv ) if (pv->qsv.decode && pv->qsv.config.io_pattern == MFX_IOPATTERN_OUT_VIDEO_MEMORY) { - out = hb_qsv_copy_frame(pv->frame, pv->job->qsv.ctx); + out = hb_qsv_copy_frame(&hb_dec_qsv_frames_ctx, pv->frame, pv->job->qsv.ctx, 0); } else #endif @@ -1173,16 +1174,27 @@ int reinit_video_filters(hb_work_private_t * pv) orig_width != pv->frame->width || orig_height != pv->frame->height) { - settings = hb_dict_init(); - hb_dict_set(settings, "w", hb_value_int(orig_width)); - hb_dict_set(settings, "h", 
hb_value_int(orig_height)); - hb_dict_set(settings, "flags", hb_value_string("lanczos+accurate_rnd")); - hb_avfilter_append_dict(filters, "scale", settings); +#if HB_PROJECT_FEATURE_QSV + if (pv->qsv.decode && + pv->qsv.config.io_pattern == MFX_IOPATTERN_OUT_VIDEO_MEMORY) + { + hb_dict_set(settings, "w", hb_value_int(orig_width)); + hb_dict_set(settings, "h", hb_value_int(orig_height)); + hb_avfilter_append_dict(filters, "scale_qsv", settings); + } + else +#endif + { + hb_dict_set(settings, "w", hb_value_int(orig_width)); + hb_dict_set(settings, "h", hb_value_int(orig_height)); + hb_dict_set(settings, "flags", hb_value_string("lanczos+accurate_rnd")); + hb_avfilter_append_dict(filters, "scale", settings); - settings = hb_dict_init(); - hb_dict_set(settings, "pix_fmts", hb_value_string("yuv420p")); - hb_avfilter_append_dict(filters, "format", settings); + settings = hb_dict_init(); + hb_dict_set(settings, "pix_fmts", hb_value_string("yuv420p")); + hb_avfilter_append_dict(filters, "format", settings); + } } if (pv->title->rotation != HB_ROTATION_0) { diff --git a/libhb/enc_qsv.c b/libhb/enc_qsv.c index cb5c981db..7ff3e1489 100644 --- a/libhb/enc_qsv.c +++ b/libhb/enc_qsv.c @@ -42,8 +42,10 @@ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "libavutil/hwcontext.h" #include <mfx/mfxvideo.h> +extern int qsv_filters_are_enabled; extern AVBufferRef *hb_hw_device_ctx; -EncQSVFramesContext hb_enc_qsv_frames_ctx; +HBQSVFramesContext hb_dec_qsv_frames_ctx; +extern HBQSVFramesContext hb_vpp_qsv_frames_ctx; /* * The frame info struct remembers information about each frame across calls to @@ -554,7 +556,7 @@ static int qsv_setup_mids(mfxFrameAllocResponse *resp, AVBufferRef *hw_frames_re static mfxStatus hb_qsv_frame_alloc(mfxHDL pthis, mfxFrameAllocRequest *req, mfxFrameAllocResponse *resp) { - EncQSVFramesContext *ctx = pthis; + HBQSVFramesContext *ctx = pthis; int ret; /* this should only be called from an encoder or decoder and @@ -795,7 +797,7 @@ int qsv_enc_init(hb_work_private_t *pv) // reuse parent session qsv->mfx_session = parent_session; mfxFrameAllocator frame_allocator = { - .pthis = &hb_enc_qsv_frames_ctx, + .pthis = &hb_dec_qsv_frames_ctx, .Alloc = hb_qsv_frame_alloc, .Lock = hb_qsv_frame_lock, .Unlock = hb_qsv_frame_unlock, @@ -803,6 +805,11 @@ int qsv_enc_init(hb_work_private_t *pv) .Free = hb_qsv_frame_free, }; + if (qsv_filters_are_enabled) + { + frame_allocator.pthis = &hb_vpp_qsv_frames_ctx; + } + err = MFXVideoCORE_SetFrameAllocator(qsv->mfx_session, &frame_allocator); if (err != MFX_ERR_NONE) { @@ -2155,7 +2162,14 @@ static int qsv_enc_work(hb_work_private_t *pv, mfxFrameSurface1 *surface = task->stage->in.p_surface; if(!pv->is_sys_mem && surface) { - hb_qsv_release_surface_from_pool(surface->Data.MemId); + if (qsv_filters_are_enabled) + { + hb_qsv_release_surface_from_pool(&hb_vpp_qsv_frames_ctx, surface->Data.MemId); + } + else + { + hb_qsv_release_surface_from_pool(&hb_dec_qsv_frames_ctx, surface->Data.MemId); + } } if (task->bs->DataLength > 0) @@ -2238,21 +2252,28 @@ int encqsvWork(hb_work_object_t *w, hb_buffer_t **buf_in, hb_buffer_t **buf_out) if(in->qsv_details.frame) { surface = ((mfxFrameSurface1*)in->qsv_details.frame->data[3]); - mid = surface->Data.MemId; + if (qsv_filters_are_enabled) + { + hb_qsv_get_mid_by_surface_from_pool(&hb_vpp_qsv_frames_ctx, surface, &mid); + } + else + { + hb_qsv_get_mid_by_surface_from_pool(&hb_dec_qsv_frames_ctx, surface, &mid); + } } else { // Create black buffer in the begining of the encoding, usually first 2 
frames - hb_qsv_get_free_surface_from_pool(HB_POOL_SURFACE_SIZE - HB_POOL_ENCODER_SIZE, HB_POOL_SURFACE_SIZE, &mid, &surface); + hb_qsv_get_free_surface_from_pool_with_range(&hb_dec_qsv_frames_ctx, HB_POOL_SURFACE_SIZE - HB_POOL_ENCODER_SIZE, HB_POOL_SURFACE_SIZE, &mid, &surface); } - if(surface) + if (qsv_filters_are_enabled) { - hb_qsv_replace_surface_mid(mid, surface); + hb_qsv_replace_surface_mid(&hb_vpp_qsv_frames_ctx, mid, surface); } else { - goto fail; + hb_qsv_replace_surface_mid(&hb_dec_qsv_frames_ctx, mid, surface); } #endif // At this point, enc_qsv takes ownership of the QSV resources diff --git a/libhb/fifo.c b/libhb/fifo.c index 551224b05..91eacf4b0 100644 --- a/libhb/fifo.c +++ b/libhb/fifo.c @@ -715,6 +715,12 @@ void hb_buffer_swap_copy( hb_buffer_t *src, hb_buffer_t *dst ) src->alloc = alloc; } +#if HB_PROJECT_FEATURE_QSV +extern HBQSVFramesContext hb_dec_qsv_frames_ctx; +extern HBQSVFramesContext hb_vpp_qsv_frames_ctx; +extern int qsv_filters_are_enabled; +#endif + // Frees the specified buffer list. void hb_buffer_close( hb_buffer_t ** _b ) { @@ -730,9 +736,17 @@ void hb_buffer_close( hb_buffer_t ** _b ) mfxFrameSurface1 *surface = (mfxFrameSurface1*)b->qsv_details.frame->data[3]; if(surface) { - hb_qsv_release_surface_from_pool(surface->Data.MemId); + if(qsv_filters_are_enabled) + { + hb_qsv_release_surface_from_pool_by_surface_pointer(&hb_dec_qsv_frames_ctx, surface); + } + else + { + hb_qsv_release_surface_from_pool(&hb_dec_qsv_frames_ctx, surface->Data.MemId); + } b->qsv_details.frame->data[3] = 0; } + av_frame_unref(b->qsv_details.frame); } if (b->qsv_details.qsv_atom != NULL && b->qsv_details.ctx != NULL) { diff --git a/libhb/handbrake/qsv_common.h b/libhb/handbrake/qsv_common.h index cdd48935a..7bca30aed 100644 --- a/libhb/handbrake/qsv_common.h +++ b/libhb/handbrake/qsv_common.h @@ -245,15 +245,16 @@ typedef struct QSVFrame { struct QSVFrame *next; } QSVFrame; +#define HB_POOL_FFMPEG_SURFACE_SIZE (64) #define HB_POOL_SURFACE_SIZE (64) #define HB_POOL_ENCODER_SIZE (8) -typedef struct EncQSVFramesContext { +typedef struct HBQSVFramesContext { AVBufferRef *hw_frames_ctx; //void *logctx; /* The memory ids for the external frames. - * Refcounted, since we need one reference owned by the QSVFramesContext + * Refcounted, since we need one reference owned by the HBQSVFramesContext * (i.e. by the encoder/decoder) and another one given to the MFX session * from the frame allocator. 
*/ AVBufferRef *mids_buf; @@ -261,15 +262,20 @@ typedef struct EncQSVFramesContext { int nb_mids; int pool[HB_POOL_SURFACE_SIZE]; void *input_texture; -} EncQSVFramesContext; +} HBQSVFramesContext; /* Full QSV pipeline helpers */ +int hb_qsv_init(int coded_width, int coded_height, enum AVPixelFormat sw_pix_fmt, int extra_hw_frames, AVBufferRef **out_hw_frames_ctx); +int hb_create_ffmpeg_pool(int coded_width, int coded_height, enum AVPixelFormat sw_pix_fmt, int pool_size, int extra_hw_frames, AVBufferRef **out_hw_frames_ctx); int hb_qsv_full_path_is_enabled(hb_job_t *job); AVBufferRef *hb_qsv_create_mids(AVBufferRef *hw_frames_ref); -hb_buffer_t* hb_qsv_copy_frame(AVFrame *frame, hb_qsv_context *qsv_ctx); -void hb_qsv_get_free_surface_from_pool(const int start_index, const int end_index, QSVMid **out_mid, mfxFrameSurface1 **out_surface); -int hb_qsv_replace_surface_mid(const QSVMid *mid, mfxFrameSurface1 *surface); -int hb_qsv_release_surface_from_pool(const QSVMid *mid); +hb_buffer_t* hb_qsv_copy_frame(HBQSVFramesContext* hb_qsv_frames_ctx, AVFrame *frame, hb_qsv_context *qsv_ctx, int is_vpp); +int hb_qsv_get_free_surface_from_pool(HBQSVFramesContext* hb_enc_qsv_frames_ctx, AVFrame* frame, QSVMid** out_mid); +void hb_qsv_get_free_surface_from_pool_with_range(HBQSVFramesContext* hb_enc_qsv_frames_ctx, const int start_index, const int end_index, QSVMid** out_mid, mfxFrameSurface1** out_surface); +void hb_qsv_get_mid_by_surface_from_pool(HBQSVFramesContext* hb_enc_qsv_frames_ctx, mfxFrameSurface1 *surface, QSVMid **out_mid); +int hb_qsv_replace_surface_mid(HBQSVFramesContext* hb_qsv_frames_ctx, const QSVMid *mid, mfxFrameSurface1 *surface); +int hb_qsv_release_surface_from_pool(HBQSVFramesContext* hb_qsv_frames_ctx, const QSVMid *mid); +int hb_qsv_release_surface_from_pool_by_surface_pointer(HBQSVFramesContext* hb_enc_qsv_frames_ctx, const mfxFrameSurface1 *surface); int hb_qsv_get_buffer(AVCodecContext *s, AVFrame *frame, int flags); enum AVPixelFormat hb_qsv_get_format(AVCodecContext *s, const enum AVPixelFormat *pix_fmts); int hb_qsv_preset_is_zero_copy_enabled(const hb_dict_t *job_dict); diff --git a/libhb/hbavfilter.c b/libhb/hbavfilter.c index fa86ac5f8..135492945 100644 --- a/libhb/hbavfilter.c +++ b/libhb/hbavfilter.c @@ -15,6 +15,12 @@ #include "handbrake/hbavfilter.h" #include "handbrake/avfilter_priv.h" +#if HB_PROJECT_FEATURE_QSV +#include "handbrake/qsv_common.h" +extern int qsv_filters_are_enabled; +extern HBQSVFramesContext hb_vpp_qsv_frames_ctx; +#endif + struct hb_avfilter_graph_s { AVFilterGraph * avgraph; @@ -27,7 +33,7 @@ struct hb_avfilter_graph_s }; static AVFilterContext * append_filter( hb_avfilter_graph_t * graph, - const char * name, const char * args) + const char * name, const char * args, AVBufferSrcParameters *par) { AVFilterContext * filter; int result; @@ -38,6 +44,16 @@ static AVFilterContext * append_filter( hb_avfilter_graph_t * graph, { return NULL; } + + if (par) + { + result = av_buffersrc_parameters_set(filter, par); + if (result < 0) + { + return NULL; + } + } + if (graph->last != NULL) { result = avfilter_link(graph->last, 0, filter, 0); @@ -82,9 +98,12 @@ hb_avfilter_graph_init(hb_value_t * settings, hb_filter_init_t * init) hb_error("hb_avfilter_graph_init: avfilter_graph_alloc failed"); goto fail; } - - av_opt_set(graph->avgraph, "scale_sws_opts", "lanczos+accurate_rnd", 0); - +#if HB_PROJECT_FEATURE_QSV + if (!qsv_filters_are_enabled) +#endif + { + av_opt_set(graph->avgraph, "scale_sws_opts", "lanczos+accurate_rnd", 0); + } result = 
avfilter_graph_parse2(graph->avgraph, settings_str, &in, &out); if (result < 0 || in == NULL || out == NULL) { @@ -93,16 +112,42 @@ hb_avfilter_graph_init(hb_value_t * settings, hb_filter_init_t * init) goto fail; } + AVBufferSrcParameters *par = 0; // Build filter input - filter_args = hb_strdup_printf( - "width=%d:height=%d:pix_fmt=%d:sar=%d/%d:" +#if HB_PROJECT_FEATURE_QSV + if (qsv_filters_are_enabled) + { + par = av_buffersrc_parameters_alloc(); + init->pix_fmt = AV_PIX_FMT_QSV; + filter_args = hb_strdup_printf( + "video_size=%dx%d:pix_fmt=%d:sar=%d/%d:" "time_base=%d/%d:frame_rate=%d/%d", init->geometry.width, init->geometry.height, init->pix_fmt, init->geometry.par.num, init->geometry.par.den, init->time_base.num, init->time_base.den, init->vrate.num, init->vrate.den); - avfilter = append_filter(graph, "buffer", filter_args); + AVBufferRef *hb_hw_frames_ctx = NULL; + result = hb_create_ffmpeg_pool(init->geometry.width, init->geometry.height, AV_PIX_FMT_NV12, 32, 0, &hb_hw_frames_ctx); + if (result < 0) + { + hb_error("hb_create_ffmpeg_pool failed"); + goto fail; + } + par->hw_frames_ctx = hb_hw_frames_ctx; + } + else +#endif + { + filter_args = hb_strdup_printf( + "width=%d:height=%d:pix_fmt=%d:sar=%d/%d:" + "time_base=%d/%d:frame_rate=%d/%d", + init->geometry.width, init->geometry.height, init->pix_fmt, + init->geometry.par.num, init->geometry.par.den, + init->time_base.num, init->time_base.den, + init->vrate.num, init->vrate.den); + } + avfilter = append_filter(graph, "buffer", filter_args, par); free(filter_args); if (avfilter == NULL) { @@ -120,7 +165,7 @@ hb_avfilter_graph_init(hb_value_t * settings, hb_filter_init_t * init) graph->last = out->filter_ctx; // Build filter output - avfilter = append_filter(graph, "buffersink", NULL); + avfilter = append_filter(graph, "buffersink", NULL, 0); if (avfilter == NULL) { hb_error("hb_avfilter_graph_init: failed to create buffer output filter"); @@ -224,8 +269,18 @@ int hb_avfilter_add_buf(hb_avfilter_graph_t * graph, hb_buffer_t * in) { if (in != NULL) { - hb_video_buffer_to_avframe(graph->frame, in); - return av_buffersrc_add_frame(graph->input, graph->frame); +#if HB_PROJECT_FEATURE_QSV + if (qsv_filters_are_enabled) + { + hb_video_buffer_to_avframe(in->qsv_details.frame, in); + return hb_avfilter_add_frame(graph, in->qsv_details.frame); + } + else +#endif + { + hb_video_buffer_to_avframe(graph->frame, in); + return av_buffersrc_add_frame(graph->input, graph->frame); + } } else { @@ -241,7 +296,17 @@ hb_buffer_t * hb_avfilter_get_buf(hb_avfilter_graph_t * graph) if (result >= 0) { hb_buffer_t * buf; - buf = hb_avframe_to_video_buffer(graph->frame, graph->out_time_base); +#if HB_PROJECT_FEATURE_QSV + if (qsv_filters_are_enabled) + { + buf = hb_qsv_copy_frame(&hb_vpp_qsv_frames_ctx, graph->frame, 0, 1); + hb_avframe_set_video_buffer_flags(buf, graph->frame, graph->out_time_base); + } + else +#endif + { + buf = hb_avframe_to_video_buffer(graph->frame, graph->out_time_base); + } av_frame_unref(graph->frame); return buf; } diff --git a/libhb/qsv_common.c b/libhb/qsv_common.c index 72e75ec6d..12bfa0c29 100644 --- a/libhb/qsv_common.c +++ b/libhb/qsv_common.c @@ -7,6 +7,7 @@ * For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html */ +#include "handbrake/handbrake.h" #include "handbrake/project.h" #if HB_PROJECT_FEATURE_QSV @@ -28,6 +29,8 @@ #include "libavutil/hwcontext_qsv.h" #include "libavutil/hwcontext.h" +extern HBQSVFramesContext hb_dec_qsv_frames_ctx; + // QSV info for each codec static 
hb_qsv_info_t *hb_qsv_info_avc = NULL; static hb_qsv_info_t *hb_qsv_info_hevc = NULL; @@ -986,12 +989,14 @@ int hb_qsv_decode_is_enabled(hb_job_t *job) static int hb_dxva2_device_check(); static int hb_d3d11va_device_check(); +extern int qsv_filters_are_enabled; +extern int num_cpu_filters; int hb_qsv_full_path_is_enabled(hb_job_t *job) { static int device_check_completed = 0; static int device_check_succeded = 0; - int filter_count = hb_list_count(job->list_filter); + int qsv_full_path_is_enabled = 0; if(!device_check_completed) { @@ -999,10 +1004,11 @@ int hb_qsv_full_path_is_enabled(hb_job_t *job) || (hb_dxva2_device_check() == 0)) ? 1 : 0; device_check_completed = 1; } - return (hb_qsv_decode_is_enabled(job) && + + qsv_full_path_is_enabled = (hb_qsv_decode_is_enabled(job) && hb_qsv_info_get(job->vcodec) && - device_check_succeded && - (filter_count == 0)); + device_check_succeded && !num_cpu_filters); + return qsv_full_path_is_enabled; } int hb_qsv_copyframe_is_slow(int encoder) @@ -2291,8 +2297,6 @@ void hb_qsv_force_workarounds() #undef FORCE_WORKAROUNDS } -AVBufferRef *enc_hw_frames_ctx = NULL; -extern EncQSVFramesContext hb_enc_qsv_frames_ctx; AVBufferRef *hb_hw_device_ctx = NULL; char *qsv_device = NULL; static mfxHDL device_manager_handle = NULL; @@ -2539,42 +2543,104 @@ static int hb_qsv_find_surface_idx(const QSVMid *mids, const int nb_mids, const return -1; } -int hb_qsv_replace_surface_mid(const QSVMid *mid, mfxFrameSurface1 *surface) +int hb_qsv_replace_surface_mid(HBQSVFramesContext* hb_enc_qsv_frames_ctx, const QSVMid *mid, mfxFrameSurface1 *surface) { - int ret = hb_qsv_find_surface_idx(hb_enc_qsv_frames_ctx.mids, hb_enc_qsv_frames_ctx.nb_mids, mid); + int ret = hb_qsv_find_surface_idx(hb_enc_qsv_frames_ctx->mids, hb_enc_qsv_frames_ctx->nb_mids, mid); if (ret < 0) { - hb_error("encqsv: Surface with MemId=%p has not been found in the pool\n", mid); + hb_error("hb_qsv_replace_surface_mid: Surface with MemId=%p has not been found in the pool", mid); return -1; } else { - surface->Data.MemId = &hb_enc_qsv_frames_ctx.mids[ret]; + surface->Data.MemId = &hb_enc_qsv_frames_ctx->mids[ret]; } return 0; } -int hb_qsv_release_surface_from_pool(const QSVMid *mid) +int hb_qsv_release_surface_from_pool(HBQSVFramesContext* hb_enc_qsv_frames_ctx, const QSVMid *mid) { - int ret = hb_qsv_find_surface_idx(hb_enc_qsv_frames_ctx.mids, hb_enc_qsv_frames_ctx.nb_mids, mid); + int ret = hb_qsv_find_surface_idx(hb_enc_qsv_frames_ctx->mids, hb_enc_qsv_frames_ctx->nb_mids, mid); if (ret < 0) { - hb_error("encqsv: Surface with MemId=%p has not been found in the pool\n", mid); + hb_error("hb_qsv_release_surface_from_pool: Surface with MemId=%p has not been found in the pool", mid); return -1; } + else if(hb_enc_qsv_frames_ctx->pool[ret] == 1) + { + ff_qsv_atomic_dec(&hb_enc_qsv_frames_ctx->pool[ret]); + } else { - ff_qsv_atomic_dec(&hb_enc_qsv_frames_ctx.pool[ret]); + hb_error("hb_qsv_release_surface_from_pool: Surface with index=%d and MemId=%p is used more than once", ret, mid); + return -1; } return 0; } -void hb_qsv_get_free_surface_from_pool(const int start_index, const int end_index, QSVMid **out_mid, mfxFrameSurface1 **out_surface) +int hb_qsv_release_surface_from_pool_by_surface_pointer(HBQSVFramesContext* hb_enc_qsv_frames_ctx, const mfxFrameSurface1 *surface) +{ + int count = 0; + + AVHWFramesContext *frames_ctx = (AVHWFramesContext*)hb_enc_qsv_frames_ctx->hw_frames_ctx->data; + AVQSVFramesContext *frames_hwctx = frames_ctx->hwctx; + + while(1) + { + if(count > 30) + { + 
hb_error("hb_qsv_release_surface_from_pool_by_surface: surface=%p has not been found or busy", surface); + hb_qsv_sleep(10); // prevent hang when all surfaces all used + count = 0; + } + + for(int i = 0; i < hb_enc_qsv_frames_ctx->nb_mids; i++) + { + mfxFrameSurface1 *pool_surface = &frames_hwctx->surfaces[i]; + if( (pool_surface->Data.Locked == 0) && (surface == pool_surface)) + { + ff_qsv_atomic_dec(&hb_enc_qsv_frames_ctx->pool[i]); + return 0; + } + } + count++; + } +} + +void hb_qsv_get_mid_by_surface_from_pool(HBQSVFramesContext* hb_enc_qsv_frames_ctx, mfxFrameSurface1 *surface, QSVMid **out_mid) { QSVMid *mid = NULL; - mfxFrameSurface1 *output_surface = NULL; + + AVHWFramesContext *frames_ctx = (AVHWFramesContext*)hb_enc_qsv_frames_ctx->hw_frames_ctx->data; + AVQSVFramesContext *frames_hwctx = frames_ctx->hwctx; + // find the first available surface in the pool + int count = 0; + while(1) + { + if(count > 30) + { + hb_error("hb_qsv_get_mid_by_surface_from_pool has not been found or busy", mid); + hb_qsv_sleep(10); // prevent hang when all surfaces all used + count = 0; + } - AVHWFramesContext *frames_ctx = (AVHWFramesContext*)hb_enc_qsv_frames_ctx.hw_frames_ctx->data; + for(int i = 0; i < hb_enc_qsv_frames_ctx->nb_mids; i++) + { + mid = &hb_enc_qsv_frames_ctx->mids[i]; + mfxFrameSurface1 *pool_surface = &frames_hwctx->surfaces[i]; + if( (pool_surface->Data.Locked == 0) && (surface == pool_surface)) + { + *out_mid = mid; + return; + } + } + count++; + } +} + +int hb_qsv_get_free_surface_from_pool(HBQSVFramesContext* hb_enc_qsv_frames_ctx, AVFrame* frame, QSVMid** out_mid) +{ + AVHWFramesContext *frames_ctx = (AVHWFramesContext*)hb_enc_qsv_frames_ctx->hw_frames_ctx->data; AVQSVFramesContext *frames_hwctx = frames_ctx->hwctx; // find the first available surface in the pool @@ -2587,34 +2653,47 @@ void hb_qsv_get_free_surface_from_pool(const int start_index, const int end_inde count = 0; } - for(int i = start_index; i < end_index; i++) + int ret = av_hwframe_get_buffer(hb_enc_qsv_frames_ctx->hw_frames_ctx, frame, 0); + if (ret) { - if(hb_enc_qsv_frames_ctx.pool[i] == 0) + return -1; + } + else + { + mfxFrameSurface1 *output_surface = (mfxFrameSurface1 *)frame->data[3]; + for(int i = 0; i < hb_enc_qsv_frames_ctx->nb_mids; i++) { - mid = &hb_enc_qsv_frames_ctx.mids[i]; - output_surface = &frames_hwctx->surfaces[i]; - if(output_surface->Data.Locked == 0) + QSVMid* mid = &hb_enc_qsv_frames_ctx->mids[i]; + mfxFrameSurface1* cur_surface = &frames_hwctx->surfaces[i]; + if(cur_surface == output_surface) { - *out_mid = mid; - *out_surface = output_surface; - ff_qsv_atomic_inc(&hb_enc_qsv_frames_ctx.pool[i]); - return; + if((hb_enc_qsv_frames_ctx->pool[i]) == 0 && (output_surface->Data.Locked == 0)) + { + *out_mid = mid; + ff_qsv_atomic_inc(&hb_enc_qsv_frames_ctx->pool[i]); + return 0; + } + else + { + // we need to do unref if surface is not taken to be used, otherwise -12. 
+ av_frame_unref(frame); + break; + } } } } - count++; } } -static int hb_qsv_allocate_dx11_encoder_pool(ID3D11Device *device, ID3D11Texture2D* input_texture) +static int hb_qsv_allocate_dx11_encoder_pool(HBQSVFramesContext* hb_enc_qsv_frames_ctx, ID3D11Device *device, ID3D11Texture2D* input_texture) { D3D11_TEXTURE2D_DESC desc = { 0 }; ID3D11Texture2D_GetDesc(input_texture, &desc); desc.ArraySize = 1; desc.BindFlags = D3D10_BIND_RENDER_TARGET; - for (size_t i = 0; i < hb_enc_qsv_frames_ctx.nb_mids; i++) + for (size_t i = 0; i < hb_enc_qsv_frames_ctx->nb_mids; i++) { ID3D11Texture2D* texture; HRESULT hr = ID3D11Device_CreateTexture2D(device, &desc, NULL, &texture); @@ -2624,20 +2703,20 @@ static int hb_qsv_allocate_dx11_encoder_pool(ID3D11Device *device, ID3D11Texture return -1; } - QSVMid *mid = &hb_enc_qsv_frames_ctx.mids[i]; + QSVMid *mid = &hb_enc_qsv_frames_ctx->mids[i]; mid->handle = 0; mid->texture = texture; } return 0; } -static int hb_qsv_deallocate_dx11_encoder_pool() +static int hb_qsv_deallocate_dx11_encoder_pool(HBQSVFramesContext* hb_enc_qsv_frames_ctx) { if (device_manager_handle_type == MFX_HANDLE_D3D11_DEVICE) { - for (size_t i = 0; i < hb_enc_qsv_frames_ctx.nb_mids; i++) + for (size_t i = 0; i < hb_enc_qsv_frames_ctx->nb_mids; i++) { - QSVMid *mid = &hb_enc_qsv_frames_ctx.mids[i]; + QSVMid *mid = &hb_enc_qsv_frames_ctx->mids[i]; ID3D11Texture2D* texture = mid->texture; if (texture) { @@ -2654,7 +2733,7 @@ static int hb_qsv_deallocate_dx11_encoder_pool() return 0; } -static int hb_qsv_get_dx_device() +static int hb_qsv_get_dx_device(HBQSVFramesContext* hb_enc_qsv_frames_ctx) { AVHWDeviceContext *device_ctx = (AVHWDeviceContext*)hb_hw_device_ctx->data; AVQSVDeviceContext *device_hwctx = device_ctx->hwctx; @@ -2694,8 +2773,8 @@ static int hb_qsv_get_dx_device() if (device_manager_handle_type == MFX_HANDLE_D3D11_DEVICE) { ID3D11Device *device = (ID3D11Device *)device_manager_handle; - ID3D11Texture2D* input_texture = hb_enc_qsv_frames_ctx.input_texture; - err = hb_qsv_allocate_dx11_encoder_pool(device, input_texture); + ID3D11Texture2D* input_texture = hb_enc_qsv_frames_ctx->input_texture; + err = hb_qsv_allocate_dx11_encoder_pool(hb_enc_qsv_frames_ctx, device, input_texture); if (err < 0) { hb_error("hb_qsv_get_dx_device: hb_qsv_allocate_dx11_encoder_pool failed"); @@ -2712,7 +2791,41 @@ static int hb_qsv_get_dx_device() return 0; } -hb_buffer_t* hb_qsv_copy_frame(AVFrame *frame, hb_qsv_context *qsv_ctx) +void hb_qsv_get_free_surface_from_pool_with_range(HBQSVFramesContext* hb_enc_qsv_frames_ctx, const int start_index, const int end_index, QSVMid** out_mid, mfxFrameSurface1** out_surface) +{ + AVHWFramesContext *frames_ctx = (AVHWFramesContext*)hb_enc_qsv_frames_ctx->hw_frames_ctx->data; + AVQSVFramesContext *frames_hwctx = frames_ctx->hwctx; + + // find the first available surface in the pool + int count = 0; + while(1) + { + if (count > 30) + { + hb_qsv_sleep(10); // prevent hang when all surfaces all used + count = 0; + } + + for (int i = start_index; i < end_index; i++) + { + if ((hb_enc_qsv_frames_ctx->pool[i]) == 0) + { + QSVMid* mid = &hb_enc_qsv_frames_ctx->mids[i]; + mfxFrameSurface1* cur_surface = &frames_hwctx->surfaces[i]; + if (cur_surface->Data.Locked == 0) + { + *out_mid = mid; + *out_surface = cur_surface; + ff_qsv_atomic_inc(&hb_enc_qsv_frames_ctx->pool[i]); + return; + } + } + } + count++; + } +} + +hb_buffer_t* hb_qsv_copy_frame(HBQSVFramesContext* hb_qsv_frames_ctx, AVFrame *frame, hb_qsv_context *qsv_ctx, int is_vpp) { hb_buffer_t *out; out = 
hb_frame_buffer_init(frame->format, frame->width, frame->height); @@ -2724,43 +2837,75 @@ hb_buffer_t* hb_qsv_copy_frame(AVFrame *frame, hb_qsv_context *qsv_ctx) return out; } + out->qsv_details.frame->format = frame->format; + out->qsv_details.frame->width = frame->width; + out->qsv_details.frame->height = frame->height; + out->qsv_details.frame->channels = frame->channels; + out->qsv_details.frame->channel_layout = frame->channel_layout; + out->qsv_details.frame->nb_samples = frame->nb_samples; + + int ret = av_frame_copy_props(out->qsv_details.frame, frame); + if (ret < 0) + { + hb_error("hb_qsv_copy_frame: av_frame_copy_props error %d", ret); + } + // copy content of input frame - av_frame_copy(out->qsv_details.frame, frame); - // but no copy the sufrace pointer, it will be added later from the pool - out->qsv_details.frame->data[3] = 0; + ret = av_frame_copy(out->qsv_details.frame, frame); + if (ret < 0) { + hb_error("hb_qsv_copy_frame: av_frame_copy error %d", ret); + } QSVMid *mid = NULL; mfxFrameSurface1* output_surface = NULL; - hb_qsv_get_free_surface_from_pool(0, HB_POOL_SURFACE_SIZE - HB_POOL_ENCODER_SIZE, &mid, &output_surface); + + if (!is_vpp && qsv_filters_are_enabled) + { + ret = hb_qsv_get_free_surface_from_pool(hb_qsv_frames_ctx, out->qsv_details.frame, &mid); + if (ret < 0) + return out; + output_surface = (mfxFrameSurface1*)out->qsv_details.frame->data[3]; + } + else + { + hb_qsv_get_free_surface_from_pool_with_range(hb_qsv_frames_ctx, 0, HB_POOL_SURFACE_SIZE - HB_POOL_ENCODER_SIZE, &mid, &output_surface); + } if (device_manager_handle_type == MFX_HANDLE_D3D9_DEVICE_MANAGER) { mfxFrameSurface1* input_surface = (mfxFrameSurface1*)frame->data[3]; - // copy all surface fields - *output_surface = *input_surface; - // replace the mem id to mem id from the pool - output_surface->Data.MemId = mid; + if (qsv_filters_are_enabled) + { + mfxMemId mem = output_surface->Data.MemId; + *output_surface = *input_surface; + output_surface->Data.MemId = mem; // todo because get_hdl function in qsv scale filter is not implemented in the ffmpeg patch + } + else + { + // replace the mem id to mem id from the pool + output_surface->Data.MemId = mid; + } // copy input sufrace to sufrace from the pool IDirect3DDevice9 *pDevice = NULL; HANDLE handle; - HRESULT result = lock_device((IDirect3DDeviceManager9 *)device_manager_handle, 0, &pDevice, &handle); + HRESULT result = lock_device((IDirect3DDeviceManager9 *)device_manager_handle, 1, &pDevice, &handle); if (FAILED(result)) { - hb_error("hb_qsv_copy_frame: lock_device failded %d", result); + hb_error("hb_qsv_copy_frame: lock_device failed %d", result); return out; } result = IDirect3DDevice9_StretchRect(pDevice, input_surface->Data.MemId, 0, mid->handle, 0, D3DTEXF_LINEAR); if (FAILED(result)) { - hb_error("hb_qsv_copy_frame: IDirect3DDevice9_StretchRect failded %d", result); + hb_error("hb_qsv_copy_frame: IDirect3DDevice9_StretchRect failed %d", result); return out; } result = unlock_device((IDirect3DDeviceManager9 *)device_manager_handle, handle); if (FAILED(result)) { - hb_error("hb_qsv_copy_frame: unlock_device failded %d", result); + hb_error("hb_qsv_copy_frame: unlock_device failed %d", result); return out; } } @@ -2773,7 +2918,7 @@ hb_buffer_t* hb_qsv_copy_frame(AVFrame *frame, hb_qsv_context *qsv_ctx) // replace the mem id to mem id from the pool output_surface->Data.MemId = mid; // copy input sufrace to sufrace from the pool - ID3D11DeviceContext_CopySubresourceRegion(device_context, mid->texture, (uint64_t)mid->handle, 0, 0, 0, 
hb_enc_qsv_frames_ctx.input_texture, (uint64_t)input_surface->Data.MemId, NULL); + ID3D11DeviceContext_CopySubresourceRegion(device_context, mid->texture, (uint64_t)mid->handle, 0, 0, 0, hb_qsv_frames_ctx->input_texture, (uint64_t)input_surface->Data.MemId, NULL); ID3D11DeviceContext_Flush(device_context); } else @@ -2803,16 +2948,22 @@ void hb_qsv_uninit_dec(AVCodecContext *s) av_buffer_unref(&s->hw_frames_ctx); } +extern HBQSVFramesContext hb_vpp_qsv_frames_ctx; + void hb_qsv_uninit_enc() { - if(enc_hw_frames_ctx) - av_buffer_unref(&enc_hw_frames_ctx); + if(hb_dec_qsv_frames_ctx.hw_frames_ctx) + av_buffer_unref(&hb_dec_qsv_frames_ctx.hw_frames_ctx); + + if(hb_vpp_qsv_frames_ctx.hw_frames_ctx) + av_buffer_unref(&hb_vpp_qsv_frames_ctx.hw_frames_ctx); - enc_hw_frames_ctx = NULL; + hb_dec_qsv_frames_ctx.hw_frames_ctx = NULL; + hb_vpp_qsv_frames_ctx.hw_frames_ctx = NULL; hb_hw_device_ctx = NULL; qsv_device = NULL; device_manager_handle = NULL; - hb_qsv_deallocate_dx11_encoder_pool(); + hb_qsv_deallocate_dx11_encoder_pool(&hb_dec_qsv_frames_ctx); if (device_context) { ID3D11DeviceContext_Release(device_context); @@ -2820,7 +2971,7 @@ void hb_qsv_uninit_enc() } } -static int qsv_device_init(AVCodecContext *s) +static int qsv_device_init() { int err; AVDictionary *dict = NULL; @@ -2831,8 +2982,15 @@ static int qsv_device_init(AVCodecContext *s) return err; } - err = av_dict_set(&dict, "child_device_type", "d3d11va", 0); - err = av_dict_set(&dict, "vendor", "0x8086", 0); + if (!qsv_filters_are_enabled) + { + err = av_dict_set(&dict, "child_device_type", "d3d11va", 0); + err = av_dict_set(&dict, "vendor", "0x8086", 0); + } + else + { + err = av_dict_set(&dict, "child_device_type", "dxva2", 0); + } err = av_hwdevice_ctx_create(&hb_hw_device_ctx, AV_HWDEVICE_TYPE_QSV, 0, dict, 0); @@ -2848,72 +3006,82 @@ err_out: return err; } -static int qsv_init(AVCodecContext *s) +int hb_create_ffmpeg_pool(int coded_width, int coded_height, enum AVPixelFormat sw_pix_fmt, int pool_size, int extra_hw_frames, AVBufferRef **out_hw_frames_ctx) { AVHWFramesContext *frames_ctx; AVQSVFramesContext *frames_hwctx; + AVBufferRef *hw_frames_ctx = *out_hw_frames_ctx; + int ret; if (!hb_hw_device_ctx) { - ret = qsv_device_init(s); + ret = qsv_device_init(); if (ret < 0) return ret; } - av_buffer_unref(&s->hw_frames_ctx); - s->hw_frames_ctx = av_hwframe_ctx_alloc(hb_hw_device_ctx); - if (!s->hw_frames_ctx) + av_buffer_unref(&hw_frames_ctx); + hw_frames_ctx = av_hwframe_ctx_alloc(hb_hw_device_ctx); + if (!hw_frames_ctx) return AVERROR(ENOMEM); - frames_ctx = (AVHWFramesContext*)s->hw_frames_ctx->data; + *out_hw_frames_ctx = hw_frames_ctx; + + frames_ctx = (AVHWFramesContext*)hw_frames_ctx->data; frames_hwctx = frames_ctx->hwctx; - frames_ctx->width = FFALIGN(s->coded_width, 32); - frames_ctx->height = FFALIGN(s->coded_height, 32); + frames_ctx->width = FFALIGN(coded_width, 32); + frames_ctx->height = FFALIGN(coded_height, 32); frames_ctx->format = AV_PIX_FMT_QSV; - frames_ctx->sw_format = s->sw_pix_fmt; - frames_ctx->initial_pool_size = 32 + s->extra_hw_frames; + frames_ctx->sw_format = sw_pix_fmt; + frames_ctx->initial_pool_size = pool_size + extra_hw_frames; frames_hwctx->frame_type = MFX_MEMTYPE_VIDEO_MEMORY_DECODER_TARGET; - ret = av_hwframe_ctx_init(s->hw_frames_ctx); + ret = av_hwframe_ctx_init(hw_frames_ctx); if (ret < 0) { - hb_error("qsv_init: av_hwframe_ctx_init failed %d", ret); + hb_error("hb_create_ffmpeg_pool: av_hwframe_ctx_init failed %d", ret); return ret; } - hb_enc_qsv_frames_ctx.input_texture = 
frames_hwctx->texture; + return 0; +} - av_buffer_unref(&enc_hw_frames_ctx); - enc_hw_frames_ctx = av_hwframe_ctx_alloc(hb_hw_device_ctx); - if (!enc_hw_frames_ctx) - return AVERROR(ENOMEM); +int hb_qsv_init(int coded_width, int coded_height, enum AVPixelFormat sw_pix_fmt, int extra_hw_frames, AVBufferRef **out_hw_frames_ctx) +{ + AVHWFramesContext *frames_ctx; + AVQSVFramesContext *frames_hwctx; + AVBufferRef *hw_frames_ctx; - hb_enc_qsv_frames_ctx.hw_frames_ctx = enc_hw_frames_ctx; - frames_ctx = (AVHWFramesContext*)enc_hw_frames_ctx->data; - frames_hwctx = frames_ctx->hwctx; + int ret; + + ret = hb_create_ffmpeg_pool(coded_width, coded_height, sw_pix_fmt, HB_POOL_FFMPEG_SURFACE_SIZE, extra_hw_frames, out_hw_frames_ctx); + if (ret < 0) { + hb_error("hb_qsv_init: hb_create_ffmpeg_pool decoder failed %d", ret); + return ret; + } - frames_ctx->width = FFALIGN(s->coded_width, 32); - frames_ctx->height = FFALIGN(s->coded_height, 32); - frames_ctx->format = AV_PIX_FMT_QSV; - frames_ctx->sw_format = s->sw_pix_fmt; - frames_ctx->initial_pool_size = HB_POOL_SURFACE_SIZE; - frames_hwctx->frame_type = MFX_MEMTYPE_VIDEO_MEMORY_DECODER_TARGET; + hw_frames_ctx = *out_hw_frames_ctx; + frames_ctx = (AVHWFramesContext*)hw_frames_ctx->data; + frames_hwctx = frames_ctx->hwctx; + hb_dec_qsv_frames_ctx.input_texture = frames_hwctx->texture; - ret = av_hwframe_ctx_init(enc_hw_frames_ctx); + ret = hb_create_ffmpeg_pool(coded_width, coded_height, sw_pix_fmt, HB_POOL_SURFACE_SIZE, extra_hw_frames, &hb_dec_qsv_frames_ctx.hw_frames_ctx); if (ret < 0) { - hb_error("qsv_init: av_hwframe_ctx_init failed %d", ret); + hb_error("hb_qsv_init: hb_create_ffmpeg_pool qsv surface allocation failed %d", ret); return ret; } + /* allocate the memory ids for the external frames */ - av_buffer_unref(&hb_enc_qsv_frames_ctx.mids_buf); - hb_enc_qsv_frames_ctx.mids_buf = hb_qsv_create_mids(hb_enc_qsv_frames_ctx.hw_frames_ctx); - if (!hb_enc_qsv_frames_ctx.mids_buf) + av_buffer_unref(&hb_dec_qsv_frames_ctx.mids_buf); + hb_dec_qsv_frames_ctx.mids_buf = hb_qsv_create_mids(hb_dec_qsv_frames_ctx.hw_frames_ctx); + if (!hb_dec_qsv_frames_ctx.mids_buf) return AVERROR(ENOMEM); - hb_enc_qsv_frames_ctx.mids = (QSVMid*)hb_enc_qsv_frames_ctx.mids_buf->data; - hb_enc_qsv_frames_ctx.nb_mids = frames_hwctx->nb_surfaces; - memset(hb_enc_qsv_frames_ctx.pool, 0, hb_enc_qsv_frames_ctx.nb_mids * sizeof(hb_enc_qsv_frames_ctx.pool[0])); - ret = hb_qsv_get_dx_device(); + hb_dec_qsv_frames_ctx.mids = (QSVMid*)hb_dec_qsv_frames_ctx.mids_buf->data; + hb_dec_qsv_frames_ctx.nb_mids = frames_hwctx->nb_surfaces; + memset(hb_dec_qsv_frames_ctx.pool, 0, hb_dec_qsv_frames_ctx.nb_mids * sizeof(hb_dec_qsv_frames_ctx.pool[0])); + + ret = hb_qsv_get_dx_device(&hb_dec_qsv_frames_ctx); if (ret < 0) { hb_error("qsv_init: hb_qsv_get_dx_device failed %d", ret); return ret; @@ -2933,12 +3101,11 @@ enum AVPixelFormat hb_qsv_get_format(AVCodecContext *s, const enum AVPixelFormat { while (*pix_fmts != AV_PIX_FMT_NONE) { if (*pix_fmts == AV_PIX_FMT_QSV) { - int ret = qsv_init(s); - if (ret < 0) { - hb_error("hb_qsv_get_format: QSV hwaccel initialization failed"); - return AV_PIX_FMT_NONE; - } - + int ret = hb_qsv_init(s->coded_width, s->coded_height, s->sw_pix_fmt, s->extra_hw_frames, &s->hw_frames_ctx); + if (ret < 0) { + hb_error("hb_qsv_get_format: QSV hwaccel initialization failed"); + return AV_PIX_FMT_NONE; + } if (s->hw_frames_ctx) { s->hw_frames_ctx = av_buffer_ref(s->hw_frames_ctx); if (!s->hw_frames_ctx) @@ -2993,12 +3160,27 @@ int 
hb_qsv_preset_is_zero_copy_enabled(const hb_dict_t *job_dict) #else // other OS -hb_buffer_t* hb_qsv_copy_frame(AVFrame *frame, hb_qsv_context *qsv_ctx) +int hb_create_ffmpeg_pool(int coded_width, int coded_height, enum AVPixelFormat sw_pix_fmt, int pool_size, int extra_hw_frames, AVBufferRef **out_hw_frames_ctx) +{ + return -1; +} + +int hb_qsv_init(int coded_width, int coded_height, enum AVPixelFormat sw_pix_fmt, int extra_hw_frames, AVBufferRef **out_hw_frames_ctx) +{ + return -1; +} + +hb_buffer_t* hb_qsv_copy_frame(HBQSVFramesContext* hb_qsv_frames_ctx, AVFrame *frame, hb_qsv_context *qsv_ctx, int is_vpp) { return NULL; } -void hb_qsv_get_free_surface_from_pool(const int start_index, const int end_index, QSVMid **out_mid, mfxFrameSurface1 **out_surface) +int hb_qsv_get_free_surface_from_pool(HBQSVFramesContext* hb_enc_qsv_frames_ctx, AVFrame* frame, QSVMid** out_mid) +{ + return -1; +} + +void hb_qsv_get_free_surface_from_pool_with_range(HBQSVFramesContext* hb_enc_qsv_frames_ctx, const int start_index, const int end_index, QSVMid** out_mid, mfxFrameSurface1** out_surface) { return; } diff --git a/libhb/work.c b/libhb/work.c index d714c988f..277e7415e 100644 --- a/libhb/work.c +++ b/libhb/work.c @@ -1226,172 +1226,49 @@ static int sanitize_audio(hb_job_t *job) return 0; } +#if HB_PROJECT_FEATURE_QSV +int num_cpu_filters = 0; +int qsv_filters_are_enabled = 0; +#endif + static int sanitize_qsv( hb_job_t * job ) { #if HB_PROJECT_FEATURE_QSV -#if 0 // TODO: re-implement QSV VPP filtering and QSV zerocopy path - int i; - - /* - * XXX: mfxCoreInterface's CopyFrame doesn't work in old drivers, and our - * workaround is really slow. If we have validated CPU-based filters in - * the list and we can't use CopyFrame, disable QSV decoding until a - * better solution is implemented. - */ - if (hb_qsv_copyframe_is_slow(job->vcodec)) - { - if (job->list_filter != NULL) - { - int encode_only = 0; - for (i = 0; i < hb_list_count(job->list_filter) && !encode_only; i++) - { - hb_filter_object_t *filter = hb_list_item(job->list_filter, i); - switch (filter->id) - { - // validated, CPU-based filters - case HB_FILTER_ROTATE: - case HB_FILTER_RENDER_SUB: - case HB_FILTER_AVFILTER: - encode_only = 1; - break; - - // CPU-based deinterlace (validated) - case HB_FILTER_DEINTERLACE: - { - int mode = hb_dict_get_int(filter->settings, "mode"); - if (!(mode & MODE_DEINTERLACE_QSV)) - { - encode_only = 1; - } - } break; - - // other filters will be removed - default: - break; - } - } - if (encode_only) - { - hb_log("do_job: QSV: possible CopyFrame bug, using encode-only path"); - if (hb_get_cpu_platform() >= HB_CPU_PLATFORM_INTEL_IVB) - { - hb_log("do_job: QSV: please update your Intel graphics driver to version 9.18.10.3257 or later"); - } - job->qsv.decode = 0; - } - } - } - /* * When QSV's VPP is used for filtering, not all CPU filters * are supported, so we need to do a little extra setup here. 
*/ + int i = 0; + qsv_filters_are_enabled = 0; + num_cpu_filters = 0; if (job->vcodec & HB_VCODEC_QSV_MASK) { - int vpp_settings[7]; - int num_cpu_filters = 0; - hb_filter_object_t *filter; - // default values for VPP filter - vpp_settings[0] = job->title->geometry.width; - vpp_settings[1] = job->title->geometry.height; - vpp_settings[2] = job->title->crop[0]; - vpp_settings[3] = job->title->crop[1]; - vpp_settings[4] = job->title->crop[2]; - vpp_settings[5] = job->title->crop[3]; - vpp_settings[6] = 0; // deinterlace: off if (job->list_filter != NULL && hb_list_count(job->list_filter) > 0) { - while (hb_list_count(job->list_filter) > num_cpu_filters) + for (i = 0; i < hb_list_count(job->list_filter); i++) { - filter = hb_list_item(job->list_filter, num_cpu_filters); + hb_filter_object_t *filter = hb_list_item(job->list_filter, i); + switch (filter->id) { // cropping and scaling always done via VPP filter case HB_FILTER_CROP_SCALE: - hb_dict_extract_int(&vpp_settings[0], filter->settings, - "width"); - hb_dict_extract_int(&vpp_settings[1], filter->settings, - "height"); - hb_dict_extract_int(&vpp_settings[2], filter->settings, - "crop-top"); - hb_dict_extract_int(&vpp_settings[3], filter->settings, - "crop-bottom"); - hb_dict_extract_int(&vpp_settings[4], filter->settings, - "crop-left"); - hb_dict_extract_int(&vpp_settings[5], filter->settings, - "crop-right"); - - hb_list_rem(job->list_filter, filter); - hb_filter_close(&filter); break; - // pick VPP or CPU deinterlace depending on settings case HB_FILTER_DEINTERLACE: - { - int mode = hb_dict_get_int(filter->settings, "mode"); - if (mode & MODE_DEINTERLACE_QSV) - { - // deinterlacing via VPP filter - vpp_settings[6] = 1; - hb_list_rem(job->list_filter, filter); - hb_filter_close(&filter); - } - else - { - // validated - num_cpu_filters++; - } - } break; - - // then, validated filters - case HB_FILTER_ROTATE: // TODO: use Media SDK for this + case HB_FILTER_ROTATE: case HB_FILTER_RENDER_SUB: case HB_FILTER_AVFILTER: num_cpu_filters++; break; - - // finally, drop all unsupported filters default: - hb_log("do_job: QSV: full path, removing unsupported filter '%s'", - filter->name); - hb_list_rem(job->list_filter, filter); - hb_filter_close(&filter); + num_cpu_filters++; break; } } - if (num_cpu_filters > 0) - { - // we need filters to copy to system memory and back - filter = hb_filter_init(HB_FILTER_QSV_PRE); - hb_add_filter_dict(job, filter, NULL); - filter = hb_filter_init(HB_FILTER_QSV_POST); - hb_add_filter_dict(job, filter, NULL); - } - if (vpp_settings[0] != job->title->geometry.width || - vpp_settings[1] != job->title->geometry.height || - vpp_settings[2] >= 1 /* crop */ || - vpp_settings[3] >= 1 /* crop */ || - vpp_settings[4] >= 1 /* crop */ || - vpp_settings[5] >= 1 /* crop */ || - vpp_settings[6] >= 1 /* deinterlace */) - { - // we need the VPP filter - hb_dict_t * dict = hb_dict_init(); - hb_dict_set(dict, "width", hb_value_int(vpp_settings[0])); - hb_dict_set(dict, "height", hb_value_int(vpp_settings[1])); - hb_dict_set(dict, "crop-top", hb_value_int(vpp_settings[2])); - hb_dict_set(dict, "crop-bottom", hb_value_int(vpp_settings[3])); - hb_dict_set(dict, "crop-left", hb_value_int(vpp_settings[4])); - hb_dict_set(dict, "crop-right", hb_value_int(vpp_settings[5])); - hb_dict_set(dict, "deinterlace", hb_value_int(vpp_settings[6])); - - filter = hb_filter_init(HB_FILTER_QSV); - hb_add_filter_dict(job, filter, dict); - hb_value_free(&dict); - } } + qsv_filters_are_enabled = ((hb_list_count(job->list_filter) == 1) && 
hb_qsv_full_path_is_enabled(job)) ? 1 : 0; } -#endif // QSV VPP filtering and QSV zerocopy path #endif // HB_PROJECT_FEATURE_QSV return 0; @@ -1521,15 +1398,7 @@ static void do_job(hb_job_t *job) goto cleanup; } - // sanitize_qsv looks for subtitle render filter, so must happen after - // sanitize_subtitle - result = sanitize_qsv(job); - if (result) - { - *job->done_error = HB_ERROR_WRONG_INPUT; - *job->die = 1; - goto cleanup; - } + // Filters have an effect on settings. // So initialize the filters and update the job. if (job->list_filter && hb_list_count(job->list_filter)) @@ -1538,6 +1407,16 @@ static void do_job(hb_job_t *job) sanitize_filter_list(job->list_filter, title->geometry); + // sanitize_qsv looks for subtitle render filter, so must happen after + // sanitize_subtitle + result = sanitize_qsv(job); + if (result) + { + *job->done_error = HB_ERROR_WRONG_INPUT; + *job->die = 1; + goto cleanup; + } + memset(&init, 0, sizeof(init)); init.time_base.num = 1; init.time_base.den = 90000; |
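
For reference, the new `hb_create_ffmpeg_pool()` introduced above is essentially standard FFmpeg hw-frames plumbing around an `AV_HWDEVICE_TYPE_QSV` device. The sketch below walks the same steps under the assumption that the device context has already been created; the function and parameter names here are illustrative, not HandBrake API, and only the FFmpeg/Media SDK calls are real.

```c
#include <libavutil/hwcontext.h>
#include <libavutil/hwcontext_qsv.h>

// Sketch of a QSV hw-frames pool creation, assuming device_ref was produced by
// av_hwdevice_ctx_create(..., AV_HWDEVICE_TYPE_QSV, ...).
static int create_qsv_pool(AVBufferRef *device_ref, int width, int height,
                           enum AVPixelFormat sw_fmt, int pool_size,
                           AVBufferRef **out_frames_ref)
{
    AVBufferRef *frames_ref = av_hwframe_ctx_alloc(device_ref);
    if (!frames_ref)
        return AVERROR(ENOMEM);

    AVHWFramesContext  *frames_ctx   = (AVHWFramesContext *)frames_ref->data;
    AVQSVFramesContext *frames_hwctx = frames_ctx->hwctx;

    frames_ctx->format            = AV_PIX_FMT_QSV;  // opaque GPU surfaces
    frames_ctx->sw_format         = sw_fmt;          // e.g. AV_PIX_FMT_NV12
    frames_ctx->width             = FFALIGN(width, 32);
    frames_ctx->height            = FFALIGN(height, 32);
    frames_ctx->initial_pool_size = pool_size;
    frames_hwctx->frame_type      = MFX_MEMTYPE_VIDEO_MEMORY_DECODER_TARGET;

    int ret = av_hwframe_ctx_init(frames_ref);
    if (ret < 0)
    {
        av_buffer_unref(&frames_ref);
        return ret;
    }

    *out_frames_ref = frames_ref;  // caller owns the reference
    return 0;
}
```

In the commit, two such pools of HB_POOL_SURFACE_SIZE (64) surfaces are kept for the whole job, one fed by the decoder and one fed by the VPP scaler, and the encoder's mfxFrameAllocator is pointed at whichever pool feeds it (`hb_vpp_qsv_frames_ctx` when `qsv_filters_are_enabled`, otherwise `hb_dec_qsv_frames_ctx`).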