diff options
Diffstat (limited to 'libhb/enc_qsv.c')
-rw-r--r-- | libhb/enc_qsv.c | 386 |
1 files changed, 172 insertions, 214 deletions
diff --git a/libhb/enc_qsv.c b/libhb/enc_qsv.c index 7f85420f7..1afedffca 100644 --- a/libhb/enc_qsv.c +++ b/libhb/enc_qsv.c @@ -287,10 +287,13 @@ int qsv_enc_init(hb_work_private_t *pv) } for (i = 0; i < qsv_encode->surface_num; i++) { - qsv_encode->p_surfaces[i] = av_mallocz(sizeof(mfxFrameSurface1)); - AV_QSV_CHECK_POINTER(qsv_encode->p_surfaces[i], MFX_ERR_MEMORY_ALLOC); - memcpy(&(qsv_encode->p_surfaces[i]->Info), - &(qsv_encode->request[0].Info), sizeof(mfxFrameInfo)); + mfxFrameSurface1 *surface = av_mallocz(sizeof(mfxFrameSurface1)); + mfxFrameInfo info = pv->param.videoParam->mfx.FrameInfo; + surface->Info = info; + surface->Data.Pitch = info.Width; + surface->Data.Y = av_mallocz(info.Width * info.Height); + surface->Data.VU = av_mallocz(info.Width * info.Height / 2); + qsv_encode->p_surfaces[i] = surface; } } else @@ -1212,10 +1215,8 @@ void encqsvClose(hb_work_object_t *w) { if (pv->is_sys_mem) { - free(qsv_enc_space->p_surfaces[i]->Data.Y); - qsv_enc_space->p_surfaces[i]->Data.Y = NULL; - free(qsv_enc_space->p_surfaces[i]->Data.VU); - qsv_enc_space->p_surfaces[i]->Data.VU = NULL; + av_freep(&qsv_enc_space->p_surfaces[i]->Data.VU); + av_freep(&qsv_enc_space->p_surfaces[i]->Data.Y); } av_freep(&qsv_enc_space->p_surfaces[i]); } @@ -1284,160 +1285,144 @@ int encqsvWork(hb_work_object_t *w, hb_buffer_t **buf_in, hb_buffer_t **buf_out) { hb_work_private_t *pv = w->private_data; hb_job_t *job = pv->job; - hb_buffer_t *in = *buf_in, *buf; - hb_buffer_t *last_buf = NULL; + hb_buffer_t *in = *buf_in; + hb_buffer_t *buf, *last_buf = NULL; mfxStatus sts = MFX_ERR_NONE; - int is_end = 0; - av_qsv_list *received_item = NULL; - av_qsv_stage *stage = NULL; while (qsv_enc_init(pv) >= 2) { av_qsv_sleep(1); // encoding not initialized, wait and repeat the call } - *buf_out = NULL; if (*job->die) { - return HB_WORK_DONE; // unrecoverable error, don't attempt to encode + goto fail; // unrecoverable error, don't attempt to encode } - av_qsv_context *qsv = job->qsv.ctx; - av_qsv_space *qsv_encode = job->qsv.ctx->enc_space; + av_qsv_list *received_item = NULL; + mfxEncodeCtrl *work_control = NULL; + mfxFrameSurface1 *work_surface = NULL; + av_qsv_context *qsv = job->qsv.ctx; + av_qsv_space *qsv_encode = job->qsv.ctx->enc_space; + /* + * EOF on input. Flush the decoder, then send the + * EOF downstream to let the muxer know we're done. + */ if (in->size <= 0) { - // do delayed frames yet - *buf_in = NULL; - is_end = 1; + *buf_in = NULL; // don't let 'work_loop' close this buffer } - - // input from decode, as called - we always have some to proceed with - while (1) + else { - mfxEncodeCtrl *work_control = NULL; - mfxFrameSurface1 *work_surface = NULL; - - if (!is_end) + if (pv->is_sys_mem) { - if (pv->is_sys_mem) + mfxFrameInfo *info = &pv->param.videoParam->mfx.FrameInfo; + int surface_index = av_qsv_get_free_surface(qsv_encode, qsv, info, + QSV_PART_ANY); + if (surface_index == -1) { - int surface_idx = av_qsv_get_free_surface(qsv_encode, qsv, - &qsv_encode->request[0].Info, QSV_PART_ANY); - work_surface = qsv_encode->p_surfaces[surface_idx]; - - if (work_surface->Data.Y == NULL) - { - // if nv12 and 422 12bits per pixel - work_surface->Data.Pitch = pv->enc_space.m_mfxVideoParam.mfx.FrameInfo.Width; - work_surface->Data.Y = calloc(1, - pv->enc_space.m_mfxVideoParam.mfx.FrameInfo.Width * - pv->enc_space.m_mfxVideoParam.mfx.FrameInfo.Height); - work_surface->Data.VU = calloc(1, - pv->enc_space.m_mfxVideoParam.mfx.FrameInfo.Width * - pv->enc_space.m_mfxVideoParam.mfx.FrameInfo.Height / 2); - } - qsv_yuv420_to_nv12(pv->sws_context_to_nv12, work_surface, in); - } - else - { - received_item = in->qsv_details.qsv_atom; - stage = av_qsv_get_last_stage(received_item); - work_surface = stage->out.p_surface; - - // don't let qsv->dts_seq grow needlessly - av_qsv_dts_pop(qsv); + hb_error("encqsv: av_qsv_get_free_surface failed"); + goto fail; } - work_surface->Data.TimeStamp = in->s.start; + work_surface = qsv_encode->p_surfaces[surface_index]; + qsv_yuv420_to_nv12(pv->sws_context_to_nv12, work_surface, in); + } + else + { + received_item = in->qsv_details.qsv_atom; + work_surface = av_qsv_get_last_stage(received_item)->out.p_surface; /* - * Debugging code to check that the upstream modules have generated - * a continuous, self-consistent frame stream. + * QSV decoding fills the QSV context's dts_seq list, we need to + * pop this surface's DTS so dts_seq doesn't grow unnecessarily. */ - int64_t start = work_surface->Data.TimeStamp; - if (pv->last_start > start) + av_qsv_dts_pop(qsv); + } + + work_surface->Data.TimeStamp = in->s.start; + + /* + * Debugging code to check that the upstream modules have generated + * a continuous, self-consistent frame stream. + */ + int64_t start = work_surface->Data.TimeStamp; + if (pv->last_start > start) + { + hb_log("encqsv: input continuity error, last start %"PRId64" start %"PRId64"", + pv->last_start, start); + } + pv->last_start = start; + + /* for DTS generation (when MSDK API < 1.6 or VFR) */ + if (pv->bfrm_delay && pv->bfrm_workaround) + { + if (pv->frames_in <= BFRM_DELAY_MAX) { - hb_log("encqsvWork: input continuity error, last start %"PRId64" start %"PRId64"", - pv->last_start, start); + pv->init_pts[pv->frames_in] = work_surface->Data.TimeStamp; } - pv->last_start = start; - - // for DTS generation (when MSDK API < 1.6 or VFR) - if (pv->bfrm_delay && pv->bfrm_workaround) + if (pv->frames_in) { - if (pv->frames_in <= BFRM_DELAY_MAX) - { - pv->init_pts[pv->frames_in] = work_surface->Data.TimeStamp; - } - if (pv->frames_in) - { - hb_qsv_add_new_dts(pv->list_dts, - work_surface->Data.TimeStamp); - } + hb_qsv_add_new_dts(pv->list_dts, + work_surface->Data.TimeStamp); } + } + pv->frames_in++; - /* - * Chapters have to start with a keyframe so request that this - * frame be coded as IDR. Since there may be several frames - * buffered in the encoder, remember the timestamp so when this - * frame finally pops out of the encoder we'll mark its buffer - * as the start of a chapter. - */ - if (in->s.new_chap > 0 && job->chapter_markers) + if (in->s.new_chap > 0 && job->chapter_markers) + { + if (pv->next_chapter_pts == AV_NOPTS_VALUE) { - if (pv->next_chapter_pts == AV_NOPTS_VALUE) - { - pv->next_chapter_pts = work_surface->Data.TimeStamp; - } - /* insert an IDR */ - work_control = &pv->force_keyframe; - /* - * Chapter markers are sometimes so close we can get a new - * one before the previous goes through the encoding queue. - * - * Dropping markers can cause weird side-effects downstream, - * including but not limited to missing chapters in the - * output, so we need to save it somehow. - */ - struct chapter_s *item = malloc(sizeof(struct chapter_s)); - if (item != NULL) - { - item->index = in->s.new_chap; - item->start = work_surface->Data.TimeStamp; - hb_list_add(pv->delayed_chapters, item); - } - /* don't let 'work_loop' put a chapter mark on the wrong buffer */ - in->s.new_chap = 0; + pv->next_chapter_pts = work_surface->Data.TimeStamp; } + /* Chapters have to start with a keyframe, so request an IDR */ + work_control = &pv->force_keyframe; + /* - * If interlaced encoding is requested during encoder initialization, - * but the input mfxFrameSurface1 is flagged as progressive here, - * the output bitstream will be progressive (according to MediaInfo). + * Chapter markers are sometimes so close we can get a new + * one before the previous goes through the encoding queue. * - * Assume the user knows what he's doing (say he is e.g. encoding a - * progressive-flagged source using interlaced compression - he may - * well have a good reason to do so; mis-flagged sources do exist). + * Dropping markers can cause weird side-effects downstream, + * including but not limited to missing chapters in the + * output, so we need to save it somehow. */ - work_surface->Info.PicStruct = pv->enc_space.m_mfxVideoParam.mfx.FrameInfo.PicStruct; - } - else - { - work_surface = NULL; - received_item = NULL; + struct chapter_s *item = malloc(sizeof(struct chapter_s)); + if (item != NULL) + { + item->index = in->s.new_chap; + item->start = work_surface->Data.TimeStamp; + hb_list_add(pv->delayed_chapters, item); + } + /* don't let 'work_loop' put a chapter mark on the wrong buffer */ + in->s.new_chap = 0; } + + /* + * If interlaced encoding is requested during encoder initialization, + * but the input mfxFrameSurface1 is flagged as progressive here, + * the output bitstream will be progressive (according to MediaInfo). + * + * Assume the user knows what he's doing (say he is e.g. encoding a + * progressive-flagged source using interlaced compression - he may + * well have a good reason to do so; mis-flagged sources do exist). + */ + work_surface->Info.PicStruct = pv->enc_space.m_mfxVideoParam.mfx.FrameInfo.PicStruct; + } + + do + { int sync_idx = av_qsv_get_free_sync(qsv_encode, qsv); if (sync_idx == -1) { - hb_error("qsv: Not enough resources allocated for QSV encode"); - return 0; + hb_error("encqsv: av_qsv_get_free_sync failed"); + goto fail; } av_qsv_task *task = av_qsv_list_item(qsv_encode->tasks, pv->async_depth); - for (;;) + do { - // Encode a frame asychronously (returns immediately) sts = MFXVideoENCODE_EncodeFrameAsync(qsv->mfx_session, work_control, work_surface, task->bs, qsv_encode->p_syncp[sync_idx]->p_sync); @@ -1453,32 +1438,31 @@ int encqsvWork(hb_work_object_t *w, hb_buffer_t **buf_in, hb_buffer_t **buf_out) if (sts == MFX_ERR_MORE_DATA) { - ff_qsv_atomic_dec(&qsv_encode->p_syncp[sync_idx]->in_use); if (work_surface != NULL && received_item != NULL) { hb_list_add(pv->delayed_processing, received_item); } + ff_qsv_atomic_dec(&qsv_encode->p_syncp[sync_idx]->in_use); break; } - - AV_QSV_CHECK_RESULT(sts, MFX_ERR_NONE, sts); - - if (sts >= MFX_ERR_NONE /*&& !syncpE*/) // repeat the call if warning and no output + else if (sts < MFX_ERR_NONE) + { + hb_error("encqsv: MFXVideoENCODE_EncodeFrameAsync failed (%d)", sts); + goto fail; + } + else if (sts == MFX_WRN_DEVICE_BUSY) + { + av_qsv_sleep(10); // device is busy, wait then repeat the call + continue; + } + else { - if (sts == MFX_WRN_DEVICE_BUSY) - { - av_qsv_sleep(10); // wait if device is busy - continue; - } - av_qsv_stage *new_stage = av_qsv_stage_init(); - new_stage->type = AV_QSV_ENCODE; + new_stage->type = AV_QSV_ENCODE; new_stage->in.p_surface = work_surface; - new_stage->out.sync = qsv_encode->p_syncp[sync_idx]; - - new_stage->out.p_bs = task->bs;//qsv_encode->bs; - task->stage = new_stage; - + new_stage->out.sync = qsv_encode->p_syncp[sync_idx]; + new_stage->out.p_bs = task->bs; + task->stage = new_stage; pv->async_depth++; if (received_item != NULL) @@ -1487,17 +1471,16 @@ int encqsvWork(hb_work_object_t *w, hb_buffer_t **buf_in, hb_buffer_t **buf_out) } else { - // flushing the end - int pipe_idx = av_qsv_list_add(qsv->pipes, av_qsv_list_init(HAVE_THREADS)); - av_qsv_list *list_item = av_qsv_list_item(qsv->pipes, pipe_idx); - av_qsv_add_stagee(&list_item, new_stage, HAVE_THREADS); + /* encode-only or flushing */ + av_qsv_list *new_atom = av_qsv_list_init(HAVE_THREADS); + av_qsv_add_stagee(&new_atom, new_stage, HAVE_THREADS); + av_qsv_list_add (qsv->pipes, new_atom); } - int i = 0; - for (i = hb_list_count(pv->delayed_processing); i > 0; i--) + int i = hb_list_count(pv->delayed_processing); + while (--i >= 0) { - av_qsv_list *item = hb_list_item(pv->delayed_processing, - i - 1); + av_qsv_list *item = hb_list_item(pv->delayed_processing, i); if (item != NULL) { @@ -1505,19 +1488,13 @@ int encqsvWork(hb_work_object_t *w, hb_buffer_t **buf_in, hb_buffer_t **buf_out) av_qsv_flush_stages(qsv->pipes, &item); } } - break; } ff_qsv_atomic_dec(&qsv_encode->p_syncp[sync_idx]->in_use); - - if (sts == MFX_ERR_NOT_ENOUGH_BUFFER) - { - HB_DEBUG_ASSERT(1, "The bitstream buffer size is insufficient."); - } - break; } + while (sts >= MFX_ERR_NONE); buf = NULL; @@ -1525,58 +1502,42 @@ int encqsvWork(hb_work_object_t *w, hb_buffer_t **buf_in, hb_buffer_t **buf_out) { if (pv->async_depth == 0) break; - // working properly with sync depth approach of MediaSDK OR flushing, if at the end - if (pv->async_depth >= pv->max_async_depth || is_end) + /* we've done enough asynchronous operations or we're flushing */ + if (pv->async_depth >= pv->max_async_depth || work_surface == NULL) { - pv->async_depth--; - av_qsv_task *task = av_qsv_list_item(qsv_encode->tasks, 0); - av_qsv_stage *stage = task->stage; - av_qsv_list *this_pipe = av_qsv_pipe_by_stage(qsv->pipes, stage); - sts = MFX_ERR_NONE; + pv->async_depth--; - // only here we need to wait on operation been completed, therefore SyncOperation is used, - // after this step - we continue to work with bitstream, muxing ... - av_qsv_wait_on_sync(qsv, stage); + /* perform a sync operation to get the output bitstream */ + av_qsv_wait_on_sync(qsv, task->stage); if (task->bs->DataLength > 0) { - av_qsv_flush_stages(qsv->pipes, &this_pipe); + av_qsv_list *pipe = av_qsv_pipe_by_stage(qsv->pipes, task->stage); + av_qsv_flush_stages(qsv->pipes, &pipe); - // see nal_encode - buf = hb_video_buffer_init(job->width, job->height); + /* allocate additional data for parse_nalus */ + buf = hb_buffer_init(task->bs->DataLength * 2); buf->size = 0; - // map Media SDK's FrameType to our internal representation - buf->s.frametype = hb_qsv_frametype_xlat(task->bs->FrameType, - &buf->s.flags); - /* * we need to convert the encoder's Annex B output * to an MP4-compatible format (ISO/IEC 14496-15). */ parse_nalus(task->bs->Data + task->bs->DataOffset, task->bs->DataLength, buf); + task->bs->DataLength = task->bs->DataOffset = 0; + task->bs->MaxLength = qsv_encode->p_buf_max_size; - if (last_buf == NULL) - { - *buf_out = buf; - } - else - { - last_buf->next = buf; - } - last_buf = buf; + /* frame duration (based on average frame rate) */ + int64_t duration = ((double)job->vrate_base / + (double)job->vrate * 90000.); - // simple for now but check on TimeStampCalc from MSDK - int64_t duration = ((double)pv->enc_space.m_mfxVideoParam.mfx.FrameInfo.FrameRateExtD / - (double)pv->enc_space.m_mfxVideoParam.mfx.FrameInfo.FrameRateExtN) * 90000.; + buf->s.frametype = hb_qsv_frametype_xlat(task->bs->FrameType, &buf->s.flags); + buf->s.start = buf->s.renderOffset = task->bs->TimeStamp; + buf->s.stop = buf->s.start + duration; + buf->s.duration = duration; - // start -> PTS - // renderOffset -> DTS - buf->s.start = buf->s.renderOffset = task->bs->TimeStamp; - buf->s.stop = buf->s.start + duration; - buf->s.duration = duration; if (pv->bfrm_delay) { if ((pv->frames_out == 0) && @@ -1627,14 +1588,14 @@ int encqsvWork(hb_work_object_t *w, hb_buffer_t **buf_in, hb_buffer_t **buf_out) (task->bs->FrameType & MFX_FRAMETYPE_B) && (task->bs->FrameType & MFX_FRAMETYPE_REF)) { - hb_log("encqsvWork: BPyramid off not respected (delay: %d)", + hb_log("encqsv: BPyramid off not respected (delay: %d)", pv->bfrm_delay); } // check for PTS < DTS if (buf->s.start < buf->s.renderOffset) { - hb_log("encqsvWork: PTS %"PRId64" < DTS %"PRId64" for frame %d with type '%s' (bfrm_workaround: %d)", + hb_log("encqsv: PTS %"PRId64" < DTS %"PRId64" for frame %d with type '%s' (bfrm_workaround: %d)", buf->s.start, buf->s.renderOffset, pv->frames_out + 1, hb_qsv_frametype_name(task->bs->FrameType), pv->bfrm_workaround); @@ -1693,44 +1654,32 @@ int encqsvWork(hb_work_object_t *w, hb_buffer_t **buf_in, hb_buffer_t **buf_out) } } - // shift for fifo + /* shift for fifo */ if (pv->async_depth) { av_qsv_list_rem(qsv_encode->tasks, task); av_qsv_list_add(qsv_encode->tasks, task); } + task->stage = NULL; - task->bs->DataLength = 0; - task->bs->DataOffset = 0; - task->bs->MaxLength = qsv_encode->p_buf_max_size; - task->stage = NULL; + if (last_buf == NULL) + { + *buf_out = last_buf = buf; + } + else + { + last_buf->next = buf; + last_buf = buf; + } pv->frames_out++; } } } - while (is_end); - - - if (is_end) - { - if (buf == NULL && sts == MFX_ERR_MORE_DATA) - { - break; - } - } - else - { - break; - } - - } - - if (!is_end) - { - ++pv->frames_in; + while (work_surface == NULL); } + while (work_surface == NULL && sts != MFX_ERR_MORE_DATA); - if (is_end) + if (work_surface == NULL) { if (last_buf != NULL) { @@ -1746,6 +1695,15 @@ int encqsvWork(hb_work_object_t *w, hb_buffer_t **buf_in, hb_buffer_t **buf_out) { return HB_WORK_OK; } + +fail: + if (*job->done_error == HB_ERROR_NONE) + { + *job->done_error = HB_ERROR_UNKNOWN; + } + *job->die = 1; + *buf_out = NULL; + return HB_WORK_ERROR; } int nal_find_start_code(uint8_t **pb, size_t *size) |