diff options
author | Artem Galin <[email protected]> | 2020-06-02 16:06:39 +0100 |
---|---|---|
committer | Scott <[email protected]> | 2020-06-13 12:27:01 +0100 |
commit | 04c3ee1679f29dbfae9a64a6f66637ea0cf9d6b7 (patch) | |
tree | 65043285a51ed4b361fa8a6a192826dcedfb2845 /libhb | |
parent | 9d0d52685fa68f5186234b55d09ec761127a9909 (diff) |
qsv: performance optimization of dx11 texture copying
Created a dedicated pool of single (non-array) textures instead of using the
DirectX pool textures with slices from FFmpeg.
Explicitly call the ID3D11DeviceContext_Flush function after each texture copy.
Improved overall 4K AVC -> 4K HEVC transcoding performance from 58 fps to 63 fps on Ice Lake when using zero copy.
Diffstat (limited to 'libhb')
-rw-r--r-- | libhb/enc_qsv.c | 5 | ||||
-rw-r--r-- | libhb/qsv_common.c | 173 |
2 files changed, 122 insertions, 56 deletions
diff --git a/libhb/enc_qsv.c b/libhb/enc_qsv.c index 3c5ece4d7..d55882cc0 100644 --- a/libhb/enc_qsv.c +++ b/libhb/enc_qsv.c @@ -2081,11 +2081,6 @@ static int qsv_enc_work(hb_work_private_t *pv, if (sts == MFX_ERR_MORE_DATA) { - if(!pv->is_sys_mem && surface) - { - hb_qsv_release_surface_from_pool(surface->Data.MemId); - } - if (qsv_atom != NULL) { hb_list_add(pv->delayed_processing, qsv_atom); diff --git a/libhb/qsv_common.c b/libhb/qsv_common.c index 6226bbf83..ef16b5593 100644 --- a/libhb/qsv_common.c +++ b/libhb/qsv_common.c @@ -2305,8 +2305,8 @@ AVBufferRef *enc_hw_frames_ctx = NULL; extern EncQSVFramesContext hb_enc_qsv_frames_ctx; AVBufferRef *hb_hw_device_ctx = NULL; char *qsv_device = NULL; -mfxHDL device_manager_handle = NULL; -mfxIMPL device_impl; +static mfxHDL device_manager_handle = NULL; +static mfxHandleType device_manager_handle_type; #if defined(_WIN32) || defined(__MINGW32__) // Direct X @@ -2320,6 +2320,8 @@ mfxIMPL device_impl; #include <dxgidebug.h> #endif +static ID3D11DeviceContext *device_context = NULL; + typedef IDirect3D9* WINAPI pDirect3DCreate9(UINT); typedef HRESULT WINAPI pDirect3DCreate9Ex(UINT, IDirect3D9Ex **); typedef HRESULT(WINAPI *HB_PFN_CREATE_DXGI_FACTORY)(REFIID riid, void **ppFactory); @@ -2537,12 +2539,10 @@ static int hb_qsv_find_surface_idx(const QSVMid *mids, const int nb_mids, const if(mids) { const QSVMid *m = &mids[0]; - if (m->texture != mid->texture) - return -1; int i; for (i = 0; i < nb_mids; i++) { m = &mids[i]; - if ( m->handle == mid->handle ) + if ( (m->texture == mid->texture) && (m->handle == mid->handle) ) return i; } } @@ -2617,67 +2617,141 @@ void hb_qsv_get_free_surface_from_pool(const int start_index, const int end_inde } } -hb_buffer_t* hb_qsv_copy_frame(AVFrame *frame, hb_qsv_context *qsv_ctx) +static int hb_qsv_allocate_dx11_encoder_pool(ID3D11Device *device, ID3D11Texture2D* input_texture) { - hb_buffer_t *out; - out = hb_frame_buffer_init(frame->format, frame->width, frame->height); - 
hb_avframe_set_video_buffer_flags(out, frame, (AVRational){1,1}); + D3D11_TEXTURE2D_DESC desc = { 0 }; + ID3D11Texture2D_GetDesc(input_texture, &desc); + desc.ArraySize = 1; + desc.BindFlags = D3D10_BIND_RENDER_TARGET; - // alloc new frame - out->qsv_details.frame = av_frame_alloc(); - if (!out->qsv_details.frame) { - return out; - } - - // copy content of input frame - av_frame_copy(out->qsv_details.frame, frame); - // but no copy the sufrace pointer, it will be added later from the pool - out->qsv_details.frame->data[3] = 0; + for (size_t i = 0; i < hb_enc_qsv_frames_ctx.nb_mids; i++) + { + ID3D11Texture2D* texture; + HRESULT hr = ID3D11Device_CreateTexture2D(device, &desc, NULL, &texture); + if (hr != S_OK) + { + hb_error("hb_qsv_allocate_dx11_encoder_pool: ID3D11Device_CreateTexture2D error"); + return -1; + } - QSVMid *mid = NULL; - mfxFrameSurface1* output_surface = NULL; + QSVMid *mid = &hb_enc_qsv_frames_ctx.mids[i]; + mid->handle = 0; + mid->texture = texture; + } + return 0; +} - hb_qsv_get_free_surface_from_pool(0, HB_POOL_SURFACE_SIZE - HB_POOL_ENCODER_SIZE, &mid, &output_surface); +static int hb_qsv_deallocate_dx11_encoder_pool() +{ + if (device_manager_handle_type == MFX_HANDLE_D3D11_DEVICE) + { + for (size_t i = 0; i < hb_enc_qsv_frames_ctx.nb_mids; i++) + { + QSVMid *mid = &hb_enc_qsv_frames_ctx.mids[i]; + ID3D11Texture2D* texture = mid->texture; + if (texture) + { + HRESULT hr = ID3D11Texture2D_Release(texture); + mid->texture = NULL; + if (hr != S_OK) + { + hb_error("hb_qsv_deallocate_dx11_encoder_pool: ID3D11Device_ReleaseTexture2D error"); + return -1; + } + } + } + } + return 0; +} +static int hb_qsv_get_dx_device() +{ AVHWDeviceContext *device_ctx = (AVHWDeviceContext*)hb_hw_device_ctx->data; AVQSVDeviceContext *device_hwctx = device_ctx->hwctx; mfxSession parent_session = device_hwctx->session; if (device_manager_handle == NULL) { - mfxHandleType handle_type; + mfxIMPL device_impl; int err = MFXQueryIMPL(parent_session, &device_impl); if (err 
!= MFX_ERR_NONE) { - hb_error("hb_qsv_copy_frame: no impl could be retrieved"); - return out; + hb_error("hb_qsv_get_dx_device: no impl could be retrieved"); + return -1; } if (MFX_IMPL_VIA_D3D11 == MFX_IMPL_VIA_MASK(device_impl)) { - handle_type = MFX_HANDLE_D3D11_DEVICE; + device_manager_handle_type = MFX_HANDLE_D3D11_DEVICE; } else if (MFX_IMPL_VIA_D3D9 == MFX_IMPL_VIA_MASK(device_impl)) { - handle_type = MFX_HANDLE_D3D9_DEVICE_MANAGER; + device_manager_handle_type = MFX_HANDLE_D3D9_DEVICE_MANAGER; } else { - hb_error("hb_qsv_copy_frame: unsupported impl"); - return out; + hb_error("hb_qsv_get_dx_device: unsupported impl"); + return -1; } - err = MFXVideoCORE_GetHandle(parent_session, handle_type, &device_manager_handle); + err = MFXVideoCORE_GetHandle(parent_session, device_manager_handle_type, &device_manager_handle); if (err != MFX_ERR_NONE) { - hb_error("hb_qsv_copy_frame: no supported hw handle could be retrieved " + hb_error("hb_qsv_get_dx_device: no supported hw handle could be retrieved " "from the session\n"); - return out; + return -1; + } + if (device_manager_handle_type == MFX_HANDLE_D3D11_DEVICE) + { + ID3D11Device *device = (ID3D11Device *)device_manager_handle; + ID3D11Texture2D* input_texture = hb_enc_qsv_frames_ctx.input_texture; + err = hb_qsv_allocate_dx11_encoder_pool(device, input_texture); + if (err < 0) + { + hb_error("hb_qsv_get_dx_device: hb_qsv_allocate_dx11_encoder_pool failed"); + return -1; + } + if (device_context == NULL) + { + ID3D11Device_GetImmediateContext(device, &device_context); + if (!device_context) + return -1; + } } } + return 0; +} - if (MFX_IMPL_VIA_D3D9 == MFX_IMPL_VIA_MASK(device_impl)) +hb_buffer_t* hb_qsv_copy_frame(AVFrame *frame, hb_qsv_context *qsv_ctx) +{ + hb_buffer_t *out; + out = hb_frame_buffer_init(frame->format, frame->width, frame->height); + hb_avframe_set_video_buffer_flags(out, frame, (AVRational){1,1}); + + // alloc new frame + out->qsv_details.frame = av_frame_alloc(); + if (!out->qsv_details.frame) 
{ + return out; + } + + // copy content of input frame + av_frame_copy(out->qsv_details.frame, frame); + // but no copy the sufrace pointer, it will be added later from the pool + out->qsv_details.frame->data[3] = 0; + + QSVMid *mid = NULL; + mfxFrameSurface1* output_surface = NULL; + hb_qsv_get_free_surface_from_pool(0, HB_POOL_SURFACE_SIZE - HB_POOL_ENCODER_SIZE, &mid, &output_surface); + + if (device_manager_handle_type == MFX_HANDLE_D3D9_DEVICE_MANAGER) { + mfxFrameSurface1* input_surface = (mfxFrameSurface1*)frame->data[3]; + + // copy all surface fields + *output_surface = *input_surface; + // replace the mem id to mem id from the pool + output_surface->Data.MemId = mid; + // copy input sufrace to sufrace from the pool IDirect3DDevice9 *pDevice = NULL; HANDLE handle; @@ -2687,14 +2761,6 @@ hb_buffer_t* hb_qsv_copy_frame(AVFrame *frame, hb_qsv_context *qsv_ctx) hb_error("hb_qsv_copy_frame: lock_device failded %d", result); return out; } - - mfxFrameSurface1* input_surface = (mfxFrameSurface1*)frame->data[3]; - - // copy all surface fields - *output_surface = *input_surface; - // replace the mem id to mem id from the pool - output_surface->Data.MemId = mid; - // copy input sufrace to sufrace from the pool result = IDirect3DDevice9_StretchRect(pDevice, input_surface->Data.MemId, 0, mid->handle, 0, D3DTEXF_LINEAR); if (FAILED(result)) { @@ -2707,15 +2773,9 @@ hb_buffer_t* hb_qsv_copy_frame(AVFrame *frame, hb_qsv_context *qsv_ctx) hb_error("hb_qsv_copy_frame: unlock_device failded %d", result); return out; } - } - else if (MFX_IMPL_VIA_D3D11 == MFX_IMPL_VIA_MASK(device_impl)) + } + else if (device_manager_handle_type == MFX_HANDLE_D3D11_DEVICE) { - ID3D11DeviceContext *device_context = NULL; - ID3D11Device *device = (ID3D11Device *)device_manager_handle; - ID3D11Device_GetImmediateContext(device, &device_context); - if (!device_context) - return out; - mfxFrameSurface1* input_surface = (mfxFrameSurface1*)frame->data[3]; // copy all surface fields @@ -2724,13 
+2784,13 @@ hb_buffer_t* hb_qsv_copy_frame(AVFrame *frame, hb_qsv_context *qsv_ctx) output_surface->Data.MemId = mid; // copy input sufrace to sufrace from the pool ID3D11DeviceContext_CopySubresourceRegion(device_context, mid->texture, (uint64_t)mid->handle, 0, 0, 0, hb_enc_qsv_frames_ctx.input_texture, (uint64_t)input_surface->Data.MemId, NULL); + ID3D11DeviceContext_Flush(device_context); } - else + else { hb_error("hb_qsv_copy_frame: incorrect mfx impl"); return out; } - out->qsv_details.frame->data[3] = (uint8_t*)output_surface; out->qsv_details.qsv_atom = 0; out->qsv_details.ctx = qsv_ctx; @@ -2762,6 +2822,12 @@ void hb_qsv_uninit_enc() hb_hw_device_ctx = NULL; qsv_device = NULL; device_manager_handle = NULL; + hb_qsv_deallocate_dx11_encoder_pool(); + if (device_context) + { + ID3D11DeviceContext_Release(device_context); + device_context = NULL; + } } static int qsv_device_init(AVCodecContext *s) @@ -2857,6 +2923,11 @@ static int qsv_init(AVCodecContext *s) hb_enc_qsv_frames_ctx.mids = (QSVMid*)hb_enc_qsv_frames_ctx.mids_buf->data; hb_enc_qsv_frames_ctx.nb_mids = frames_hwctx->nb_surfaces; memset(hb_enc_qsv_frames_ctx.pool, 0, hb_enc_qsv_frames_ctx.nb_mids * sizeof(hb_enc_qsv_frames_ctx.pool[0])); + ret = hb_qsv_get_dx_device(); + if (ret < 0) { + hb_error("qsv_init: hb_qsv_get_dx_device failed %d", ret); + return ret; + } return 0; } |