/*
 * Copyright (C) 2006 Michael Niedermayer
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "hb.h"
#include "hbffmpeg.h"
#include "mpeg2dec/mpeg2.h"

#define SUPPRESS_AV_LOG

#define YADIF_MODE_DEFAULT     -1
#define YADIF_PARITY_DEFAULT   -1

#define MCDEINT_MODE_DEFAULT   -1
#define MCDEINT_QP_DEFAULT      1

#define ABS(a) ((a) > 0 ? (a) : (-(a)))
#define MIN3(a,b,c) MIN(MIN(a,b),c)
#define MAX3(a,b,c) MAX(MAX(a,b),c)

typedef struct yadif_arguments_s
{
    uint8_t **dst;
    int       parity;
    int       tff;
    int       stop;
} yadif_arguments_t;

struct hb_filter_private_s
{
    int                 pix_fmt;
    int                 width[3];
    int                 height[3];

    int                 yadif_mode;
    int                 yadif_parity;
    int                 yadif_ready;

    uint8_t           * yadif_ref[4][3];
    int                 yadif_ref_stride[3];

    int                 cpu_count;
    hb_thread_t       **yadif_threads;        // Threads for Yadif - one per CPU
    hb_lock_t         **yadif_begin_lock;     // Thread has work
    hb_lock_t         **yadif_complete_lock;  // Thread has completed work
    yadif_arguments_t  *yadif_arguments;      // Arguments to thread for work

    int                 mcdeint_mode;
    int                 mcdeint_qp;

    int                 mcdeint_outbuf_size;
    uint8_t           * mcdeint_outbuf;
    AVCodecContext    * mcdeint_avctx_enc;
    AVFrame           * mcdeint_frame;
    AVFrame           * mcdeint_frame_dec;

    AVPicture           pic_in;
    AVPicture           pic_out;
    hb_buffer_t       * buf_out[2];
    hb_buffer_t       * buf_settings;
};

hb_filter_private_t * hb_deinterlace_init( int pix_fmt,
                                           int width,
                                           int height,
                                           char * settings );

int hb_deinterlace_work( hb_buffer_t * buf_in,
                         hb_buffer_t ** buf_out,
                         int pix_fmt,
                         int width,
                         int height,
                         hb_filter_private_t * pv );

void hb_deinterlace_close( hb_filter_private_t * pv );

hb_filter_object_t hb_filter_deinterlace =
{
    FILTER_DEINTERLACE,
    "Deinterlace (ffmpeg or yadif/mcdeint)",
    NULL,
    hb_deinterlace_init,
    hb_deinterlace_work,
    hb_deinterlace_close,
};

static void yadif_store_ref( const uint8_t ** pic,
                             hb_filter_private_t * pv )
{
    memcpy( pv->yadif_ref[3],
            pv->yadif_ref[0],
            sizeof(uint8_t *)*3 );

    memmove( pv->yadif_ref[0],
             pv->yadif_ref[1],
             sizeof(uint8_t *)*3*3 );

    int i;
    for( i = 0; i < 3; i++ )
    {
        const uint8_t * src = pic[i];
        uint8_t * ref       = pv->yadif_ref[2][i];

        int w          = pv->width[i];
        int ref_stride = pv->yadif_ref_stride[i];

        int y;
        for( y = 0; y < pv->height[i]; y++ )
        {
            memcpy(ref, src, w);
            src = (uint8_t*)src + w;
            ref = (uint8_t*)ref + ref_stride;
        }
    }
}
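/*
 * Filter one row of one plane for the field being reconstructed.
 *
 * For each pixel, a temporal prediction is taken from the average of the
 * matching pixel in the previous and next fields (prev2/next2, selected by
 * parity), and a spatial prediction is taken from an edge-directed average
 * of the rows above and below in the current frame (the YADIF_CHECK search).
 * The spatial prediction is then clamped to lie within 'diff' of the
 * temporal prediction; when yadif_mode < 2, 'diff' is additionally adjusted
 * using the pixels two rows above and below (yadif's interlacing check).
 */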
static void yadif_filter_line( uint8_t *dst,
                               uint8_t *prev,
                               uint8_t *cur,
                               uint8_t *next,
                               int plane,
                               int parity,
                               hb_filter_private_t * pv )
{
    uint8_t *prev2 = parity ? prev : cur ;
    uint8_t *next2 = parity ? cur  : next;

    int w    = pv->width[plane];
    int refs = pv->yadif_ref_stride[plane];

    int x;
    for( x = 0; x < w; x++)
    {
        int c              = cur[-refs];
        int d              = (prev2[0] + next2[0])>>1;
        int e              = cur[+refs];
        int temporal_diff0 = ABS(prev2[0] - next2[0]);
        int temporal_diff1 = ( ABS(prev[-refs] - c) + ABS(prev[+refs] - e) ) >> 1;
        int temporal_diff2 = ( ABS(next[-refs] - c) + ABS(next[+refs] - e) ) >> 1;
        int diff           = MAX3(temporal_diff0>>1, temporal_diff1, temporal_diff2);
        int spatial_pred   = (c+e)>>1;
        int spatial_score  = ABS(cur[-refs-1] - cur[+refs-1]) + ABS(c-e) +
                             ABS(cur[-refs+1] - cur[+refs+1]) - 1;

#define YADIF_CHECK(j)\
        {   int score = ABS(cur[-refs-1+j] - cur[+refs-1-j])\
                      + ABS(cur[-refs  +j] - cur[+refs  -j])\
                      + ABS(cur[-refs+1+j] - cur[+refs+1-j]);\
            if( score < spatial_score ){\
                spatial_score = score;\
                spatial_pred  = (cur[-refs  +j] + cur[+refs  -j])>>1;\

        YADIF_CHECK(-1) YADIF_CHECK(-2) }} }}
        YADIF_CHECK( 1) YADIF_CHECK( 2) }} }}

        if( pv->yadif_mode < 2 )
        {
            int b = (prev2[-2*refs] + next2[-2*refs])>>1;
            int f = (prev2[+2*refs] + next2[+2*refs])>>1;

            int max = MAX3(d-e, d-c, MIN(b-c, f-e));
            int min = MIN3(d-e, d-c, MAX(b-c, f-e));

            diff = MAX3( diff, min, -max );
        }

        if( spatial_pred > d + diff )
        {
            spatial_pred = d + diff;
        }
        else if( spatial_pred < d - diff )
        {
            spatial_pred = d - diff;
        }

        dst[0] = spatial_pred;

        dst++;
        cur++;
        prev++;
        next++;
        prev2++;
        next2++;
    }
}
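/*
 * Threading model for the yadif pass: hb_deinterlace_init() creates one
 * worker thread per CPU, each responsible for one horizontal slice
 * ("segment") of the frame and initially blocked on its begin lock.
 * yadif_filter() publishes the work in yadif_arguments[segment], takes the
 * segment's complete lock and releases its begin lock; the worker filters
 * its segment of all three planes and then releases the complete lock,
 * which yadif_filter() is waiting to reacquire.
 */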
typedef struct yadif_thread_arg_s
{
    hb_filter_private_t *pv;
    int segment;
} yadif_thread_arg_t;

/*
 * deinterlace this segment of all three planes in a single thread.
 */
void yadif_filter_thread( void *thread_args_v )
{
    yadif_arguments_t *yadif_work = NULL;
    hb_filter_private_t * pv;
    int run = 1;
    int plane;
    int segment, segment_start, segment_stop;
    yadif_thread_arg_t *thread_args = thread_args_v;
    uint8_t **dst;
    int parity, tff, y, w, h, ref_stride, penultimate, ultimate;

    pv = thread_args->pv;
    segment = thread_args->segment;

    hb_log("Yadif Deinterlace thread started for segment %d", segment);

    while( run )
    {
        /*
         * Wait here until there is work to do. hb_lock() blocks until
         * render releases it to say that there is more work to do.
         */
        hb_lock( pv->yadif_begin_lock[segment] );

        yadif_work = &pv->yadif_arguments[segment];

        if( yadif_work->stop )
        {
            /*
             * No more work to do, exit this thread.
             */
            run = 0;
            continue;
        }

        if( yadif_work->dst == NULL )
        {
            hb_error( "Thread started when no work available" );
            hb_snooze(500);
            continue;
        }

        /*
         * Process this segment of all three planes.
         */
        for( plane = 0; plane < 3; plane++)
        {
            dst = yadif_work->dst;
            parity = yadif_work->parity;
            tff = yadif_work->tff;
            w = pv->width[plane];
            h = pv->height[plane];
            penultimate = h - 2;
            ultimate = h - 1;
            ref_stride = pv->yadif_ref_stride[plane];
            segment_start = ( h / pv->cpu_count ) * segment;
            if( segment == pv->cpu_count - 1 )
            {
                /*
                 * Final segment
                 */
                segment_stop = h;
            }
            else
            {
                segment_stop = ( h / pv->cpu_count ) * ( segment + 1 );
            }

            for( y = segment_start; y < segment_stop; y++ )
            {
                if( ( ( y ^ parity ) & 1 ) )
                {
                    /* This is the bottom field when TFF and vice-versa.
                       It's the field that gets filtered. Because yadif
                       needs 2 lines above and below the one being filtered,
                       we need to mirror the edges. When TFF, this means
                       replacing the 2nd line with a copy of the 1st,
                       and the last with the second-to-last. */
                    if( y > 1 && y < ( h - 2 ) )
                    {
                        /* This isn't the top or bottom,
                         * proceed as normal to yadif. */
                        uint8_t *prev = &pv->yadif_ref[0][plane][y*ref_stride];
                        uint8_t *cur  = &pv->yadif_ref[1][plane][y*ref_stride];
                        uint8_t *next = &pv->yadif_ref[2][plane][y*ref_stride];
                        uint8_t *dst2 = &dst[plane][y*w];

                        yadif_filter_line( dst2,
                                           prev,
                                           cur,
                                           next,
                                           plane,
                                           parity ^ tff,
                                           pv );
                    }
                    else if( y == 0 )
                    {
                        /* BFF, so y0 = y1 */
                        memcpy( &dst[plane][y*w],
                                &pv->yadif_ref[1][plane][1*ref_stride],
                                w * sizeof(uint8_t) );
                    }
                    else if( y == 1 )
                    {
                        /* TFF, so y1 = y0 */
                        memcpy( &dst[plane][y*w],
                                &pv->yadif_ref[1][plane][0],
                                w * sizeof(uint8_t) );
                    }
                    else if( y == penultimate )
                    {
                        /* BFF, so penultimate y = ultimate y */
                        memcpy( &dst[plane][y*w],
                                &pv->yadif_ref[1][plane][ultimate*ref_stride],
                                w * sizeof(uint8_t) );
                    }
                    else if( y == ultimate )
                    {
                        /* TFF, so ultimate y = penultimate y */
                        memcpy( &dst[plane][y*w],
                                &pv->yadif_ref[1][plane][penultimate*ref_stride],
                                w * sizeof(uint8_t) );
                    }
                }
                else
                {
                    /* Preserve this field unfiltered */
                    memcpy( &dst[plane][y*w],
                            &pv->yadif_ref[1][plane][y*ref_stride],
                            w * sizeof(uint8_t) );
                }
            }
        }

        /*
         * Finished this segment, let everyone know.
         */
        hb_unlock( pv->yadif_complete_lock[segment] );
    }
    free( thread_args_v );
}

/*
 * Threaded yadif - each thread deinterlaces a single segment of all
 * three planes, where a segment is the frame height divided by the
 * number of CPUs.
 *
 * This function blocks until the frame is deinterlaced.
 */
static void yadif_filter( uint8_t ** dst,
                          int parity,
                          int tff,
                          hb_filter_private_t * pv )
{
    int segment;

    for( segment = 0; segment < pv->cpu_count; segment++ )
    {
        /*
         * Setup the work for this segment.
         */
        pv->yadif_arguments[segment].parity = parity;
        pv->yadif_arguments[segment].tff = tff;
        pv->yadif_arguments[segment].dst = dst;

        /*
         * Let the thread for this segment know that we've setup work
         * for it by releasing the begin lock (ensuring that the
         * complete lock is already locked so that we block when
         * we try to lock it again below).
         */
        hb_lock( pv->yadif_complete_lock[segment] );
        hb_unlock( pv->yadif_begin_lock[segment] );
    }

    /*
     * Wait until all of the threads have completed by trying to get
     * the complete lock that we locked earlier for each thread, which
     * will block until that thread has completed the work on its
     * segment.
     */
    for( segment = 0; segment < pv->cpu_count; segment++ )
    {
        hb_lock( pv->yadif_complete_lock[segment] );
        hb_unlock( pv->yadif_complete_lock[segment] );
    }

    /*
     * Entire frame is now deinterlaced.
     */
}
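/*
 * Motion-compensated cleanup of the yadif output (mcdeint).
 *
 * The yadif result in src is run through the snow encoder opened in
 * hb_deinterlace_init() (CODEC_FLAG2_MEMC_ONLY), so coded_frame ends up
 * holding a motion-compensated prediction of the frame.  For each pixel of
 * the field that yadif interpolated, that prediction is corrected toward src
 * using the differences on the neighbouring lines (with a directional check
 * much like yadif's) and written to both dst and the prediction frame.
 * Lines of the original field are copied straight from src, presumably so
 * the next call's motion estimation references the finished picture.
 */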
static void mcdeint_filter( uint8_t ** dst,
                            uint8_t ** src,
                            int parity,
                            hb_filter_private_t * pv )
{
    int x, y, i;
    int out_size;

#ifdef SUPPRESS_AV_LOG
    /* TODO: temporarily change log level to suppress obnoxious debug output */
    int loglevel = av_log_get_level();
    av_log_set_level( AV_LOG_QUIET );
#endif

    for( i=0; i<3; i++ )
    {
        pv->mcdeint_frame->data[i]     = src[i];
        pv->mcdeint_frame->linesize[i] = pv->width[i];
    }

    pv->mcdeint_avctx_enc->me_cmp     = FF_CMP_SAD;
    pv->mcdeint_avctx_enc->me_sub_cmp = FF_CMP_SAD;
    pv->mcdeint_frame->quality        = pv->mcdeint_qp * FF_QP2LAMBDA;

    out_size = avcodec_encode_video( pv->mcdeint_avctx_enc,
                                     pv->mcdeint_outbuf,
                                     pv->mcdeint_outbuf_size,
                                     pv->mcdeint_frame );

    pv->mcdeint_frame_dec = pv->mcdeint_avctx_enc->coded_frame;

    for( i = 0; i < 3; i++ )
    {
        int w    = pv->width[i];
        int h    = pv->height[i];
        int fils = pv->mcdeint_frame_dec->linesize[i];
        int srcs = pv->width[i];

        for( y = 0; y < h; y++ )
        {
            if( (y ^ parity) & 1 )
            {
                for( x = 0; x < w; x++ )
                {
                    if( (x-2)+(y-1)*w >= 0 && (x+2)+(y+1)*w < w*h )
                    {
                        uint8_t * filp =
                            &pv->mcdeint_frame_dec->data[i][x + y*fils];
                        uint8_t * srcp = &src[i][x + y*srcs];

                        int diff0 = filp[-fils] - srcp[-srcs];
                        int diff1 = filp[+fils] - srcp[+srcs];

                        int spatial_score =
                              ABS(srcp[-srcs-1] - srcp[+srcs-1])
                            + ABS(srcp[-srcs  ] - srcp[+srcs  ])
                            + ABS(srcp[-srcs+1] - srcp[+srcs+1]) - 1;

                        int temp = filp[0];

#define MCDEINT_CHECK(j)\
                        {   int score = ABS(srcp[-srcs-1+j] - srcp[+srcs-1-j])\
                                      + ABS(srcp[-srcs  +j] - srcp[+srcs  -j])\
                                      + ABS(srcp[-srcs+1+j] - srcp[+srcs+1-j]);\
                            if( score < spatial_score ) {\
                                spatial_score = score;\
                                diff0 = filp[-fils+j] - srcp[-srcs+j];\
                                diff1 = filp[+fils-j] - srcp[+srcs-j];

                        MCDEINT_CHECK(-1) MCDEINT_CHECK(-2) }} }}
                        MCDEINT_CHECK( 1) MCDEINT_CHECK( 2) }} }}

                        if(diff0 + diff1 > 0)
                        {
                            temp -= (diff0 + diff1 -
                                     ABS( ABS(diff0) - ABS(diff1) ) / 2) / 2;
                        }
                        else
                        {
                            temp -= (diff0 + diff1 +
                                     ABS( ABS(diff0) - ABS(diff1) ) / 2) / 2;
                        }

                        filp[0] = dst[i][x + y*w] =
                            temp > 255U ? ~(temp>>31) : temp;
                    }
                    else
                    {
                        dst[i][x + y*w] =
                            pv->mcdeint_frame_dec->data[i][x + y*fils];
                    }
                }
            }
        }

        for( y = 0; y < h; y++ )
        {
            if( !((y ^ parity) & 1) )
            {
                for( x = 0; x < w; x++ )
                {
                    pv->mcdeint_frame_dec->data[i][x + y*fils] =
                        dst[i][x + y*w] = src[i][x + y*srcs];
                }
            }
        }
    }

#ifdef SUPPRESS_AV_LOG
    /* TODO: restore previous log level */
    av_log_set_level(loglevel);
#endif
}
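/*
 * Filter setup.  The settings string holds up to four colon-separated
 * integers, "yadif_mode:yadif_parity:mcdeint_mode:mcdeint_qp":
 *
 *   yadif_mode   < 0 : use libavcodec's avpicture_deinterlace instead of yadif
 *   yadif_parity < 0 : take the field order from the buffer's
 *                      PIC_FLAG_TOP_FIELD_FIRST flag
 *   mcdeint_mode < 0 : skip the mcdeint pass
 *   mcdeint_qp       : quantizer for the snow motion-estimation encode
 *
 * Values that are not supplied keep the *_DEFAULT values defined above.
 */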
hb_filter_private_t * hb_deinterlace_init( int pix_fmt,
                                           int width,
                                           int height,
                                           char * settings )
{
    if( pix_fmt != PIX_FMT_YUV420P )
    {
        return 0;
    }

    hb_filter_private_t * pv = calloc( 1, sizeof(struct hb_filter_private_s) );

    pv->pix_fmt = pix_fmt;

    pv->width[0]  = width;
    pv->height[0] = height;
    pv->width[1]  = pv->width[2]  = width >> 1;
    pv->height[1] = pv->height[2] = height >> 1;

    pv->buf_out[0]   = hb_video_buffer_init( width, height );
    pv->buf_out[1]   = hb_video_buffer_init( width, height );
    pv->buf_settings = hb_buffer_init( 0 );

    pv->yadif_ready  = 0;
    pv->yadif_mode   = YADIF_MODE_DEFAULT;
    pv->yadif_parity = YADIF_PARITY_DEFAULT;

    pv->mcdeint_mode = MCDEINT_MODE_DEFAULT;
    pv->mcdeint_qp   = MCDEINT_QP_DEFAULT;

    if( settings )
    {
        sscanf( settings, "%d:%d:%d:%d",
                &pv->yadif_mode,
                &pv->yadif_parity,
                &pv->mcdeint_mode,
                &pv->mcdeint_qp );
    }

    pv->cpu_count = hb_get_cpu_count();

    /* Allocate yadif specific buffers */
    if( pv->yadif_mode >= 0 )
    {
        int i, j;
        for( i = 0; i < 3; i++ )
        {
            int is_chroma = !!i;
            int w = ((width   + 31) & (~31))>>is_chroma;
            int h = ((height+6+ 31) & (~31))>>is_chroma;

            pv->yadif_ref_stride[i] = w;

            for( j = 0; j < 3; j++ )
            {
                pv->yadif_ref[j][i] = malloc( w*h*sizeof(uint8_t) ) + 3*w;
            }
        }

        /*
         * Create yadif threads and locks.
         */
        pv->yadif_threads       = malloc( sizeof( hb_thread_t* ) * pv->cpu_count );
        pv->yadif_begin_lock    = malloc( sizeof( hb_lock_t * ) * pv->cpu_count );
        pv->yadif_complete_lock = malloc( sizeof( hb_lock_t * ) * pv->cpu_count );
        pv->yadif_arguments     = malloc( sizeof( yadif_arguments_t ) * pv->cpu_count );

        for( i = 0; i < pv->cpu_count; i++ )
        {
            yadif_thread_arg_t *thread_args;

            thread_args = malloc( sizeof( yadif_thread_arg_t ) );

            if( thread_args )
            {
                thread_args->pv = pv;
                thread_args->segment = i;

                pv->yadif_begin_lock[i]    = hb_lock_init();
                pv->yadif_complete_lock[i] = hb_lock_init();

                /*
                 * Important to start off with the threads locked waiting
                 * on input.
                 */
                hb_lock( pv->yadif_begin_lock[i] );

                pv->yadif_arguments[i].stop = 0;
                pv->yadif_arguments[i].dst  = NULL;

                pv->yadif_threads[i] = hb_thread_init( "yadif_filter_segment",
                                                       yadif_filter_thread,
                                                       thread_args,
                                                       HB_NORMAL_PRIORITY );
            }
            else
            {
                hb_error( "Yadif could not create threads" );
            }
        }
    }

    /* Allocate mcdeint specific buffers */
    if( pv->mcdeint_mode >= 0 )
    {
        avcodec_init();
        avcodec_register_all();

        AVCodec * enc = avcodec_find_encoder( CODEC_ID_SNOW );

        int i;
        for (i = 0; i < 3; i++ )
        {
            AVCodecContext * avctx_enc;

            avctx_enc = pv->mcdeint_avctx_enc = avcodec_alloc_context();

            avctx_enc->width                 = width;
            avctx_enc->height                = height;
            avctx_enc->time_base             = (AVRational){1,25}; // meaningless
            avctx_enc->gop_size              = 300;
            avctx_enc->max_b_frames          = 0;
            avctx_enc->pix_fmt               = PIX_FMT_YUV420P;
            avctx_enc->flags                 = CODEC_FLAG_QSCALE | CODEC_FLAG_LOW_DELAY;
            avctx_enc->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL;
            avctx_enc->global_quality        = 1;
            avctx_enc->flags2                = CODEC_FLAG2_MEMC_ONLY;
            avctx_enc->me_cmp                = FF_CMP_SAD; //SSE;
            avctx_enc->me_sub_cmp            = FF_CMP_SAD; //SSE;
            avctx_enc->mb_cmp                = FF_CMP_SSE;

            switch( pv->mcdeint_mode )
            {
                case 3:
                    avctx_enc->refs = 3;
                case 2:
                    avctx_enc->me_method = ME_UMH;
                case 1:
                    avctx_enc->flags |= CODEC_FLAG_4MV;
                    avctx_enc->dia_size = 2;
                case 0:
                    avctx_enc->flags |= CODEC_FLAG_QPEL;
            }

            hb_avcodec_open(avctx_enc, enc);
        }

        pv->mcdeint_frame       = avcodec_alloc_frame();
        pv->mcdeint_outbuf_size = width * height * 10;
        pv->mcdeint_outbuf      = malloc( pv->mcdeint_outbuf_size );
    }

    return pv;
}

void hb_deinterlace_close( hb_filter_private_t * pv )
{
    if( !pv )
    {
        return;
    }

    /* Cleanup frame buffers */
    if( pv->buf_out[0] )
    {
        hb_buffer_close( &pv->buf_out[0] );
    }
    if( pv->buf_out[1] )
    {
        hb_buffer_close( &pv->buf_out[1] );
    }
    if (pv->buf_settings )
    {
        hb_buffer_close( &pv->buf_settings );
    }

    /* Cleanup yadif specific buffers */
    if( pv->yadif_mode >= 0 )
    {
        int i;
        for( i = 0; i < 3*3; i++ )
        {
            uint8_t **p = &pv->yadif_ref[i%3][i/3];
            if (*p)
            {
                free( *p - 3*pv->yadif_ref_stride[i/3] );
                *p = NULL;
            }
        }

        for( i = 0; i < pv->cpu_count; i++)
        {
            /*
             * Tell each yadif thread to stop, and then cleanup.
             */
            pv->yadif_arguments[i].stop = 1;
            hb_unlock( pv->yadif_begin_lock[i] );

            hb_thread_close( &pv->yadif_threads[i] );
            hb_lock_close( &pv->yadif_begin_lock[i] );
            hb_lock_close( &pv->yadif_complete_lock[i] );
        }

        /*
         * free memory for yadif structs
         */
        free( pv->yadif_threads );
        free( pv->yadif_begin_lock );
        free( pv->yadif_complete_lock );
        free( pv->yadif_arguments );
    }

    /* Cleanup mcdeint specific buffers */
    if( pv->mcdeint_mode >= 0 )
    {
        if( pv->mcdeint_avctx_enc )
        {
            hb_avcodec_close( pv->mcdeint_avctx_enc );
            av_freep( &pv->mcdeint_avctx_enc );
        }
        if( pv->mcdeint_outbuf )
        {
            free( pv->mcdeint_outbuf );
        }
    }

    free( pv );
}

int hb_deinterlace_work( hb_buffer_t * buf_in,
                         hb_buffer_t ** buf_out,
                         int pix_fmt,
                         int width,
                         int height,
                         hb_filter_private_t * pv )
{
    if( !pv ||
        pix_fmt != pv->pix_fmt ||
        width   != pv->width[0] ||
        height  != pv->height[0] )
    {
        return FILTER_FAILED;
    }

    avpicture_fill( &pv->pic_in, buf_in->data,
                    pix_fmt, width, height );

    /* Use libavcodec deinterlace if yadif_mode < 0 */
    if( pv->yadif_mode < 0 )
    {
        avpicture_fill( &pv->pic_out, pv->buf_out[0]->data,
                        pix_fmt, width, height );

        avpicture_deinterlace( &pv->pic_out, &pv->pic_in,
                               pix_fmt, width, height );

        hb_buffer_copy_settings( pv->buf_out[0], buf_in );

        *buf_out = pv->buf_out[0];

        return FILTER_OK;
    }

    /* Determine if top-field first layout */
    int tff;
    if( pv->yadif_parity < 0 )
    {
        tff = !!(buf_in->flags & PIC_FLAG_TOP_FIELD_FIRST);
    }
    else
    {
        tff = (pv->yadif_parity & 1) ^ 1;
    }

    /* Store current frame in yadif cache */
    yadif_store_ref( (const uint8_t**)pv->pic_in.data, pv );

    /* If yadif is not ready, store another ref and return FILTER_DELAY */
    if( pv->yadif_ready == 0 )
    {
        yadif_store_ref( (const uint8_t**)pv->pic_in.data, pv );

        hb_buffer_copy_settings( pv->buf_settings, buf_in );

        /* don't let 'work_loop' send a chapter mark upstream */
        buf_in->new_chap = 0;

        pv->yadif_ready = 1;

        return FILTER_DELAY;
    }

    /* Perform yadif and mcdeint filtering */
    int frame;
    for( frame = 0; frame <= (pv->yadif_mode & 1); frame++ )
    {
        int parity = frame ^ tff ^ 1;

        avpicture_fill( &pv->pic_out, pv->buf_out[!(frame^1)]->data,
                        pix_fmt, width, height );

        yadif_filter( pv->pic_out.data, parity, tff, pv );

        if( pv->mcdeint_mode >= 0 )
        {
            avpicture_fill( &pv->pic_in, pv->buf_out[(frame^1)]->data,
                            pix_fmt, width, height );

            mcdeint_filter( pv->pic_in.data, pv->pic_out.data, parity, pv );

            *buf_out = pv->buf_out[ (frame^1)];
        }
        else
        {
            *buf_out = pv->buf_out[!(frame^1)];
        }
    }

    /* Copy buffered settings to output buffer settings */
    hb_buffer_copy_settings( *buf_out, pv->buf_settings );

    /* Replace buffered settings with input buffer settings */
    hb_buffer_copy_settings( pv->buf_settings, buf_in );

    /* don't let 'work_loop' send a chapter mark upstream */
    buf_in->new_chap = 0;

    return FILTER_OK;
}