diff options
author | jstebbins <[email protected]> | 2013-12-02 21:25:41 +0000 |
---|---|---|
committer | jstebbins <[email protected]> | 2013-12-02 21:25:41 +0000 |
commit | 87ba2a39cc223742796170d87a8563c2e64910d8 (patch) | |
tree | 12348638a37e6ec22aefeecc0b9cdbfd03332e16 | |
parent | 6f68c72ba9055ced110782e250ae824ff076a23e (diff) |
libhb: remove use of avpicture_deinterlace
This function is being removed from libav, so replace its functionality
with a threaded implementation of our own.
git-svn-id: svn://svn.handbrake.fr/HandBrake/trunk@5911 b64f7644-9d1e-0410-96f1-a4d463321fa5
-rw-r--r-- | libhb/decomb.c | 218 | ||||
-rw-r--r-- | libhb/deinterlace.c | 240 | ||||
-rw-r--r-- | libhb/hb.c | 10 | ||||
-rw-r--r-- | libhb/internal.h | 2 |
4 files changed, 338 insertions, 132 deletions
diff --git a/libhb/decomb.c b/libhb/decomb.c index 74ea8ec94..efad5179c 100644 --- a/libhb/decomb.c +++ b/libhb/decomb.c @@ -189,6 +189,12 @@ struct hb_filter_private_s taskset_t eedi2_taskset; // Threads for eedi2 - one per plane }; +typedef struct +{ + int tap[5]; + int normalize; +} filter_param_t; + static int hb_decomb_init( hb_filter_object_t * filter, hb_filter_init_t * init ); @@ -209,20 +215,36 @@ hb_filter_object_t hb_filter_decomb = .close = hb_decomb_close, }; -int cubic_interpolate_pixel( int y0, int y1, int y2, int y3 ) +// Borrowed from libav +#define times4(x) x, x, x, x +#define times1024(x) times4(times4(times4(times4(times4(x))))) + +static const uint8_t hb_crop_table[256 + 2 * 1024] = { +times1024(0x00), +0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, +0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, +0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, +0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, +0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F, +0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F, +0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, +0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F, +0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F, +0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F, +0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF, +0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF, +0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF, +0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF, +0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF, 
+0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF, +times1024(0xFF) +}; + +static inline int cubic_interpolate_pixel( int y0, int y1, int y2, int y3 ) { /* From http://www.neuron2.net/library/cubicinterp.html */ int result = ( y0 * -3 ) + ( y1 * 23 ) + ( y2 * 23 ) + ( y3 * -3 ); - result /= 40; - - if( result > 255 ) - { - result = 255; - } - else if( result < 0 ) - { - result = 0; - } + result = hb_crop_table[(result / 40) + 1024]; return result; } @@ -288,7 +310,7 @@ static void cubic_interpolate_line( } } -void draw_mask_box( hb_filter_private_t * pv ) +static void draw_mask_box( hb_filter_private_t * pv ) { int x = pv->mask_box_x; int y = pv->mask_box_y; @@ -323,9 +345,9 @@ void draw_mask_box( hb_filter_private_t * pv ) } } -void apply_mask_line( uint8_t * srcp, - uint8_t * mskp, - int width ) +static void apply_mask_line( uint8_t * srcp, + uint8_t * mskp, + int width ) { int x; @@ -342,7 +364,7 @@ void apply_mask_line( uint8_t * srcp, } } -void apply_mask(hb_filter_private_t * pv, hb_buffer_t * b) +static void apply_mask(hb_filter_private_t * pv, hb_buffer_t * b) { /* draw_boxes */ draw_mask_box( pv ); @@ -391,30 +413,24 @@ static void store_ref(hb_filter_private_t * pv, hb_buffer_t * b) pv->ref[2] = b; } -int blend_filter_pixel( int up2, int up1, int current, int down1, int down2 ) +static inline int blend_filter_pixel(filter_param_t *filter, int up2, int up1, int current, int down1, int down2) { /* Low-pass 5-tap filter */ int result = 0; - result += -up2; - result += up1 * 2; - result += current * 6; - result += down1 *2; - result += -down2; - result /= 8; - if( result > 255 ) - { - result = 255; - } - if( result < 0 ) - { - result = 0; - } + result += up2 * filter->tap[0]; + result += up1 * filter->tap[1]; + result += current * filter->tap[2]; + result += down1 * filter->tap[3]; + result += down2 * filter->tap[4]; + result >>= filter->normalize; + result = hb_crop_table[result + 1024]; return result; } -static void 
blend_filter_line( uint8_t *dst, +static void blend_filter_line(filter_param_t *filter, + uint8_t *dst, uint8_t *cur, int width, int height, @@ -468,15 +484,15 @@ static void blend_filter_line( uint8_t *dst, for( x = 0; x < w; x++) { - dst[0] = blend_filter_pixel( cur[up2], cur[up1], cur[0], - cur[down1], cur[down2] ); - + /* Low-pass 5-tap filter */ + dst[0] = blend_filter_pixel(filter, cur[up2], cur[up1], cur[0], + cur[down1], cur[down2] ); dst++; cur++; } } -void reset_combing_results( hb_filter_private_t * pv ) +static void reset_combing_results( hb_filter_private_t * pv ) { pv->comb_check_complete = 0; int ii; @@ -486,7 +502,7 @@ void reset_combing_results( hb_filter_private_t * pv ) } } -int check_combing_results( hb_filter_private_t * pv ) +static int check_combing_results( hb_filter_private_t * pv ) { int threshold = pv->block_threshold; int send_to_blend = 0; @@ -523,7 +539,7 @@ int check_combing_results( hb_filter_private_t * pv ) } } -void check_filtered_combing_mask( hb_filter_private_t * pv, int segment, int start, int stop ) +static void check_filtered_combing_mask( hb_filter_private_t * pv, int segment, int start, int stop ) { /* Go through the mask in X*Y blocks. If any of these windows have threshold or more combed pixels, consider the whole @@ -591,7 +607,7 @@ void check_filtered_combing_mask( hb_filter_private_t * pv, int segment, int sta } } -void check_combing_mask( hb_filter_private_t * pv, int segment, int start, int stop ) +static void check_combing_mask( hb_filter_private_t * pv, int segment, int start, int stop ) { /* Go through the mask in X*Y blocks. 
If any of these windows have threshold or more combed pixels, consider the whole @@ -670,7 +686,7 @@ void check_combing_mask( hb_filter_private_t * pv, int segment, int start, int s } } -void build_gamma_lut( hb_filter_private_t * pv ) +static void build_gamma_lut( hb_filter_private_t * pv ) { int i; for( i = 0; i < 256; i++ ) @@ -679,12 +695,7 @@ void build_gamma_lut( hb_filter_private_t * pv ) } } -float scale_gamma( int pixel, hb_filter_private_t * pv ) -{ - return pv->gamma_lut[pixel]; -} - -void detect_gamma_combed_segment( hb_filter_private_t * pv, int segment_start, int segment_stop ) +static void detect_gamma_combed_segment( hb_filter_private_t * pv, int segment_start, int segment_stop ) { /* A mish-mash of various comb detection tricks picked up from neuron2's Decomb plugin for @@ -756,7 +767,6 @@ void detect_gamma_combed_segment( hb_filter_private_t * pv, int segment_start, i fabs( pv->gamma_lut[prev[down_1]] - pv->gamma_lut[cur[down_1]] ) > mthresh ) motion++; -// hb_log("prev->cur motion: %f, mthresh: %f", fabs( scale_gamma( prev[0] ) - scale_gamma( cur[0] ) ), mthresh); } else { @@ -794,7 +804,7 @@ void detect_gamma_combed_segment( hb_filter_private_t * pv, int segment_start, i } -void detect_combed_segment( hb_filter_private_t * pv, int segment_start, int segment_stop ) +static void detect_combed_segment( hb_filter_private_t * pv, int segment_start, int segment_stop ) { /* A mish-mash of various comb detection tricks picked up from neuron2's Decomb plugin for @@ -931,7 +941,7 @@ void detect_combed_segment( hb_filter_private_t * pv, int segment_start, int seg // This function calls all the eedi2 filters in sequence for a given plane. // It outputs the final interpolated image to pv->eedi_full[DST2PF]. -void eedi2_interpolate_plane( hb_filter_private_t * pv, int plane ) +static void eedi2_interpolate_plane( hb_filter_private_t * pv, int plane ) { /* We need all these pointers. No, seriously. I swear. It's not a joke. They're used. 
@@ -1011,7 +1021,7 @@ void eedi2_interpolate_plane( hb_filter_private_t * pv, int plane ) /* * eedi2 interpolate this plane in a single thread. */ -void eedi2_filter_thread( void *thread_args_v ) +static void eedi2_filter_thread( void *thread_args_v ) { hb_filter_private_t * pv; int plane; @@ -1041,7 +1051,7 @@ void eedi2_filter_thread( void *thread_args_v ) * Process plane */ eedi2_interpolate_plane( pv, plane ); - + /* * Finished this segment, let everyone know. */ @@ -1053,7 +1063,7 @@ void eedi2_filter_thread( void *thread_args_v ) // Sets up the input field planes for EEDI2 in pv->eedi_half[SRCPF] // and then runs eedi2_filter_thread for each plane. -void eedi2_planer( hb_filter_private_t * pv ) +static void eedi2_planer( hb_filter_private_t * pv ) { /* Copy the first field from the source to a half-height frame. */ int pp; @@ -1076,7 +1086,7 @@ void eedi2_planer( hb_filter_private_t * pv ) } -void mask_dilate_thread( void *thread_args_v ) +static void mask_dilate_thread( void *thread_args_v ) { hb_filter_private_t * pv; int segment, segment_start, segment_stop; @@ -1100,7 +1110,7 @@ void mask_dilate_thread( void *thread_args_v ) * No more work to do, exit this thread. */ break; - } + } int xx, yy, pp; @@ -1178,7 +1188,7 @@ void mask_dilate_thread( void *thread_args_v ) taskset_thread_complete( &pv->mask_dilate_taskset, segment ); } -void mask_erode_thread( void *thread_args_v ) +static void mask_erode_thread( void *thread_args_v ) { hb_filter_private_t * pv; int segment, segment_start, segment_stop; @@ -1202,7 +1212,7 @@ void mask_erode_thread( void *thread_args_v ) * No more work to do, exit this thread. 
*/ break; - } + } int xx, yy, pp; @@ -1280,7 +1290,7 @@ void mask_erode_thread( void *thread_args_v ) taskset_thread_complete( &pv->mask_erode_taskset, segment ); } -void mask_filter_thread( void *thread_args_v ) +static void mask_filter_thread( void *thread_args_v ) { hb_filter_private_t * pv; int segment, segment_start, segment_stop; @@ -1304,7 +1314,7 @@ void mask_filter_thread( void *thread_args_v ) * No more work to do, exit this thread. */ break; - } + } int xx, yy, pp; @@ -1383,7 +1393,7 @@ void mask_filter_thread( void *thread_args_v ) taskset_thread_complete( &pv->mask_filter_taskset, segment ); } -void decomb_check_thread( void *thread_args_v ) +static void decomb_check_thread( void *thread_args_v ) { hb_filter_private_t * pv; int segment, segment_start, segment_stop; @@ -1407,7 +1417,7 @@ void decomb_check_thread( void *thread_args_v ) * No more work to do, exit this thread. */ break; - } + } segment_start = thread_args->segment_start[0]; segment_stop = segment_start + thread_args->segment_height[0]; @@ -1433,7 +1443,7 @@ void decomb_check_thread( void *thread_args_v ) /* * comb detect this segment of all three planes in a single thread. */ -void decomb_filter_thread( void *thread_args_v ) +static void decomb_filter_thread( void *thread_args_v ) { hb_filter_private_t * pv; int segment, segment_start, segment_stop; @@ -1457,7 +1467,7 @@ void decomb_filter_thread( void *thread_args_v ) * No more work to do, exit this thread. */ break; - } + } /* * Process segment (for now just from luma) @@ -1487,14 +1497,14 @@ void decomb_filter_thread( void *thread_args_v ) taskset_thread_complete( &pv->decomb_filter_taskset, segment ); } -int comb_segmenter( hb_filter_private_t * pv ) +static int comb_segmenter( hb_filter_private_t * pv ) { /* * Now that all data for decomb detection is ready for * our threads, fire them off and wait for their completion. 
*/ taskset_cycle( &pv->decomb_filter_taskset ); - + if( pv->mode & MODE_FILTER ) { taskset_cycle( &pv->mask_filter_taskset ); @@ -1682,12 +1692,20 @@ static void yadif_filter_line( /* * deinterlace this segment of all three planes in a single thread. */ -void yadif_decomb_filter_thread( void *thread_args_v ) +static void yadif_decomb_filter_thread( void *thread_args_v ) { yadif_arguments_t *yadif_work = NULL; hb_filter_private_t * pv; int segment, segment_start, segment_stop; yadif_thread_arg_t *thread_args = thread_args_v; + filter_param_t filter; + + filter.tap[0] = -1; + filter.tap[1] = 2; + filter.tap[2] = 6; + filter.tap[3] = 2; + filter.tap[4] = -1; + filter.normalize = 3; pv = thread_args->pv; segment = thread_args->segment; @@ -1700,14 +1718,14 @@ void yadif_decomb_filter_thread( void *thread_args_v ) * Wait here until there is work to do. */ taskset_thread_wait4start( &pv->yadif_taskset, segment ); - + if( taskset_thread_stop( &pv->yadif_taskset, segment ) ) { /* * No more work to do, exit this thread. */ break; - } + } yadif_work = &pv->yadif_arguments[segment]; @@ -1747,7 +1765,7 @@ void yadif_decomb_filter_thread( void *thread_args_v ) for( yy = start; yy < segment_stop; yy += 2 ) { /* This line gets blend filtered, not yadif filtered. */ - blend_filter_line(dst2, cur, width, height, stride, yy); + blend_filter_line(&filter, dst2, cur, width, height, stride, yy); dst2 += stride * 2; cur += stride * 2; } @@ -1961,7 +1979,7 @@ static int hb_decomb_init( hb_filter_object_t * filter, pv->yadif_ready = 0; - pv->mode = MODE_YADIF | MODE_BLEND | MODE_CUBIC | + pv->mode = MODE_YADIF | MODE_BLEND | MODE_CUBIC | MODE_GAMMA | MODE_FILTER; pv->filter_mode = FILTER_ERODE_DILATE; pv->spatial_metric = 2; @@ -2037,7 +2055,7 @@ static int hb_decomb_init( hb_filter_object_t * filter, init->pix_fmt, init->width, init->height); } } - + /* * Setup yadif taskset. 
*/ @@ -2103,7 +2121,7 @@ static int hb_decomb_init( hb_filter_object_t * filter, for( ii = 0; ii < pv->cpu_count; ii++ ) { decomb_thread_arg_t *thread_args; - + thread_args = taskset_thread_args( &pv->decomb_filter_taskset, ii ); thread_args->pv = pv; thread_args->segment = ii; @@ -2161,7 +2179,7 @@ static int hb_decomb_init( hb_filter_object_t * filter, for( ii = 0; ii < pv->comb_check_nthreads; ii++ ) { decomb_thread_arg_t *thread_args, *decomb_prev_thread_args = NULL; - + thread_args = taskset_thread_args( &pv->decomb_check_taskset, ii ); thread_args->pv = pv; thread_args->segment = ii; @@ -2218,7 +2236,7 @@ static int hb_decomb_init( hb_filter_object_t * filter, for( ii = 0; ii < pv->cpu_count; ii++ ) { decomb_thread_arg_t *thread_args; - + thread_args = taskset_thread_args( &pv->mask_filter_taskset, ii ); thread_args->pv = pv; thread_args->segment = ii; @@ -2269,7 +2287,7 @@ static int hb_decomb_init( hb_filter_object_t * filter, for( ii = 0; ii < pv->cpu_count; ii++ ) { decomb_thread_arg_t *thread_args; - + thread_args = taskset_thread_args( &pv->mask_erode_taskset, ii ); thread_args->pv = pv; thread_args->segment = ii; @@ -2318,7 +2336,7 @@ static int hb_decomb_init( hb_filter_object_t * filter, for( ii = 0; ii < pv->cpu_count; ii++ ) { decomb_thread_arg_t *thread_args; - + thread_args = taskset_thread_args( &pv->mask_dilate_taskset, ii ); thread_args->pv = pv; thread_args->segment = ii; @@ -2410,7 +2428,7 @@ static int hb_decomb_init( hb_filter_object_t * filter, } } } - + return 0; } @@ -2444,7 +2462,7 @@ static void hb_decomb_close( hb_filter_object_t * filter ) taskset_fini( &pv->eedi2_taskset ); } - + /* Cleanup reference buffers. 
*/ int ii; for (ii = 0; ii < 3; ii++) @@ -2480,14 +2498,14 @@ static void hb_decomb_close( hb_filter_object_t * filter ) if (pv->cxy) eedi2_aligned_free(pv->cxy); if (pv->tmpc) eedi2_aligned_free(pv->tmpc); } - + free(pv->block_score); /* * free memory for yadif structs */ free( pv->yadif_arguments ); - + free( pv ); filter->private_data = NULL; } @@ -2539,12 +2557,12 @@ static int hb_decomb_work( hb_filter_object_t * filter, { num_frames = 2; } - + // Will need up to 2 buffers simultaneously int idx = 0; hb_buffer_t * o_buf[2] = {NULL,}; - /* Perform yadif filtering */ + /* Perform yadif filtering */ for( frame = 0; frame < num_frames; frame++ ) { int parity = frame ^ tff ^ 1; @@ -2626,3 +2644,41 @@ static int hb_decomb_work( hb_filter_object_t * filter, return HB_FILTER_OK; } + +void hb_deinterlace(hb_buffer_t *dst, hb_buffer_t *src) +{ + int pp; + filter_param_t filter; + + filter.tap[0] = -1; + filter.tap[1] = 4; + filter.tap[2] = 2; + filter.tap[3] = 4; + filter.tap[4] = -1; + filter.normalize = 3; + + for (pp = 0; pp < 3; pp++) + { + int yy; + int width = src->plane[pp].width; + int stride = src->plane[pp].stride; + int height = src->plane[pp].height; + + // Filter parity lines + uint8_t *pdst = &dst->plane[pp].data[0]; + uint8_t *psrc = &src->plane[pp].data[0]; + + /* These will be useful if we ever do temporal blending. */ + for( yy = 0; yy < height; yy += 2 ) + { + /* This line gets blend filtered, not yadif filtered. 
*/ + memcpy(pdst, psrc, width); + pdst += stride; + psrc += stride; + blend_filter_line(&filter, pdst, psrc, width, height, stride, yy); + pdst += stride; + psrc += stride; + } + } +} + diff --git a/libhb/deinterlace.c b/libhb/deinterlace.c index 09b92c437..69b1c31bc 100644 --- a/libhb/deinterlace.c +++ b/libhb/deinterlace.c @@ -40,6 +40,16 @@ typedef struct yadif_arguments_s { int tff; } yadif_arguments_t; +typedef struct deint_arguments_s { + hb_buffer_t * src; + hb_buffer_t * dst; +} deint_arguments_t; + +typedef struct deint_thread_arg_s { + hb_filter_private_t *pv; + int segment; +} deint_thread_arg_t; + struct hb_filter_private_s { int width; @@ -52,9 +62,14 @@ struct hb_filter_private_s hb_buffer_t * yadif_ref[3]; int cpu_count; + int segments; + + int deint_nsegs; - taskset_t yadif_taskset; // Threads for Yadif - one per CPU + taskset_t deint_taskset; // Threads for fast deint + taskset_t yadif_taskset; // Threads for Yadif + deint_arguments_t *deint_arguments; // Arguments to thread for work yadif_arguments_t *yadif_arguments; // Arguments to thread for work }; @@ -192,17 +207,16 @@ void yadif_filter_thread( void *thread_args_v ) */ run = 0; goto report_completion; - } + } yadif_work = &pv->yadif_arguments[segment]; if( yadif_work->dst == NULL ) { hb_error( "Thread started when no work available" ); - hb_snooze(500); goto report_completion; } - + /* * Process all three planes, but only this segment of it. */ @@ -218,9 +232,9 @@ void yadif_filter_thread( void *thread_args_v ) int tff = yadif_work->tff; int penultimate = h - 2; - int segment_height = (h / pv->cpu_count) & ~1; + int segment_height = (h / pv->segments) & ~1; segment_start = segment_height * segment; - if( segment == pv->cpu_count - 1 ) + if( segment == pv->segments - 1 ) { /* * Final segment @@ -248,7 +262,7 @@ void yadif_filter_thread( void *thread_args_v ) { /* This isn't the top or bottom, * proceed as normal to yadif. 
*/ - yadif_filter_line(pv, dst2, prev, cur, next, w, s, + yadif_filter_line(pv, dst2, prev, cur, next, w, s, parity ^ tff); } else @@ -295,8 +309,8 @@ static void yadif_filter( hb_filter_private_t * pv, int segment; - for( segment = 0; segment < pv->cpu_count; segment++ ) - { + for( segment = 0; segment < pv->segments; segment++ ) + { /* * Setup the work for this plane. */ @@ -313,6 +327,128 @@ static void yadif_filter( hb_filter_private_t * pv, */ } +/* + * deinterlace a frame in a single thread. + */ +void deint_filter_thread( void *thread_args_v ) +{ + deint_arguments_t *args = NULL; + hb_filter_private_t * pv; + int run = 1; + int segment; + deint_thread_arg_t *thread_args = thread_args_v; + + pv = thread_args->pv; + segment = thread_args->segment; + + hb_log("Fast Deinterlace thread started for segment %d", segment); + + while( run ) + { + /* + * Wait here until there is work to do. + */ + taskset_thread_wait4start( &pv->deint_taskset, segment ); + + + if( taskset_thread_stop( &pv->deint_taskset, segment ) ) + { + /* + * No more work to do, exit this thread. + */ + run = 0; + goto report_completion; + } + + args = &pv->deint_arguments[segment]; + + if( args->dst == NULL ) + { + // This can happen when flushing final buffers. + goto report_completion; + } + + /* + * Process all three planes, but only this segment of it. + */ + hb_deinterlace(args->dst, args->src); + +report_completion: + /* + * Finished this segment, let everyone know. + */ + taskset_thread_complete( &pv->deint_taskset, segment ); + } +} + +/* + * threaded fast deint - each thread deinterlaces a single frame. + * + * This function blocks until all frames are deinterlaced. 
+ */ +static hb_buffer_t * deint_fast(hb_filter_private_t * pv, hb_buffer_t * in) +{ + + int ii; + hb_buffer_t *dst, *src; + + if (in != NULL) + { + dst = hb_frame_buffer_init(in->f.fmt, in->f.width, in->f.height); + pv->deint_arguments[pv->deint_nsegs].src = in; + pv->deint_arguments[pv->deint_nsegs].dst = dst; + pv->deint_nsegs++; + } + if (in != NULL && pv->deint_nsegs < pv->segments) + { + return NULL; + } + + if (pv->deint_nsegs > 0) + { + /* Allow the taskset threads to make one pass over the data. */ + taskset_cycle( &pv->deint_taskset ); + } + + hb_buffer_t *first = NULL, *last = NULL; + for (ii = 0; ii < pv->deint_nsegs; ii++) + { + src = pv->deint_arguments[ii].src; + dst = pv->deint_arguments[ii].dst; + pv->deint_arguments[ii].src = NULL; + pv->deint_arguments[ii].dst = NULL; + if (first == NULL) + { + first = dst; + } + if (last != NULL) + { + last->next = dst; + } + last = dst; + + dst->s = src->s; + hb_buffer_move_subs(dst, src); + hb_buffer_close(&src); + } + if (in == NULL) + { + // Flushing final buffers. Append EOS marker buffer. + dst = hb_buffer_init(0); + if (first == NULL) + { + first = dst; + } + else + { + last->next = dst; + } + } + pv->deint_nsegs = 0; + + return first; +} + static int hb_deinterlace_init( hb_filter_object_t * filter, hb_filter_init_t * init ) { @@ -341,16 +477,17 @@ static int hb_deinterlace_init( hb_filter_object_t * filter, /* * Setup yadif taskset. 
*/ - pv->yadif_arguments = malloc( sizeof( yadif_arguments_t ) * pv->cpu_count ); + pv->segments = pv->cpu_count; + pv->yadif_arguments = malloc( sizeof( yadif_arguments_t ) * pv->segments ); if( pv->yadif_arguments == NULL || - taskset_init( &pv->yadif_taskset, /*thread_count*/pv->cpu_count, - sizeof( yadif_arguments_t ) ) == 0 ) + taskset_init( &pv->yadif_taskset, /*thread_count*/pv->segments, + sizeof( yadif_thread_arg_t ) ) == 0 ) { hb_error( "yadif could not initialize taskset" ); } int ii; - for( ii = 0; ii < pv->cpu_count; ii++ ) + for( ii = 0; ii < pv->segments; ii++ ) { yadif_thread_arg_t *thread_args; @@ -369,7 +506,41 @@ static int hb_deinterlace_init( hb_filter_object_t * filter, } } } - + else + { + /* + * Setup fast deint taskset. + */ + pv->segments = pv->cpu_count; + pv->deint_arguments = malloc( sizeof( deint_arguments_t ) * pv->segments ); + if( pv->deint_arguments == NULL || + taskset_init( &pv->deint_taskset, pv->segments, + sizeof( deint_thread_arg_t ) ) == 0 ) + { + hb_error( "deint could not initialize taskset" ); + } + + int ii; + for( ii = 0; ii < pv->segments; ii++ ) + { + deint_thread_arg_t *thread_args; + + thread_args = taskset_thread_args( &pv->deint_taskset, ii ); + + thread_args->pv = pv; + thread_args->segment = ii; + pv->deint_arguments[ii].dst = NULL; + + if( taskset_thread_spawn( &pv->deint_taskset, ii, + "deint_filter_segment", + deint_filter_thread, + HB_NORMAL_PRIORITY ) == 0 ) + { + hb_error( "deint could not spawn thread" ); + } + } + } + return 0; } @@ -395,37 +566,16 @@ static void hb_deinterlace_close( hb_filter_object_t * filter ) free( pv->yadif_arguments ); } - + else + { + taskset_fini( &pv->deint_taskset ); + free( pv->deint_arguments ); + } + free( pv ); filter->private_data = NULL; } -static hb_buffer_t * deint_fast(hb_buffer_t * in) -{ - AVPicture pic_in; - AVPicture pic_out; - hb_buffer_t * out; - - int w = (in->plane[0].width + 3) & ~0x3; - int h = (in->plane[0].height + 3) & ~0x3; - - out = 
hb_frame_buffer_init(in->f.fmt, in->f.width, in->f.height); - - hb_avpicture_fill( &pic_in, in ); - hb_avpicture_fill( &pic_out, out ); - - // avpicture_deinterlace requires 4 pixel aligned width and height - // we have aligned all buffers to 16 byte width and height strides - // so there is room in the buffers to accomodate a litte - // overscan. - avpicture_deinterlace(&pic_out, &pic_in, out->f.fmt, w, h); - - out->s = in->s; - hb_buffer_move_subs(out, in); - - return out; -} - static int hb_deinterlace_work( hb_filter_object_t * filter, hb_buffer_t ** buf_in, hb_buffer_t ** buf_out ) @@ -438,13 +588,19 @@ static int hb_deinterlace_work( hb_filter_object_t * filter, { *buf_out = in; *buf_in = NULL; + if( !( pv->yadif_mode & MODE_YADIF_ENABLE ) ) + { + // Flush final frames + *buf_out = deint_fast(pv, NULL); + } return HB_FILTER_DONE; } /* Use libavcodec deinterlace if yadif_mode < 0 */ if( !( pv->yadif_mode & MODE_YADIF_ENABLE ) ) { - *buf_out = deint_fast(in); + *buf_in = NULL; + *buf_out = deint_fast(pv, in); return HB_FILTER_OK; } diff --git a/libhb/hb.c b/libhb/hb.c index 760683d76..b9730802d 100644 --- a/libhb/hb.c +++ b/libhb/hb.c @@ -786,19 +786,11 @@ void hb_get_preview( hb_handle_t * h, hb_job_t * job, int picture, if( job->deinterlace ) { - int width = (in_buf->plane[0].width + 3) & ~0x3; - int height = (in_buf->plane[0].height + 3) & ~0x3; - // Deinterlace and crop - // avpicture_deinterlace requires 4 pixel aligned width and height - // we have aligned all buffers to 16 byte width and height strides - // so there is room in the buffers to accomodate a litte - // overscan. 
deint_buf = hb_frame_buffer_init( AV_PIX_FMT_YUV420P, title->width, title->height ); + hb_deinterlace(deint_buf, in_buf); hb_avpicture_fill( &pic_deint, deint_buf ); - avpicture_deinterlace( &pic_deint, &pic_in, AV_PIX_FMT_YUV420P, - width, height ); av_picture_crop( &pic_crop, &pic_deint, AV_PIX_FMT_YUV420P, job->crop[0], job->crop[2] ); diff --git a/libhb/internal.h b/libhb/internal.h index 643deb9a2..a18f1f111 100644 --- a/libhb/internal.h +++ b/libhb/internal.h @@ -495,3 +495,5 @@ DECLARE_MUX( avformat ); void hb_muxmp4_process_subtitle_style( uint8_t *input, uint8_t *output, uint8_t *style, uint16_t *stylesize ); + +void hb_deinterlace(hb_buffer_t *dst, hb_buffer_t *src); |