/* decavsub.c

   Copyright (c) 2003-2021 HandBrake Team
   This file is part of the HandBrake source code
   Homepage: .
   It may be used under the terms of the GNU General Public License v2.
   For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html
 */

#include "handbrake/handbrake.h"
#include "handbrake/hbffmpeg.h"
#include "handbrake/decavsub.h"

// State shared by every call into the libavcodec subtitle decoder.
struct hb_avsub_context_s
{
    AVCodecContext       * context;
    hb_job_t             * job;
    hb_subtitle_t        * subtitle;

    // For subs, when doing passthru, we don't know if we need a
    // packet until we have processed several packets.  So we cache
    // all the packets we see until libav returns a subtitle with
    // the information we need.
    hb_buffer_list_t       list_pass;

    // List of subtitle packets to be output by this decoder.
    hb_buffer_list_t       list;

    // XXX: we may occasionally see subtitles with broken timestamps
    //      while this should really get fixed elsewhere,
    //      dropping subtitles should be avoided as much as possible
    int64_t                last_pts;

    // For PGS subs, we need to pass 'empty' subtitles through (they clear the
    // display) - when doing forced-only extraction, only pass empty subtitles
    // through if we've seen a forced sub since the last empty sub
    uint8_t                seen_forced_sub;
};

// Per-work-object private data: just a handle on the decoder context.
struct hb_work_private_s
{
    hb_avsub_context_t * ctx;
};

// Allocate and open a subtitle decoder context for w->subtitle.
//
// Returns the new context, or NULL when allocation fails or the codec cannot
// be opened.  On every failure path everything allocated here is released
// again, including the AVCodecContext.
hb_avsub_context_t * decavsubInit( hb_work_object_t * w, hb_job_t * job )
{
    hb_avsub_context_t * ctx = calloc( 1, sizeof( hb_avsub_context_t ) );
    if (ctx == NULL)
    {
        return NULL;
    }
    ctx->seen_forced_sub = 0;
    ctx->last_pts        = AV_NOPTS_VALUE;
    ctx->job             = job;
    ctx->subtitle        = w->subtitle;

    AVCodec        * codec   = avcodec_find_decoder(ctx->subtitle->codec_param);
    AVCodecContext * context = avcodec_alloc_context3(codec);
    if (context == NULL)
    {
        // Nothing below can work without a codec context, and it is
        // dereferenced immediately.
        hb_log("decsubInit: avcodec_alloc_context3 failed");
        free(ctx);
        return NULL;
    }
    context->codec = codec;

    hb_buffer_list_clear(&ctx->list);
    hb_buffer_list_clear(&ctx->list_pass);
    ctx->context              = context;
    context->pkt_timebase.num = ctx->subtitle->timebase.num;
    context->pkt_timebase.den = ctx->subtitle->timebase.den;

    // Set decoder opts...
    AVDictionary * av_opts = NULL;
    av_dict_set( &av_opts, "sub_text_format", "ass", 0 );
    if (ctx->subtitle->source == CC608SUB)
    {
        av_dict_set( &av_opts, "real_time", "1", 0 );
    }
    if (ctx->subtitle->source == VOBSUB && ctx->subtitle->palette_set)
    {
        // Hand the 16 entry palette to the decoder as comma separated hex.
        char * palette = hb_strdup_printf(
            "%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x",
            hb_yuv2rgb(ctx->subtitle->palette[0]),
            hb_yuv2rgb(ctx->subtitle->palette[1]),
            hb_yuv2rgb(ctx->subtitle->palette[2]),
            hb_yuv2rgb(ctx->subtitle->palette[3]),
            hb_yuv2rgb(ctx->subtitle->palette[4]),
            hb_yuv2rgb(ctx->subtitle->palette[5]),
            hb_yuv2rgb(ctx->subtitle->palette[6]),
            hb_yuv2rgb(ctx->subtitle->palette[7]),
            hb_yuv2rgb(ctx->subtitle->palette[8]),
            hb_yuv2rgb(ctx->subtitle->palette[9]),
            hb_yuv2rgb(ctx->subtitle->palette[10]),
            hb_yuv2rgb(ctx->subtitle->palette[11]),
            hb_yuv2rgb(ctx->subtitle->palette[12]),
            hb_yuv2rgb(ctx->subtitle->palette[13]),
            hb_yuv2rgb(ctx->subtitle->palette[14]),
            hb_yuv2rgb(ctx->subtitle->palette[15]));
        av_dict_set( &av_opts, "palette", palette, 0 );
        free(palette);
    }

    if (hb_avcodec_open(ctx->context, codec, &av_opts, 0))
    {
        av_dict_free( &av_opts );
        // Release the codec context as well as our own wrapper, otherwise
        // the AVCodecContext would be leaked.
        avcodec_free_context(&ctx->context);
        free(ctx);
        hb_log("decsubInit: avcodec_open failed");
        return NULL;
    }
    av_dict_free( &av_opts );

    if (ctx->subtitle->format == TEXTSUB)
    {
        int height = job->title->geometry.height - job->crop[0] - job->crop[1];
        int width  = job->title->geometry.width  - job->crop[2] - job->crop[3];

        switch (ctx->subtitle->codec_param)
        {
            case AV_CODEC_ID_ASS:
            {
                // Extradata should already be filled in by demux
            } break;

            case AV_CODEC_ID_EIA_608:
            {
                // Mono font for CC
                hb_subtitle_add_ssa_header(ctx->subtitle, HB_FONT_MONO,
                                           20, 384, 288);
            } break;

            default:
            {
                hb_subtitle_add_ssa_header(ctx->subtitle, HB_FONT_SANS,
                                           .066 * job->title->geometry.height,
                                           width, height);
            } break;
        }
    }
    return ctx;
}

// Work-object init hook: allocates the private data and the decoder context.
// Returns 0 on success, 1 on failure.
static int decsubInit( hb_work_object_t * w, hb_job_t * job )
{
    hb_work_private_t * pv;

    pv = calloc( 1, sizeof( hb_work_private_t ) );
    if (pv == NULL)
    {
        return 1;
    }
    pv->ctx = decavsubInit(w, job);
    if (pv->ctx == NULL)
    {
        free(pv);
        return 1;
    }
    w->private_data = pv;

    return 0;
}

// Rewrite the first PGS presentation segment found in the buffer chain so
// that it carries zero composition objects, i.e. it clears the display.
//
// All offsets are checked against b->size before any read, so a truncated
// segment can no longer cause reads past the end of the buffer.
static void make_empty_pgs( hb_buffer_t * buf )
{
    hb_buffer_t * b    = buf;
    uint8_t       done = 0;

    // Each buffer is composed of 1 or more segments.
    // Segment header is:
    //      type   - 1 byte
    //      length - 2 bytes
    // We want to modify the presentation segment which is type 0x16
    //
    // Note that every pgs display set is required to have a presentation
    // segment, so we will only have to look at one display set.
    while (b && !done)
    {
        int ii = 0;

        while (ii + 3 <= b->size)
        {
            uint8_t type;
            int     len;
            int     segment_len_pos;

            type            = b->data[ii++];
            segment_len_pos = ii;
            len             = ((int)b->data[ii] << 8) + b->data[ii + 1];
            ii             += 2;

            if (type == 0x16 && ii + len <= b->size)
            {
                int obj_count;
                int kk, jj = ii;
                int obj_start;

                // Skip
                // video descriptor         5 bytes
                // composition descriptor   3 bytes
                // palette update flg       1 byte
                // palette id ref           1 byte
                jj += 10;
                if (jj >= b->size)
                {
                    // Segment is too short to hold the object count.
                    // Leave the data untouched rather than read past
                    // the end of the buffer.
                    done = 1;
                    break;
                }

                // Set number of composition objects to 0
                obj_count    = b->data[jj];
                b->data[jj]  = 0;
                jj++;
                obj_start    = jj;

                // And remove all the composition objects
                for (kk = 0; kk < obj_count; kk++)
                {
                    uint8_t crop;

                    if (jj + 8 > b->size)
                    {
                        // Object list is truncated; drop whatever is left.
                        jj = b->size;
                        break;
                    }
                    crop = b->data[jj + 3];
                    // skip
                    // object id                - 2 bytes
                    // window id                - 1 byte
                    // object/forced flag       - 1 byte
                    // x pos                    - 2 bytes
                    // y pos                    - 2 bytes
                    jj += 8;
                    if (crop & 0x80)
                    {
                        // skip
                        // crop x               - 2 bytes
                        // crop y               - 2 bytes
                        // crop w               - 2 bytes
                        // crop h               - 2 bytes
                        jj += 8;
                        if (jj > b->size)
                        {
                            // Crop fields are truncated.
                            jj = b->size;
                            break;
                        }
                    }
                }
                // jj <= b->size is guaranteed here, so the tail length
                // below can not be negative.
                if (jj < b->size)
                {
                    memmove(b->data + obj_start, b->data + jj, b->size - jj);
                }
                b->size = obj_start + ( b->size - jj );
                done    = 1;

                // Patch the segment length to match the shortened segment.
                len = obj_start - (segment_len_pos + 2);
                b->data[segment_len_pos]     = len >> 8;
                b->data[segment_len_pos + 1] = len & 0xff;
                break;
            }
            ii += len;
        }
        b = b->next;
    }
}

// Turn the cached passthru packets into an "empty" subtitle for the given
// source type.  PGS packets are rewritten in place, DVB packets are left
// alone, and for every other source the cached packets are discarded.
static void make_empty_sub( int source, hb_buffer_list_t * list_pass )
{
    switch (source)
    {
        case PGSSUB:
            make_empty_pgs(hb_buffer_list_head(list_pass));
            break;
        case DVBSUB:
            break;
        default:
            hb_buffer_list_close(list_pass);
            break;
    }
}

// Returns a pointer to the first character after the ASS preamble
// (i.e. just past the 8th comma), or NULL if ssa is NULL or contains
// fewer than 8 commas.
static const char * ssa_text(const char * ssa)
{
    int          ii;
    const char * text = ssa;

    if (ssa == NULL)
        return NULL;
    for (ii = 0; ii < 8; ii++)
    {
        text = strchr(text, ',');
        if (text == NULL)
            break;
        text++;
    }
    return text;
}

// Decode one input buffer and emit zero or more subtitle buffers.
//
// *buf_in must be non-NULL; it is set to NULL when this function takes
// ownership of the buffer (EOF, or passthru caching).  Decoded output is
// returned through *buf_out.
//
// Returns HB_WORK_DONE on EOF and HB_WORK_OK otherwise.  When libavcodec
// reports a decode error the function returns HB_WORK_OK without writing
// *buf_out.
int decavsubWork( hb_avsub_context_t * ctx,
                  hb_buffer_t ** buf_in,
                  hb_buffer_t ** buf_out )
{
    hb_buffer_t * in = *buf_in;

    if (in->s.flags & HB_BUF_FLAG_EOF)
    {
        /* EOF on input stream - send it downstream & say that we're done */
        *buf_in = NULL;
        hb_buffer_list_append(&ctx->list, in);

        *buf_out = hb_buffer_list_clear(&ctx->list);
        return HB_WORK_DONE;
    }

    if (!ctx->job->indepth_scan &&
        ctx->subtitle->config.dest == PASSTHRUSUB &&
        hb_subtitle_can_pass(ctx->subtitle->source, ctx->job->mux))
    {
        // Append to buffer list.  It will be sent to fifo after we determine
        // if this is a packet we need.
        hb_buffer_list_append(&ctx->list_pass, in);

        // We are keeping the buffer, so prevent the filter loop from
        // deleting it.
        *buf_in = NULL;
    }

    AVSubtitle subtitle;
    memset( &subtitle, 0, sizeof(subtitle) );

    int64_t duration = AV_NOPTS_VALUE;

    AVPacket avp;
    av_init_packet( &avp );
    avp.data = in->data;
    avp.size = in->size;
    avp.pts  = in->s.start;

    if (in->s.duration > 0 || ctx->subtitle->source != PGSSUB)
    {
        duration = in->s.duration;
    }

    if (duration <= 0 &&
        in->s.start != AV_NOPTS_VALUE &&
        in->s.stop  != AV_NOPTS_VALUE &&
        in->s.stop  >  in->s.start)
    {
        duration = in->s.stop - in->s.start;
    }

    int has_subtitle = 0;

    while (avp.size > 0)
    {
        int usedBytes = avcodec_decode_subtitle2(ctx->context, &subtitle,
                                                 &has_subtitle, &avp );
        if (usedBytes < 0)
        {
            hb_error("unable to decode subtitle with %d bytes.", avp.size);
            return HB_WORK_OK;
        }

        if (usedBytes == 0)
        {
            // We expect avcodec_decode_subtitle2 to return the number
            // of bytes consumed, or an error.  If for some unforeseen reason
            // it returns 0, lets not get stuck in an infinite loop!
            usedBytes = avp.size;
        }

        if (usedBytes <= avp.size)
        {
            avp.data += usedBytes;
            avp.size -= usedBytes;
        }
        else
        {
            avp.size = 0;
        }

        if (!has_subtitle)
        {
            continue;
        }

        uint8_t forced_sub = 0;
        uint8_t usable_sub = 0;
        uint8_t clear_sub  = 0;

        // collect subtitle statistics for foreign audio search
        if (subtitle.num_rects)
        {
            ctx->subtitle->hits++;
            if (subtitle.rects[0]->flags & AV_SUBTITLE_FLAG_FORCED)
            {
                forced_sub = 1;
                ctx->subtitle->forced_hits++;
            }
        }
        else
        {
            clear_sub = 1;
        }

        // do we need this subtitle?
        usable_sub =
            // Need all subs
            !ctx->subtitle->config.force ||
            // Need only forced subs
            forced_sub ||
            // Need to terminate last forced sub
            (ctx->seen_forced_sub && clear_sub);

        // do we need to create an empty subtitle?
        if (ctx->subtitle->config.force &&
            ctx->seen_forced_sub && !usable_sub)
        {
            // We are forced-only and need to output this subtitle, but
            // it's neither forced nor empty.
            //
            // If passthru, create an empty subtitle.
            // Also, flag an empty subtitle for subtitle RENDER.
            make_empty_sub(ctx->subtitle->source, &ctx->list_pass);
            usable_sub = clear_sub = 1;
        }

        if (!usable_sub)
        {
            // Discard accumulated passthrough subtitle data
            hb_buffer_list_close(&ctx->list_pass);
            avsubtitle_free(&subtitle);
            continue;
        }

        // Keep track of forced subs that we may need to manually
        // terminate with an empty subtitle packet.
        ctx->seen_forced_sub = forced_sub && !clear_sub;

        int64_t       pts = AV_NOPTS_VALUE;
        hb_buffer_t * out = NULL;

        if (clear_sub)
        {
            duration = 0;
        }
        else if (ctx->subtitle->source != DVBSUB &&
                 ctx->subtitle->source != PGSSUB &&
                 subtitle.end_display_time > 0 &&
                 subtitle.end_display_time < UINT32_MAX)
        {
            duration = av_rescale(subtitle.end_display_time, 90000, 1000);
        }

        if (subtitle.pts != AV_NOPTS_VALUE)
        {
            pts = av_rescale(subtitle.pts, 90000, AV_TIME_BASE) +
                  av_rescale(subtitle.start_display_time, 90000, 1000);
        }
        else
        {
            if (in->s.start >= 0)
            {
                pts = in->s.start;
            }
            else
            {
                // XXX: a broken pts will cause us to drop this subtitle,
                //      which is bad; use a default duration of 3 seconds
                //
                //      A broken pts is only generated when a subtitle packet
                //      occurs after a discontinuity and before the
                //      next audio or video packet which re-establishes
                //      timing (afaik).
                if (ctx->last_pts == AV_NOPTS_VALUE)
                {
                    pts = 0LL;
                }
                else
                {
                    pts = ctx->last_pts + 3 * 90000LL;
                }
                hb_log("[warning] decavsub: track %d, invalid PTS",
                       ctx->subtitle->out_track);
            }
        }

        // work around broken timestamps
        if (pts < ctx->last_pts)
        {
            // XXX: this should only happen if the previous pts
            // was unknown and our 3 second default duration
            // overshot the next subtitle pts.
            //
            // assign a 1 second duration
            hb_log("decavsub: track %d, non-monotically increasing PTS, last %"PRId64" current %"PRId64"",
                   ctx->subtitle->out_track,
                   ctx->last_pts, pts);
            pts = ctx->last_pts + 1 * 90000LL;
        }
        ctx->last_pts = pts;

        if (ctx->subtitle->format == TEXTSUB)
        {
            // TEXTSUB && (PASSTHROUGHSUB || RENDERSUB)

            // Text subtitles are treated the same regardless of
            // whether we are burning or passing through.  They
            // get translated to SSA
            //
            // When using the "real_time" option with CC608 subtitles,
            // ffmpeg prepends an ASS rect that has only the preamble
            // to every list of returned rects.  libass doesn't like this
            // and logs a warning for every one of these. So strip these
            // out by using only the last rect in the list.
            //
            // Also, when a CC needs to be removed from the screen, ffmpeg
            // emits a single rect with only the preamble.  Detect this
            // and flag an "End Of Subtitle" EOS.
            const char * ass  = NULL;
            const char * text = NULL;

            // Only look at the last rect when there is one; with no rects
            // the index would be -1 and the access out of bounds.
            if (subtitle.num_rects > 0)
            {
                ass  = subtitle.rects[subtitle.num_rects - 1]->ass;
                text = ssa_text(ass);
            }

            if (!clear_sub && text != NULL && *text != 0)
            {
                int size = strlen(ass) + 1;
                out = hb_buffer_init(size);
                strcpy((char*)out->data, ass);
            }
            else
            {
                out = hb_buffer_init(0);
                out->s.flags = HB_BUF_FLAG_EOS;
            }
            hb_buffer_list_close(&ctx->list_pass);
        }
        else if (ctx->subtitle->config.dest == PASSTHRUSUB &&
                 hb_subtitle_can_pass(ctx->subtitle->source, ctx->job->mux))
        {
            // PICTURESUB && PASSTHROUGHSUB

            // subtitles may be spread across multiple packets
            //
            // In the MKV container, all segments are found in the same
            // packet (this is expected by some devices, such as the
            // WD TV Live).  So if there are multiple packets,
            // merge them.
            if (hb_buffer_list_count(&ctx->list_pass) == 1)
            {
                // packets already merged (e.g. MKV sources)
                out = hb_buffer_list_clear(&ctx->list_pass);
                out->s.start    = AV_NOPTS_VALUE;
                out->s.stop     = AV_NOPTS_VALUE;
                out->s.duration = (int64_t)AV_NOPTS_VALUE;
            }
            else
            {
                int           size = 0;
                uint8_t     * data;
                hb_buffer_t * b;

                // First pass: total payload size of the cached packets.
                b = hb_buffer_list_head(&ctx->list_pass);
                while (b != NULL)
                {
                    size += b->size;
                    b = b->next;
                }

                out = hb_buffer_init( size );
                out->s.duration = (int64_t)AV_NOPTS_VALUE;

                // Second pass: concatenate the payloads into one buffer.
                data = out->data;
                b = hb_buffer_list_head(&ctx->list_pass);
                while (b != NULL)
                {
                    memcpy(data, b->data, b->size);
                    data += b->size;
                    b = b->next;
                }
                hb_buffer_list_close(&ctx->list_pass);
            }
            if (clear_sub)
            {
                out->s.flags = HB_BUF_FLAG_EOS;
            }
        }
        else
        {
            // PICTURESUB && RENDERSUB
            if (!clear_sub)
            {
                // clear_sub == 0 implies num_rects > 0, so rects[0] exists.
                unsigned ii, x0, y0, x1, y1, w, h;

                x0 = subtitle.rects[0]->x;
                y0 = subtitle.rects[0]->y;
                x1 = subtitle.rects[0]->x + subtitle.rects[0]->w;
                y1 = subtitle.rects[0]->y + subtitle.rects[0]->h;

                // First, find total bounding rectangle
                for (ii = 1; ii < subtitle.num_rects; ii++)
                {
                    if (subtitle.rects[ii]->x < x0)
                        x0 = subtitle.rects[ii]->x;
                    if (subtitle.rects[ii]->y < y0)
                        y0 = subtitle.rects[ii]->y;
                    if (subtitle.rects[ii]->x + subtitle.rects[ii]->w > x1)
                        x1 = subtitle.rects[ii]->x + subtitle.rects[ii]->w;
                    if (subtitle.rects[ii]->y + subtitle.rects[ii]->h > y1)
                        y1 = subtitle.rects[ii]->y + subtitle.rects[ii]->h;
                }
                w = x1 - x0;
                h = y1 - y0;

                out = hb_frame_buffer_init(AV_PIX_FMT_YUVA420P, w, h);
                memset(out->data, 0, out->size);

                out->f.x             = x0;
                out->f.y             = y0;
                out->f.window_width  = ctx->context->width;
                out->f.window_height = ctx->context->height;

                // Paint every rect into the combined frame, converting the
                // palettised pixels to YUVA.
                for (ii = 0; ii < subtitle.num_rects; ii++)
                {
                    AVSubtitleRect *rect = subtitle.rects[ii];

                    int off_x = rect->x - x0;
                    int off_y = rect->y - y0;
                    uint8_t *lum     = out->plane[0].data;
                    uint8_t *chromaU = out->plane[1].data;
                    uint8_t *chromaV = out->plane[2].data;
                    uint8_t *alpha   = out->plane[3].data;

                    lum     += off_y * out->plane[0].stride + off_x;
                    alpha   += off_y * out->plane[3].stride + off_x;
                    chromaU += (off_y >> 1) * out->plane[1].stride +
                               (off_x >> 1);
                    chromaV += (off_y >> 1) * out->plane[2].stride +
                               (off_x >> 1);

                    int xx, yy;
                    for (yy = 0; yy < rect->h; yy++)
                    {
                        for (xx = 0; xx < rect->w; xx++)
                        {
                            uint32_t argb, yuv;
                            int      pixel;
                            uint8_t  color;

                            // data[0] holds palette indices, data[1] the
                            // 32 bit palette entries.
                            pixel = yy * rect->w + xx;
                            color = rect->data[0][pixel];
                            argb  = ((uint32_t*)rect->data[1])[color];
                            yuv   = hb_rgb2yuv(argb);

                            lum[xx]   = (yuv  >> 16) & 0xff;
                            alpha[xx] = (argb >> 24) & 0xff;
                            // Chroma is subsampled: one sample per 2x2.
                            if ((xx & 1) == 0 && (yy & 1) == 0)
                            {
                                chromaV[xx>>1] = (yuv >> 8) & 0xff;
                                chromaU[xx>>1] = yuv & 0xff;
                            }
                        }
                        lum += out->plane[0].stride;
                        if ((yy & 1) == 0)
                        {
                            chromaU += out->plane[1].stride;
                            chromaV += out->plane[2].stride;
                        }
                        alpha += out->plane[3].stride;
                    }
                }
            }
            else
            {
                out = hb_buffer_init( 0 );

                out->s.flags  = HB_BUF_FLAG_EOS;
                out->f.x      = 0;
                out->f.y      = 0;
                out->f.width  = 0;
                out->f.height = 0;
                duration      = 0;
            }
        }
        out->s.id           = in->s.id;
        out->s.scr_sequence = in->s.scr_sequence;
        out->s.frametype    = HB_FRAME_SUBTITLE;
        out->s.start        = pts;
        if (duration != AV_NOPTS_VALUE)
        {
            out->s.stop     = pts + duration;
            out->s.duration = duration;
        }
        hb_buffer_list_append(&ctx->list, out);
        avsubtitle_free(&subtitle);
    }

    *buf_out = hb_buffer_list_clear(&ctx->list);
    return HB_WORK_OK;
}

// Work-object work hook: forwards to decavsubWork with the private context.
static int decsubWork( hb_work_object_t * w,
                       hb_buffer_t ** buf_in,
                       hb_buffer_t ** buf_out )
{
    hb_work_private_t * pv = w->private_data;
    return decavsubWork(pv->ctx, buf_in, buf_out );
}

// Release a context created by decavsubInit.  NULL is accepted and ignored.
void decavsubClose( hb_avsub_context_t * ctx )
{
    if (ctx == NULL)
    {
        return;
    }
    hb_buffer_list_close(&ctx->list_pass);
    // Output buffers can still be queued here if decavsubWork returned
    // early on a decode error; release them too so they do not leak.
    hb_buffer_list_close(&ctx->list);
    avcodec_flush_buffers(ctx->context);
    avcodec_free_context(&ctx->context);
    free(ctx);
}

// Work-object close hook: tears down the context and the private data.
static void decsubClose( hb_work_object_t * w )
{
    hb_work_private_t * pv = w->private_data;

    if (pv == NULL)
    {
        return;
    }
    decavsubClose(pv->ctx);
    free(pv);
    w->private_data = NULL;
}

hb_work_object_t hb_decavsub =
{
    .id    = WORK_DECAVSUB,
    .name  = "Subtitle decoder (libavcodec)",
    .init  = decsubInit,
    .work  = decsubWork,
    .close = decsubClose,
};