diff options
author | dynaflash <[email protected]> | 2010-05-04 16:56:43 +0000 |
---|---|---|
committer | dynaflash <[email protected]> | 2010-05-04 16:56:43 +0000 |
commit | 15aa5c37af71153316adcd7d56dbe7d12ad895c2 (patch) | |
tree | 87a38102ec17ffbc9af3329a5641417c5c379474 | |
parent | 5a0738fc0c12c7ce73346b8e7c06d67a28fdcbfa (diff) |
Universal Text Subtitle Support Initial Implementation
- Patch by davidfstr ... Thanks!
- Adds support for reading TEXT subtitle tracks from file inputs
- Tested combinations:
-- MKV UTF-8 -> MKV UTF-8 (passthru)
-- MKV UTF-8 -> MP4 TX3G (upconvert)
-- MP4 TX3G -> MKV UTF-8 (downconvert)
-- MP4 TX3G -> MP4 TX3G (downconvert to UTF-8 then upconvert)
- Further explained here: http://forum.handbrake.fr/viewtopic.php?f=4&t=16099
git-svn-id: svn://svn.handbrake.fr/HandBrake/trunk@3283 b64f7644-9d1e-0410-96f1-a4d463321fa5
-rw-r--r-- | libhb/common.h | 35 | ||||
-rw-r--r-- | libhb/dectx3gsub.c | 240 | ||||
-rw-r--r-- | libhb/decutf8sub.c | 64 | ||||
-rw-r--r-- | libhb/hb.c | 4 | ||||
-rw-r--r-- | libhb/internal.h | 31 | ||||
-rw-r--r-- | libhb/stream.c | 74 | ||||
-rw-r--r-- | libhb/sync.c | 4 | ||||
-rw-r--r-- | libhb/work.c | 21 |
8 files changed, 463 insertions, 10 deletions
diff --git a/libhb/common.h b/libhb/common.h index 33c3da2f7..cdb4d9fe1 100644 --- a/libhb/common.h +++ b/libhb/common.h @@ -125,6 +125,8 @@ struct hb_subtitle_config_s enum subdest { RENDERSUB, PASSTHRUSUB } dest; int force; int default_track; + + /* SRT subtitle tracks only */ char src_filename[128]; char src_codeset[40]; int64_t offset; @@ -465,6 +467,35 @@ struct hb_chapter_s char title[1024]; }; +/* + * A subtitle track. + * + * Required fields when a demuxer creates a subtitle track are: + * > id + * - ID of this track + * - must be unique for all tracks within a single job, + * since it is used to look up the appropriate in-FIFO with GetFifoForId() + * > format + * - format of the packets the subtitle decoder work-object sends to sub->fifo_raw + * - for passthru subtitles, is also the format of the final packets sent to sub->fifo_out + * - PICTURESUB for banded 8-bit YAUV pixels + * - TEXTSUB for UTF-8 text marked up with <b>, <i>, or <u> + * - read by the muxers, and by the subtitle burn-in logic in the hb_sync_video work-object + * > source + * - used to create the appropriate subtitle decoder work-object in do_job() + * > config.dest + * - whether to render the subtitle on the video track (RENDERSUB) or + * to pass it through its own subtitle track in the output container (PASSTHRUSUB) + * - for legacy compatibility, all newly created VOBSUB tracks should default to RENDERSUB + * - since only VOBSUBs are renderable (as of 2010-04-25), all other newly created + * subtitle track types should default to PASSTHRUSUB + * > lang + * - user-readable description of the subtitle track + * - may correspond to the language of the track (see the 'iso639_2' field) + * - may correspond to the type of track (see the 'type' field; ex: "Closed Captions") + * > iso639_2 + * - language code for the subtitle, or "und" if unknown + */ struct hb_subtitle_s { int id; @@ -473,7 +504,7 @@ struct hb_subtitle_s hb_subtitle_config_t config; enum subtype { PICTURESUB, TEXTSUB } 
format; - enum subsource { VOBSUB, SRTSUB, CC608SUB, CC708SUB } source; + enum subsource { VOBSUB, SRTSUB, CC608SUB, /*unused*/CC708SUB, UTF8SUB, TX3GSUB } source; char lang[1024]; char iso639_2[4]; uint8_t type; /* Closed Caption, Childrens, Directors etc */ @@ -693,6 +724,8 @@ extern hb_work_object_t hb_decvobsub; extern hb_work_object_t hb_encvobsub; extern hb_work_object_t hb_deccc608; extern hb_work_object_t hb_decsrtsub; +extern hb_work_object_t hb_decutf8sub; +extern hb_work_object_t hb_dectx3gsub; extern hb_work_object_t hb_render; extern hb_work_object_t hb_encavcodec; extern hb_work_object_t hb_encx264; diff --git a/libhb/dectx3gsub.c b/libhb/dectx3gsub.c new file mode 100644 index 000000000..fe413ad98 --- /dev/null +++ b/libhb/dectx3gsub.c @@ -0,0 +1,240 @@ +/* + This file is part of the HandBrake source code. + Homepage: <http://handbrake.fr/>. + It may be used under the terms of the GNU General Public License. */ + +/* + * Converts TX3G subtitles to UTF-8 subtitles with limited HTML-style markup (<b>, <i>, <u>). 
+ * + * TX3G == MPEG 4, Part 17 (ISO/IEC 14496-17) == 3GPP Timed Text (26.245) + * A full reference to the format can be found here: + * http://www.3gpp.org/ftp/Specs/html-info/26245.htm + * + * @author David Foster (davidfstr) + */ + +#include <stdlib.h> +#include <stdio.h> +#include "hb.h" + +typedef enum { + BOLD = 0x1, + ITALIC = 0x2, + UNDERLINE = 0x4 +} FaceStyleFlag; + +#define NUM_FACE_STYLE_FLAGS 3 +#define MAX_OPEN_TAG_SIZE 3 // "<b>" +#define MAX_CLOSE_TAG_SIZE 4 // "</b>" + +typedef struct { + uint16_t startChar; // NOTE: indices in terms of *character* (not: byte) positions + uint16_t endChar; + uint16_t fontID; + uint8_t faceStyleFlags; // FaceStyleFlag + uint8_t fontSize; + uint32_t textColorRGBA; +} StyleRecord; + +// NOTE: None of these macros check for buffer overflow +#define READ_U8() *pos; pos += 1; +#define READ_U16() (pos[0] << 8) | pos[1]; pos += 2; +#define READ_U32() (pos[0] << 24) | (pos[1] << 16) | (pos[2] << 8) | pos[3]; pos += 4; +#define READ_ARRAY(n) pos; pos += n; + +#define WRITE_CHAR(c) {dst[0]=c; dst += 1;} +#define WRITE_START_TAG(c) {dst[0]='<'; dst[1]=c; dst[2]='>'; dst += 3;} +#define WRITE_END_TAG(c) {dst[0]='<'; dst[1]='/'; dst[2]=c; dst[3]='>'; dst += 4;} + +#define FOURCC(str) ((((uint32_t) str[0]) << 24) | \ + (((uint32_t) str[1]) << 16) | \ + (((uint32_t) str[2]) << 8) | \ + (((uint32_t) str[3]) << 0)) +#define IS_10xxxxxx(c) ((c & 0xC0) == 0x80) + +static hb_buffer_t *tx3g_decode_to_utf8( hb_buffer_t *in ) +{ + uint8_t *pos = in->data; + uint8_t *end = in->data + in->size; + + uint16_t numStyleRecords = 0; + + uint8_t *startStyle; + uint8_t *endStyle; + + /* + * Parse the packet as a TX3G TextSample. + * + * Look for a single StyleBox ('styl') and read all contained StyleRecords. + * Ignore all other box types. + * + * NOTE: Buffer overflows on read are not checked. 
+ */ + uint16_t textLength = READ_U16(); + uint8_t *text = READ_ARRAY(textLength); + startStyle = calloc( textLength, 1 ); + endStyle = calloc( textLength, 1 ); + while ( pos < end ) { + /* + * Read TextSampleModifierBox + */ + uint32_t size = READ_U32(); + if ( size == 0 ) { + size = pos - end; // extends to end of packet + } + if ( size == 1 ) { + hb_log( "dectx3gsub: TextSampleModifierBox has unsupported large size" ); + break; + } + uint32_t type = READ_U32(); + if ( type == FOURCC("uuid") ) { + hb_log( "dectx3gsub: TextSampleModifierBox has unsupported extended type" ); + break; + } + + if ( type == FOURCC("styl") ) { + // Found a StyleBox. Parse the contained StyleRecords + + if ( numStyleRecords != 0 ) { + hb_log( "dectx3gsub: found additional StyleBoxes on subtitle; skipping" ); + READ_ARRAY(size); + continue; + } + + numStyleRecords = READ_U16(); + + int i; + for (i=0; i<numStyleRecords; i++) { + StyleRecord curRecord; + curRecord.startChar = READ_U16(); + curRecord.endChar = READ_U16(); + curRecord.fontID = READ_U16(); + curRecord.faceStyleFlags = READ_U8(); + curRecord.fontSize = READ_U8(); + curRecord.textColorRGBA = READ_U32(); + + startStyle[curRecord.startChar] |= curRecord.faceStyleFlags; + endStyle[curRecord.endChar] |= curRecord.faceStyleFlags; + } + } else { + // Found some other kind of TextSampleModifierBox. Skip it. 
+ READ_ARRAY(size); + } + } + + /* + * Copy text to output buffer, and add HTML markup for the style records + */ + int maxOutputSize = textLength + (numStyleRecords * NUM_FACE_STYLE_FLAGS * (MAX_OPEN_TAG_SIZE + MAX_CLOSE_TAG_SIZE)); + hb_buffer_t *out = hb_buffer_init( maxOutputSize ); + uint8_t *dst = out->data; + int charIndex = 0; + for ( pos = text, end = text + textLength; pos < end; pos++ ) { + if (IS_10xxxxxx(*pos)) { + // Is a non-first byte of a multi-byte UTF-8 character + WRITE_CHAR(*pos); + continue; // ...without incrementing 'charIndex' + } + + uint8_t plusStyles = startStyle[charIndex]; + uint8_t minusStyles = endStyle[charIndex]; + + if (minusStyles & UNDERLINE) + WRITE_END_TAG('u'); + if (minusStyles & ITALIC) + WRITE_END_TAG('i'); + if (minusStyles & BOLD) + WRITE_END_TAG('b'); + + if (plusStyles & BOLD) + WRITE_START_TAG('b'); + if (plusStyles & ITALIC) + WRITE_START_TAG('i'); + if (plusStyles & UNDERLINE) + WRITE_START_TAG('u'); + + WRITE_CHAR(*pos); + charIndex++; + } + + // Trim output buffer to the actual amount of data written + out->size = dst - out->data; + + // Copy metadata from the input packet to the output packet + out->start = in->start; + out->stop = in->stop; + + free( startStyle ); + free( endStyle ); + + return out; +} + +#undef READ_U8 +#undef READ_U16 +#undef READ_U32 +#undef READ_ARRAY + +#undef WRITE_CHAR +#undef WRITE_START_TAG +#undef WRITE_END_TAG + +static int dectx3gInit( hb_work_object_t * w, hb_job_t * job ) +{ + return 0; +} + +static int dectx3gWork( hb_work_object_t * w, hb_buffer_t ** buf_in, + hb_buffer_t ** buf_out ) +{ + hb_buffer_t * in = *buf_in; + hb_buffer_t * out = NULL; + + // Warn if the subtitle's duration has not been passed through by the demuxer, + // which will prevent the subtitle from displaying at all + if ( in->stop == 0 ) { + hb_log( "dectx3gsub: subtitle packet lacks duration" ); + } + + if ( in->size > 0 ) { + out = tx3g_decode_to_utf8(in); + } else { + out = hb_buffer_init( 0 ); + } + + // 
We shouldn't be storing the extra NULL character, + // but the MP4 muxer expects this, unfortunately. + if ( out->size > 0 && out->data[out->size - 1] != '\0' ) { + // NOTE: out->size remains unchanged + hb_buffer_realloc( out, out->size + 1 ); + out->data[out->size] = '\0'; + } + + // If the input packet was non-empty, do not pass through + // an empty output packet (even if the subtitle was empty), + // as this would be interpreted as an end-of-stream + if ( in->size > 0 && out->size == 0 ) { + hb_buffer_close(&out); + } + + // Dispose the input packet, as it is no longer needed + hb_buffer_close(&in); + + *buf_in = NULL; + *buf_out = out; + return HB_WORK_OK; +} + +static void dectx3gClose( hb_work_object_t * w ) +{ + // nothing +} + +hb_work_object_t hb_dectx3gsub = +{ + WORK_DECTX3GSUB, + "TX3G Subtitle Decoder", + dectx3gInit, + dectx3gWork, + dectx3gClose +}; diff --git a/libhb/decutf8sub.c b/libhb/decutf8sub.c new file mode 100644 index 000000000..dcd05d4fc --- /dev/null +++ b/libhb/decutf8sub.c @@ -0,0 +1,64 @@ +/* + This file is part of the HandBrake source code. + Homepage: <http://handbrake.fr/>. + It may be used under the terms of the GNU General Public License. */ + +/* + * Decoder for UTF-8 subtitles obtained from file input-sources. + * + * Input and output packet format is UTF-8 encoded text, + * with limited HTML-style markup (only <b>, <i>, and <u>). 
+ * + * @author David Foster (davidfstr) + */ + +#include <stdlib.h> +#include <stdio.h> +#include "hb.h" + +static int decutf8Init( hb_work_object_t * w, hb_job_t * job ) +{ + return 0; +} + +static int decutf8Work( hb_work_object_t * w, hb_buffer_t ** buf_in, + hb_buffer_t ** buf_out ) +{ + hb_buffer_t * in = *buf_in; + hb_buffer_t * out = NULL; + + // Pass the packets through without modification + out = in; + + // Warn if the subtitle's duration has not been passed through by the demuxer, + // which will prevent the subtitle from displaying at all + if ( out->stop == 0 ) { + hb_log( "decutf8sub: subtitle packet lacks duration" ); + } + + // We shouldn't be storing the extra NULL character, + // but the MP4 muxer expects this, unfortunately. + if ( out->size > 0 && out->data[out->size - 1] != '\0' ) { + // NOTE: out->size remains unchanged + hb_buffer_realloc( out, out->size + 1 ); + out->data[out->size] = '\0'; + } + + *buf_in = NULL; + *buf_out = out; + return HB_WORK_OK; +} + +static void decutf8Close( hb_work_object_t * w ) +{ + // nothing +} + +hb_work_object_t hb_decutf8sub = +{ + WORK_DECUTF8SUB, + "UTF-8 Subtitle Decoder", + decutf8Init, + decutf8Work, + decutf8Close +}; diff --git a/libhb/hb.c b/libhb/hb.c index acdce5597..0c951f41d 100644 --- a/libhb/hb.c +++ b/libhb/hb.c @@ -276,6 +276,8 @@ hb_handle_t * hb_init( int verbose, int update_check ) hb_register( &hb_encvobsub ); hb_register( &hb_deccc608 ); hb_register( &hb_decsrtsub ); + hb_register( &hb_decutf8sub ); + hb_register( &hb_dectx3gsub ); hb_register( &hb_render ); hb_register( &hb_encavcodec ); hb_register( &hb_encx264 ); @@ -377,6 +379,8 @@ hb_handle_t * hb_init_dl( int verbose, int update_check ) hb_register( &hb_encvobsub ); hb_register( &hb_deccc608 ); hb_register( &hb_decsrtsub ); + hb_register( &hb_decutf8sub ); + hb_register( &hb_dectx3gsub ); hb_register( &hb_render ); hb_register( &hb_encavcodec ); hb_register( &hb_encx264 ); diff --git a/libhb/internal.h b/libhb/internal.h index 
dd922dc46..dd217507d 100644 --- a/libhb/internal.h +++ b/libhb/internal.h @@ -38,19 +38,25 @@ void hb_set_state( hb_handle_t *, hb_state_t * ); /*********************************************************************** * fifo.c **********************************************************************/ +/* + * Holds a packet of data that is moving through the transcoding process. + * + * May have metadata associated with it via extra fields + * that are conditionally used depending on the type of packet. + */ struct hb_buffer_s { - int size; - int alloc; - uint8_t * data; - int cur; + int size; // size of this packet + int alloc; // used internally by the packet allocator (hb_buffer_init) + uint8_t * data; // packet data + int cur; // used internally by packet lists (hb_list_t) int64_t sequence; - int id; - int64_t start; - int64_t stop; - int new_chap; + int id; // ID of the track that the packet comes from + int64_t start; // Video and subtitle packets: start time of frame/subtitle + int64_t stop; // Video and subtitle packets: stop time of frame/subtitle + int new_chap; // Video packets: ??? #define HB_FRAME_IDR 0x01 #define HB_FRAME_I 0x02 @@ -66,13 +72,20 @@ struct hb_buffer_s /* Holds the output PTS from x264, for use by b-frame offsets in muxmp4.c */ int64_t renderOffset; + // VOB subtitle packets: + // Location and size of the subpicture. int x; int y; int width; int height; + // Video packets (after processing by the hb_sync_video work-object): + // A (copy of a) VOB subtitle packet that needs to be burned into this video packet by the hb_render work-object. + // Subtitles that are simply passed thru are NOT attached to the associated video packets. 
hb_buffer_t * sub; + // Packets in a list: + // the next packet in the list hb_buffer_t * next; }; @@ -285,6 +298,8 @@ enum WORK_DECCC608, WORK_DECVOBSUB, WORK_DECSRTSUB, + WORK_DECUTF8SUB, + WORK_DECTX3GSUB, WORK_ENCVOBSUB, WORK_RENDER, WORK_ENCAVCODEC, diff --git a/libhb/stream.c b/libhb/stream.c index caed365cd..4b1bd98eb 100644 --- a/libhb/stream.c +++ b/libhb/stream.c @@ -2831,6 +2831,56 @@ static void add_ffmpeg_audio( hb_title_t *title, hb_stream_t *stream, int id ) } } +static void add_ffmpeg_subtitle( hb_title_t *title, hb_stream_t *stream, int id ) +{ + AVStream *st = stream->ffmpeg_ic->streams[id]; + AVCodecContext *codec = st->codec; + + hb_subtitle_t *subtitle = calloc( 1, sizeof(*subtitle) ); + + subtitle->id = id; + + switch ( codec->codec_id ) + { + // TODO(davidfstr): get universal VOB sub input working + /* + case CODEC_ID_DVD_SUBTITLE: + subtitle->format = PICTURESUB; + subtitle->source = VOBSUB; + subtitle->config.dest = RENDERSUB; // By default render (burn-in) the VOBSUB. 
+ break; + */ + case CODEC_ID_TEXT: + subtitle->format = TEXTSUB; + subtitle->source = UTF8SUB; + subtitle->config.dest = PASSTHRUSUB; + break; + case CODEC_ID_MOV_TEXT: // TX3G + subtitle->format = TEXTSUB; + subtitle->source = TX3GSUB; + subtitle->config.dest = PASSTHRUSUB; + break; + // TODO(davidfstr): implement SSA subtitle support + /* + case CODEC_ID_SSA: + subtitle->format = TEXTSUB; + subtitle->source = SSASUB; + subtitle->config.dest = PASSTHRUSUB; + break; + */ + default: + hb_log("add_ffmpeg_subtitle: unknown subtitle stream type: 0x%x", (int) codec->codec_id); + free(subtitle); + return; + } + + iso639_lang_t *language = lang_for_code2( st->language ); + strcpy( subtitle->lang, language->eng_name ); + strncpy( subtitle->iso639_2, language->iso639_2, 4 ); + + hb_list_add(title->list_subtitle, subtitle); +} + static hb_title_t *ffmpeg_title_scan( hb_stream_t *stream ) { AVFormatContext *ic = stream->ffmpeg_ic; @@ -2887,6 +2937,10 @@ static hb_title_t *ffmpeg_title_scan( hb_stream_t *stream ) { add_ffmpeg_audio( title, stream, i ); } + else if ( ic->streams[i]->codec->codec_type == CODEC_TYPE_SUBTITLE ) + { + add_ffmpeg_subtitle( title, stream, i ); + } } title->container_name = strdup( ic->iformat->name ); @@ -3074,6 +3128,26 @@ static int ffmpeg_read( hb_stream_t *stream, hb_buffer_t *buf ) { buf->renderOffset = buf->start; } + + /* + * Fill out buf->stop for subtitle packets + * + * libavcodec's MKV demuxer stores the duration of UTF-8 (TEXT) subtitles + * in the 'convergence_duration' field for some reason. + * + * Other subtitles' durations are stored in the 'duration' field. 
+ */ + enum CodecID ffmpeg_pkt_codec = stream->ffmpeg_ic->streams[stream->ffmpeg_pkt->stream_index]->codec->codec_id; + if ( ffmpeg_pkt_codec == CODEC_ID_TEXT ) { + int64_t ffmpeg_pkt_duration = stream->ffmpeg_pkt->convergence_duration; + int64_t buf_duration = av_to_hb_pts( ffmpeg_pkt_duration, tsconv ); + buf->stop = buf->start + buf_duration; + } + if ( ffmpeg_pkt_codec == CODEC_ID_MOV_TEXT ) { + int64_t ffmpeg_pkt_duration = stream->ffmpeg_pkt->duration; + int64_t buf_duration = av_to_hb_pts( ffmpeg_pkt_duration, tsconv ); + buf->stop = buf->start + buf_duration; + } /* * Check to see whether this video buffer is on a chapter diff --git a/libhb/sync.c b/libhb/sync.c index a36b37b5b..36017801b 100644 --- a/libhb/sync.c +++ b/libhb/sync.c @@ -564,7 +564,9 @@ int syncVideoWork( hb_work_object_t * w, hb_buffer_t ** buf_in, */ if( subtitle->source == CC608SUB || subtitle->source == CC708SUB || - subtitle->source == SRTSUB ) + subtitle->source == SRTSUB || + subtitle->source == UTF8SUB || + subtitle->source == TX3GSUB) { /* * Rewrite timestamps on subtitles that came from Closed Captions diff --git a/libhb/work.c b/libhb/work.c index d2c8ac66b..838e50702 100644 --- a/libhb/work.c +++ b/libhb/work.c @@ -308,6 +308,8 @@ void hb_display_job_info( hb_job_t * job ) hb_log( " * subtitle track %i, %s (id %x) %s [%s] -> %s ", subtitle->track, subtitle->lang, subtitle->id, subtitle->format == PICTURESUB ? "Picture" : "Text", subtitle->source == VOBSUB ? "VOBSUB" : + subtitle->source == UTF8SUB ? "UTF-8" : + subtitle->source == TX3GSUB ? "TX3G" : ((subtitle->source == CC608SUB || subtitle->source == CC708SUB) ? "CC" : "SRT"), subtitle->config.dest == RENDERSUB ? 
"Render/Burn in" : "Pass-Through"); @@ -795,6 +797,25 @@ static void do_job( hb_job_t * job, int cpu_count ) w->subtitle = subtitle; hb_list_add( job->list_work, w ); } + + if( !job->indepth_scan && subtitle->source == UTF8SUB ) + { + w = hb_get_work( WORK_DECUTF8SUB ); + w->fifo_in = subtitle->fifo_in; + w->fifo_out = subtitle->fifo_raw; + hb_list_add( job->list_work, w ); + } + + if( !job->indepth_scan && subtitle->source == TX3GSUB ) + { + // TODO(davidfstr): For MP4 containers, an alternate work-object + // should be used that just passes the packets through, + // instead of downconverting to UTF-8 subtitles. + w = hb_get_work( WORK_DECTX3GSUB ); + w->fifo_in = subtitle->fifo_in; + w->fifo_out = subtitle->fifo_raw; + hb_list_add( job->list_work, w ); + } if( !job->indepth_scan && subtitle->format == PICTURESUB |