diff options
author | jstebbins <[email protected]> | 2014-04-13 16:22:16 +0000 |
---|---|---|
committer | jstebbins <[email protected]> | 2014-04-13 16:22:16 +0000 |
commit | 6db1a1e531ad62ba977f4587fb9011b0fd0b3416 (patch) | |
tree | 2c6882e9344b8181a11238835ae163d52e98e49e /libhb/decssasub.c | |
parent | e6ca45c979ec69bd1736bc943063083b08ce1914 (diff) |
Convert all text subtitles to ASS subs
Add support for font color to tx3g.
Allow more than one style flag at a time in tx3g.
Add positioning support to CC subs
git-svn-id: svn://svn.handbrake.fr/HandBrake/trunk@6163 b64f7644-9d1e-0410-96f1-a4d463321fa5
Diffstat (limited to 'libhb/decssasub.c')
-rw-r--r-- | libhb/decssasub.c | 462 |
1 files changed, 198 insertions, 264 deletions
diff --git a/libhb/decssasub.c b/libhb/decssasub.c index de2c3c672..3cac39934 100644 --- a/libhb/decssasub.c +++ b/libhb/decssasub.c @@ -11,42 +11,35 @@ * Converts SSA subtitles to either: * (1) TEXTSUB format: UTF-8 subtitles with limited HTML-style markup (<b>, <i>, <u>), or * (2) PICTURESUB format, using libass. - * + * * SSA format references: * http://www.matroska.org/technical/specs/subtitles/ssa.html * http://moodub.free.fr/video/ass-specs.doc * vlc-1.0.4/modules/codec/subtitles/subsass.c:ParseSSAString - * + * * libass references: * libass-0.9.9/ass.h * vlc-1.0.4/modules/codec/libass.c - * + * * @author David Foster (davidfstr) */ #include <stdlib.h> #include <stdio.h> +#include <ctype.h> #include "hb.h" #include <ass/ass.h> +#include "decssasub.h" +#include "colormap.h" struct hb_work_private_s { // If decoding to PICTURESUB format: int readOrder; - int raw; hb_job_t *job; }; -typedef enum { - BOLD = 0x01, - ITALIC = 0x02, - UNDERLINE = 0x04 -} StyleSet; - -// "<b></b>".len + "<i></i>".len + "<u></u>".len -#define MAX_OVERHEAD_PER_OVERRIDE (7 * 3) - #define SSA_2_HB_TIME(hr,min,sec,centi) \ ( 90L * ( hr * 1000L * 60 * 60 +\ min * 1000L * 60 +\ @@ -55,71 +48,189 @@ typedef enum { #define SSA_VERBOSE_PACKETS 0 -static StyleSet ssa_parse_style_override( uint8_t *pos, StyleSet prevStyles ) +static int ssa_update_style(char *ssa, hb_subtitle_style_t *style) { - StyleSet nextStyles = prevStyles; - for (;;) + int pos, end, index; + + if (ssa[0] != '{') + return 0; + + pos = 1; + while (ssa[pos] != '}' && ssa[pos] != '\0') { - // Skip over leading '{' or last '\\' + index = -1; + + // Skip any malformed markup junk + while (strchr("\\}", ssa[pos]) == NULL) pos++; pos++; - - // Scan for next \code - while ( *pos != '\\' && *pos != '}' && *pos != '\0' ) pos++; - if ( *pos != '\\' ) + // Check for an index that is in some markup (e.g. 
font color) + if (isdigit(ssa[pos])) + { + index = ssa[pos++] - 0x30; + } + // Find the end of this markup clause + end = pos; + while (strchr("\\}", ssa[end]) == NULL) end++; + // Handle simple integer valued attributes + if (strchr("ibu", ssa[pos]) != NULL && isdigit(ssa[pos+1])) { - // End of style override block - break; + int val = strtol(ssa + pos + 1, NULL, 0); + switch (ssa[pos]) + { + case 'i': + style->flags = (style->flags & ~HB_STYLE_FLAG_ITALIC) | + !!val * HB_STYLE_FLAG_ITALIC; + break; + case 'b': + style->flags = (style->flags & ~HB_STYLE_FLAG_BOLD) | + !!val * HB_STYLE_FLAG_BOLD; + break; + case 'u': + style->flags = (style->flags & ~HB_STYLE_FLAG_UNDERLINE) | + !!val * HB_STYLE_FLAG_UNDERLINE; + break; + } } - - // If next chars are \[biu][01], interpret it - if ( strchr("biu", pos[1]) && strchr("01", pos[2]) ) + if (ssa[pos] == 'c' && ssa[pos+1] == '&' && ssa[pos+2] == 'H') { - StyleSet styleID = - pos[1] == 'b' ? BOLD : - pos[1] == 'i' ? ITALIC : - pos[1] == 'u' ? UNDERLINE : 0; - int enabled = (pos[2] == '1'); - - if (enabled) + // Font color markup + char *endptr; + uint32_t bgr; + + bgr = strtol(ssa + pos + 3, &endptr, 16); + if (*endptr == '&') { - nextStyles |= styleID; + switch (index) + { + case -1: + case 1: + style->fg_rgb = HB_BGR_TO_RGB(bgr); + break; + case 2: + style->alt_rgb = HB_BGR_TO_RGB(bgr); + break; + case 3: + style->ol_rgb = HB_BGR_TO_RGB(bgr); + break; + case 4: + style->bg_rgb = HB_BGR_TO_RGB(bgr); + break; + default: + // Unknown color index, ignore + break; + } } - else + } + if ((ssa[pos] == 'a' && ssa[pos+1] == '&' && ssa[pos+2] == 'H') || + (!strcmp(ssa+pos, "alpha") && ssa[pos+5] == '&' && ssa[pos+6] == 'H')) + { + // Font alpha markup + char *endptr; + uint8_t alpha; + int alpha_pos = 3; + + if (ssa[1] == 'l') + alpha_pos = 7; + + alpha = strtol(ssa + pos + alpha_pos, &endptr, 16); + if (*endptr == '&') { - nextStyles &= ~styleID; + // SSA alpha is inverted 0 is opaque + alpha = 255 - alpha; + switch (index) + { + 
case -1: + case 1: + style->fg_alpha = alpha; + break; + case 2: + style->alt_alpha = alpha; + break; + case 3: + style->ol_alpha = alpha; + break; + case 4: + style->bg_alpha = alpha; + break; + default: + // Unknown alpha index, ignore + break; + } } } + pos = end; } - return nextStyles; + if (ssa[pos] == '}') + pos++; + return pos; } -static void ssa_append_html_tags_for_style_change( - uint8_t **dst, StyleSet prevStyles, StyleSet nextStyles ) +char * hb_ssa_to_text(char *in, int *consumed, hb_subtitle_style_t *style) { - #define APPEND(str) { \ - char *src = str; \ - while (*src) { *(*dst)++ = *src++; } \ + int markup_len = 0; + int in_pos = 0; + int out_pos = 0; + char *out = malloc(strlen(in) + 1); // out will never be longer than in + + for (in_pos = 0; in[in_pos] != '\0'; in_pos++) + { + if ((markup_len = ssa_update_style(in + in_pos, style))) + { + *consumed = in_pos + markup_len; + out[out_pos++] = '\0'; + return out; + } + // Check escape codes + if (in[in_pos] == '\\') + { + in_pos++; + switch (in[in_pos]) + { + case '\0': + in_pos--; + break; + case 'N': + case 'n': + out[out_pos++] = '\n'; + break; + case 'h': + out[out_pos++] = ' '; + break; + default: + out[out_pos++] = in[in_pos]; + break; + } + } + else + { + out[out_pos++] = in[in_pos]; + } } + *consumed = in_pos; + out[out_pos++] = '\0'; + return out; +} + +void hb_ssa_style_init(hb_subtitle_style_t *style) +{ + style->flags = 0; + + style->fg_rgb = 0x00FFFFFF; + style->alt_rgb = 0x00FFFFFF; + style->ol_rgb = 0x000F0F0F; + style->bg_rgb = 0x000F0F0F; - // Reverse-order close all previous styles - if (prevStyles & UNDERLINE) APPEND("</u>"); - if (prevStyles & ITALIC) APPEND("</i>"); - if (prevStyles & BOLD) APPEND("</b>"); - - // Forward-order open all next styles - if (nextStyles & BOLD) APPEND("<b>"); - if (nextStyles & ITALIC) APPEND("<i>"); - if (nextStyles & UNDERLINE) APPEND("<u>"); - - #undef APPEND + style->fg_alpha = 0xFF; + style->alt_alpha = 0xFF; + style->ol_alpha = 0xFF; + 
style->bg_alpha = 0xFF; } -static hb_buffer_t *ssa_decode_line_to_utf8( uint8_t *in_data, int in_size, int in_sequence ); static hb_buffer_t *ssa_decode_line_to_mkv_ssa( hb_work_object_t * w, uint8_t *in_data, int in_size, int in_sequence ); /* * Decodes a single SSA packet to one or more TEXTSUB or PICTURESUB subtitle packets. - * + * * SSA packet format: * ( Dialogue: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text CR LF ) + * 1 2 3 4 5 6 7 8 9 10 @@ -129,7 +240,7 @@ static hb_buffer_t *ssa_decode_packet( hb_work_object_t * w, hb_buffer_t *in ) // Store NULL after the end of the buffer to make using string processing safe hb_buffer_realloc(in, ++in->size); in->data[in->size - 1] = '\0'; - + hb_buffer_t *out_list = NULL; hb_buffer_t **nextPtr = &out_list; @@ -142,53 +253,31 @@ static hb_buffer_t *ssa_decode_packet( hb_work_object_t * w, hb_buffer_t *in ) // Skip empty lines and spaces between adjacent CR and LF if (curLine[0] == '\0') continue; - + // Decode an individual SSA line hb_buffer_t *out; - if ( w->subtitle->config.dest == PASSTHRUSUB ) { - out = ssa_decode_line_to_utf8( (uint8_t *) curLine, strlen( curLine ), in->sequence ); - if ( out == NULL ) - continue; - - // We shouldn't be storing the extra NULL character, - // but the MP4 muxer expects this, unfortunately. 
- if (out->size > 0 && out->data[out->size - 1] != '\0') - { - hb_buffer_realloc(out, ++out->size); - out->data[out->size - 1] = '\0'; - } - - // If the input packet was non-empty, do not pass through - // an empty output packet (even if the subtitle was empty), - // as this would be interpreted as an end-of-stream - if ( in->size > 0 && out->size == 0 ) { - hb_buffer_close(&out); - continue; - } - } else if ( w->subtitle->config.dest == RENDERSUB ) { - out = ssa_decode_line_to_mkv_ssa( w, (uint8_t *) curLine, strlen( curLine ), in->sequence ); - if ( out == NULL ) - continue; - } - + out = ssa_decode_line_to_mkv_ssa(w, (uint8_t *)curLine, strlen(curLine), in->sequence); + if ( out == NULL ) + continue; + // Append 'out' to 'out_list' *nextPtr = out; nextPtr = &out->next; } - // For point-to-point encoding, when the start time of the stream + // For point-to-point encoding, when the start time of the stream // may be offset, the timestamps of the subtitles must be offset as well. // // HACK: Here we are making the assumption that, under normal circumstances, // the output display time of the first output packet is equal to the // display time of the input packet. - // - // During point-to-point encoding, the display time of the input + // + // During point-to-point encoding, the display time of the input // packet will be offset to compensate. - // - // Therefore we offset all of the output packets by a slip amount - // such that first output packet's display time aligns with the - // input packet's display time. This should give the correct time + // + // Therefore we offset all of the output packets by a slip amount + // such that first output packet's display time aligns with the + // input packet's display time. This should give the correct time // when point-to-point encoding is in effect. 
if (out_list && out_list->s.start > in->s.start) { @@ -203,13 +292,13 @@ static hb_buffer_t *ssa_decode_packet( hb_work_object_t * w, hb_buffer_t *in ) out = out->next; } } - + return out_list; } /* * Parses the start and stop time from the specified SSA packet. - * + * * Returns true if parsing failed; false otherwise. */ static int parse_timing_from_ssa_packet( char *in_data, int64_t *in_start, int64_t *in_stop ) @@ -223,7 +312,7 @@ static int parse_timing_from_ssa_packet( char *in_data, int64_t *in_start, int64 // format specifier "%*128[^,]" will not match on a bare ','. There // must be at least one non ',' character in the match. So the format // specifier is placed directly next to the ':' so that the next - // expected ' ' after the ':' will be the character it matches on + // expected ' ' after the ':' will be the character it matches on // when there is no layer field. int numPartsRead = sscanf( (char *) in_data, "Dialogue:%*128[^,]," "%d:%d:%d.%d," // Start @@ -232,10 +321,10 @@ static int parse_timing_from_ssa_packet( char *in_data, int64_t *in_start, int64 &end_hr, &end_min, &end_sec, &end_centi ); if ( numPartsRead != 8 ) return 1; - + *in_start = SSA_2_HB_TIME(start_hr, start_min, start_sec, start_centi); *in_stop = SSA_2_HB_TIME( end_hr, end_min, end_sec, end_centi); - + return 0; } @@ -258,137 +347,7 @@ static uint8_t *find_field( uint8_t *pos, uint8_t *end, int fieldNum ) * SSA line format: * Dialogue: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text '\0' * 1 2 3 4 5 6 7 8 9 10 - */ -static hb_buffer_t *ssa_decode_line_to_utf8( uint8_t *in_data, int in_size, int in_sequence ) -{ - uint8_t *pos = in_data; - uint8_t *end = in_data + in_size; - - // Parse values for in->s.start and in->s.stop - int64_t in_start, in_stop; - if ( parse_timing_from_ssa_packet( (char *) in_data, &in_start, &in_stop ) ) - goto fail; - - uint8_t *textFieldPos = find_field( pos, end, 10 ); - if ( textFieldPos == NULL ) - goto fail; - - // Count the number of 
style overrides in the Text field - int numStyleOverrides = 0; - pos = textFieldPos; - while ( pos < end ) - { - if (*pos++ == '{') - { - numStyleOverrides++; - } - } - - int maxOutputSize = (end - textFieldPos) + ((numStyleOverrides + 1) * MAX_OVERHEAD_PER_OVERRIDE); - hb_buffer_t *out = hb_buffer_init( maxOutputSize ); - if ( out == NULL ) - return NULL; - - /* - * The Text field contains plain text marked up with: - * (1) '\n' -> space - * (2) '\N' -> newline - * (3) curly-brace control codes like '{\k44}' -> HTML tags / strip - * - * Perform the above conversions and copy it to the output packet - */ - StyleSet prevStyles = 0; - uint8_t *dst = out->data; - pos = textFieldPos; - while ( pos < end ) - { - if ( pos[0] == '\\' && pos[1] == 'n' ) - { - *dst++ = ' '; - pos += 2; - } - else if ( pos[0] == '\\' && pos[1] == 'N' ) - { - *dst++ = '\n'; - pos += 2; - } - else if ( pos[0] == '{' ) - { - // Parse SSA style overrides and append appropriate HTML style tags - StyleSet nextStyles = ssa_parse_style_override( pos, prevStyles ); - ssa_append_html_tags_for_style_change( &dst, prevStyles, nextStyles ); - prevStyles = nextStyles; - - // Skip past SSA control code - while ( pos < end && *pos != '}' ) pos++; - if ( pos < end && *pos == '}' ) pos++; - } - else - { - // Copy raw character - *dst++ = *pos++; - } - } - - // Append closing HTML style tags - ssa_append_html_tags_for_style_change( &dst, prevStyles, 0 ); - - // Trim output buffer to the actual amount of data written - out->size = dst - out->data; - - // Copy metadata from the input packet to the output packet - out->s.frametype = HB_FRAME_SUBTITLE; - out->s.start = in_start; - out->s.stop = in_stop; - out->sequence = in_sequence; - - return out; - -fail: - hb_log( "decssasub: malformed SSA subtitle packet: %.*s\n", in_size, in_data ); - return NULL; -} - -static hb_buffer_t * ssa_to_mkv_ssa( hb_work_object_t * w, hb_buffer_t * in ) -{ - hb_buffer_t * out_last = NULL; - hb_buffer_t * out_first = NULL; - - // 
Store NULL after the end of the buffer to make using string processing safe - hb_buffer_realloc(in, ++in->size); - in->data[in->size - 1] = '\0'; - - const char *EOL = "\r\n"; - char *curLine, *curLine_parserData; - for ( curLine = strtok_r( (char *) in->data, EOL, &curLine_parserData ); - curLine; - curLine = strtok_r( NULL, EOL, &curLine_parserData ) ) - { - hb_buffer_t * out; - - out = ssa_decode_line_to_mkv_ssa( w, (uint8_t *) curLine, strlen( curLine ), in->sequence ); - if( out ) - { - if ( out_last == NULL ) - { - out_last = out_first = out; - } - else - { - out_last->next = out; - out_last = out; - } - } - } - - return out_first; -} - -/* - * SSA line format: - * Dialogue: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text '\0' - * 1 2 3 4 5 6 7 8 9 10 - * + * * MKV-SSA packet format: * ReadOrder,Marked, Style,Name,MarginL,MarginR,MarginV,Effect,Text '\0' * 1 2 3 4 5 6 7 8 9 @@ -397,22 +356,11 @@ static hb_buffer_t *ssa_decode_line_to_mkv_ssa( hb_work_object_t * w, uint8_t *i { hb_work_private_t * pv = w->private_data; hb_buffer_t * out; - + // Parse values for in->s.start and in->s.stop int64_t in_start, in_stop; if ( parse_timing_from_ssa_packet( (char *) in_data, &in_start, &in_stop ) ) goto fail; - - if (pv->raw) - { - out = hb_buffer_init(in_size + 3); - snprintf((char*)out->data, in_size + 3, "%s\r\n", in_data); - out->s.frametype = HB_FRAME_SUBTITLE; - out->s.start = in_start; - out->s.stop = in_stop; - out->sequence = in_sequence; - return out; - } // Convert the SSA packet to MKV-SSA format, which is what libass expects char *mkvIn; @@ -424,18 +372,18 @@ static hb_buffer_t *ssa_decode_line_to_mkv_ssa( hb_work_object_t * w, uint8_t *i // format specifier "%*128[^,]" will not match on a bare ','. There // must be at least one non ',' character in the match. 
So the format // specifier is placed directly next to the ':' so that the next - // expected ' ' after the ':' will be the character it matches on + // expected ' ' after the ':' will be the character it matches on // when there is no layer field. numPartsRead = sscanf( (char *)in_data, "Dialogue:%128[^,],", layerField ); if ( numPartsRead != 1 ) goto fail; - + styleToTextFields = (char *)find_field( in_data, in_data + in_size, 4 ); if ( styleToTextFields == NULL ) { free( layerField ); goto fail; } - + // The sscanf conversion above will result in an extra space // before the layerField. Strip the space. char *stripLayerField = layerField; @@ -449,9 +397,9 @@ static hb_buffer_t *ssa_decode_line_to_mkv_ssa( hb_work_object_t * w, uint8_t *i strcat( mkvIn, "," ); strcat( mkvIn, stripLayerField ); strcat( mkvIn, "," ); - strcat( mkvIn, (char *) styleToTextFields ); - - out->size = strlen(mkvIn); + strcat( mkvIn, (char *)styleToTextFields ); + + out->size = strlen(mkvIn) + 1; out->s.frametype = HB_FRAME_SUBTITLE; out->s.start = in_start; out->s.stop = in_stop; @@ -461,11 +409,11 @@ static hb_buffer_t *ssa_decode_line_to_mkv_ssa( hb_work_object_t * w, uint8_t *i { hb_buffer_close(&out); } - + free( layerField ); - + return out; - + fail: hb_log( "decssasub: malformed SSA subtitle packet: %.*s\n", in_size, in_data ); return NULL; @@ -479,24 +427,18 @@ static int decssaInit( hb_work_object_t * w, hb_job_t * job ) w->private_data = pv; pv->job = job; - if (job->mux & HB_MUX_MASK_AV && w->subtitle->config.dest != RENDERSUB ) - { - pv->raw = 1; - } - return 0; } static int decssaWork( hb_work_object_t * w, hb_buffer_t ** buf_in, hb_buffer_t ** buf_out ) { - hb_work_private_t * pv = w->private_data; hb_buffer_t * in = *buf_in; - + #if SSA_VERBOSE_PACKETS printf("\nPACKET(%"PRId64",%"PRId64"): %.*s\n", in->s.start/90, in->s.stop/90, in->size, in->data); #endif - + if ( in->size <= 0 ) { *buf_out = in; @@ -504,15 +446,7 @@ static int decssaWork( hb_work_object_t * w, 
hb_buffer_t ** buf_in, return HB_WORK_DONE; } - if (w->subtitle->config.dest == PASSTHRUSUB && - (pv->job->mux & HB_MUX_MASK_MKV)) - { - *buf_out = ssa_to_mkv_ssa(w, in); - } - else - { - *buf_out = ssa_decode_packet(w, in); - } + *buf_out = ssa_decode_packet(w, in); return HB_WORK_OK; } |