diff options
Diffstat (limited to 'libhb/decssasub.c')
-rw-r--r-- | libhb/decssasub.c | 462 |
1 files changed, 198 insertions, 264 deletions
diff --git a/libhb/decssasub.c b/libhb/decssasub.c index de2c3c672..3cac39934 100644 --- a/libhb/decssasub.c +++ b/libhb/decssasub.c @@ -11,42 +11,35 @@ * Converts SSA subtitles to either: * (1) TEXTSUB format: UTF-8 subtitles with limited HTML-style markup (<b>, <i>, <u>), or * (2) PICTURESUB format, using libass. - * + * * SSA format references: * http://www.matroska.org/technical/specs/subtitles/ssa.html * http://moodub.free.fr/video/ass-specs.doc * vlc-1.0.4/modules/codec/subtitles/subsass.c:ParseSSAString - * + * * libass references: * libass-0.9.9/ass.h * vlc-1.0.4/modules/codec/libass.c - * + * * @author David Foster (davidfstr) */ #include <stdlib.h> #include <stdio.h> +#include <ctype.h> #include "hb.h" #include <ass/ass.h> +#include "decssasub.h" +#include "colormap.h" struct hb_work_private_s { // If decoding to PICTURESUB format: int readOrder; - int raw; hb_job_t *job; }; -typedef enum { - BOLD = 0x01, - ITALIC = 0x02, - UNDERLINE = 0x04 -} StyleSet; - -// "<b></b>".len + "<i></i>".len + "<u></u>".len -#define MAX_OVERHEAD_PER_OVERRIDE (7 * 3) - #define SSA_2_HB_TIME(hr,min,sec,centi) \ ( 90L * ( hr * 1000L * 60 * 60 +\ min * 1000L * 60 +\ @@ -55,71 +48,189 @@ typedef enum { #define SSA_VERBOSE_PACKETS 0 -static StyleSet ssa_parse_style_override( uint8_t *pos, StyleSet prevStyles ) +static int ssa_update_style(char *ssa, hb_subtitle_style_t *style) { - StyleSet nextStyles = prevStyles; - for (;;) + int pos, end, index; + + if (ssa[0] != '{') + return 0; + + pos = 1; + while (ssa[pos] != '}' && ssa[pos] != '\0') { - // Skip over leading '{' or last '\\' + index = -1; + + // Skip any malformed markup junk + while (strchr("\\}", ssa[pos]) == NULL) pos++; pos++; - - // Scan for next \code - while ( *pos != '\\' && *pos != '}' && *pos != '\0' ) pos++; - if ( *pos != '\\' ) + // Check for an index that is in some markup (e.g. font color) + if (isdigit(ssa[pos])) + { + index = ssa[pos++] - 0x30; + } + // Find the end of this markup clause + end = pos; + while (strchr("\\}", ssa[end]) == NULL) end++; + // Handle simple integer valued attributes + if (strchr("ibu", ssa[pos]) != NULL && isdigit(ssa[pos+1])) { - // End of style override block - break; + int val = strtol(ssa + pos + 1, NULL, 0); + switch (ssa[pos]) + { + case 'i': + style->flags = (style->flags & ~HB_STYLE_FLAG_ITALIC) | + !!val * HB_STYLE_FLAG_ITALIC; + break; + case 'b': + style->flags = (style->flags & ~HB_STYLE_FLAG_BOLD) | + !!val * HB_STYLE_FLAG_BOLD; + break; + case 'u': + style->flags = (style->flags & ~HB_STYLE_FLAG_UNDERLINE) | + !!val * HB_STYLE_FLAG_UNDERLINE; + break; + } } - - // If next chars are \[biu][01], interpret it - if ( strchr("biu", pos[1]) && strchr("01", pos[2]) ) + if (ssa[pos] == 'c' && ssa[pos+1] == '&' && ssa[pos+2] == 'H') { - StyleSet styleID = - pos[1] == 'b' ? BOLD : - pos[1] == 'i' ? ITALIC : - pos[1] == 'u' ? UNDERLINE : 0; - int enabled = (pos[2] == '1'); - - if (enabled) + // Font color markup + char *endptr; + uint32_t bgr; + + bgr = strtol(ssa + pos + 3, &endptr, 16); + if (*endptr == '&') { - nextStyles |= styleID; + switch (index) + { + case -1: + case 1: + style->fg_rgb = HB_BGR_TO_RGB(bgr); + break; + case 2: + style->alt_rgb = HB_BGR_TO_RGB(bgr); + break; + case 3: + style->ol_rgb = HB_BGR_TO_RGB(bgr); + break; + case 4: + style->bg_rgb = HB_BGR_TO_RGB(bgr); + break; + default: + // Unknown color index, ignore + break; + } } - else + } + if ((ssa[pos] == 'a' && ssa[pos+1] == '&' && ssa[pos+2] == 'H') || + (!strcmp(ssa+pos, "alpha") && ssa[pos+5] == '&' && ssa[pos+6] == 'H')) + { + // Font alpha markup + char *endptr; + uint8_t alpha; + int alpha_pos = 3; + + if (ssa[1] == 'l') + alpha_pos = 7; + + alpha = strtol(ssa + pos + alpha_pos, &endptr, 16); + if (*endptr == '&') { - nextStyles &= ~styleID; + // SSA alpha is inverted 0 is opaque + alpha = 255 - alpha; + switch (index) + { + case -1: + case 1: + style->fg_alpha = alpha; + break; + case 2: + style->alt_alpha = alpha; + break; + case 3: + style->ol_alpha = alpha; + break; + case 4: + style->bg_alpha = alpha; + break; + default: + // Unknown alpha index, ignore + break; + } } } + pos = end; } - return nextStyles; + if (ssa[pos] == '}') + pos++; + return pos; } -static void ssa_append_html_tags_for_style_change( - uint8_t **dst, StyleSet prevStyles, StyleSet nextStyles ) +char * hb_ssa_to_text(char *in, int *consumed, hb_subtitle_style_t *style) { - #define APPEND(str) { \ - char *src = str; \ - while (*src) { *(*dst)++ = *src++; } \ + int markup_len = 0; + int in_pos = 0; + int out_pos = 0; + char *out = malloc(strlen(in) + 1); // out will never be longer than in + + for (in_pos = 0; in[in_pos] != '\0'; in_pos++) + { + if ((markup_len = ssa_update_style(in + in_pos, style))) + { + *consumed = in_pos + markup_len; + out[out_pos++] = '\0'; + return out; + } + // Check escape codes + if (in[in_pos] == '\\') + { + in_pos++; + switch (in[in_pos]) + { + case '\0': + in_pos--; + break; + case 'N': + case 'n': + out[out_pos++] = '\n'; + break; + case 'h': + out[out_pos++] = ' '; + break; + default: + out[out_pos++] = in[in_pos]; + break; + } + } + else + { + out[out_pos++] = in[in_pos]; + } } + *consumed = in_pos; + out[out_pos++] = '\0'; + return out; +} + +void hb_ssa_style_init(hb_subtitle_style_t *style) +{ + style->flags = 0; + + style->fg_rgb = 0x00FFFFFF; + style->alt_rgb = 0x00FFFFFF; + style->ol_rgb = 0x000F0F0F; + style->bg_rgb = 0x000F0F0F; - // Reverse-order close all previous styles - if (prevStyles & UNDERLINE) APPEND("</u>"); - if (prevStyles & ITALIC) APPEND("</i>"); - if (prevStyles & BOLD) APPEND("</b>"); - - // Forward-order open all next styles - if (nextStyles & BOLD) APPEND("<b>"); - if (nextStyles & ITALIC) APPEND("<i>"); - if (nextStyles & UNDERLINE) APPEND("<u>"); - - #undef APPEND + style->fg_alpha = 0xFF; + style->alt_alpha = 0xFF; + style->ol_alpha = 0xFF; + style->bg_alpha = 0xFF; } -static hb_buffer_t *ssa_decode_line_to_utf8( uint8_t *in_data, int in_size, int in_sequence ); static hb_buffer_t *ssa_decode_line_to_mkv_ssa( hb_work_object_t * w, uint8_t *in_data, int in_size, int in_sequence ); /* * Decodes a single SSA packet to one or more TEXTSUB or PICTURESUB subtitle packets. - * + * * SSA packet format: * ( Dialogue: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text CR LF ) + * 1 2 3 4 5 6 7 8 9 10 @@ -129,7 +240,7 @@ static hb_buffer_t *ssa_decode_packet( hb_work_object_t * w, hb_buffer_t *in ) // Store NULL after the end of the buffer to make using string processing safe hb_buffer_realloc(in, ++in->size); in->data[in->size - 1] = '\0'; - + hb_buffer_t *out_list = NULL; hb_buffer_t **nextPtr = &out_list; @@ -142,53 +253,31 @@ static hb_buffer_t *ssa_decode_packet( hb_work_object_t * w, hb_buffer_t *in ) // Skip empty lines and spaces between adjacent CR and LF if (curLine[0] == '\0') continue; - + // Decode an individual SSA line hb_buffer_t *out; - if ( w->subtitle->config.dest == PASSTHRUSUB ) { - out = ssa_decode_line_to_utf8( (uint8_t *) curLine, strlen( curLine ), in->sequence ); - if ( out == NULL ) - continue; - - // We shouldn't be storing the extra NULL character, - // but the MP4 muxer expects this, unfortunately. - if (out->size > 0 && out->data[out->size - 1] != '\0') - { - hb_buffer_realloc(out, ++out->size); - out->data[out->size - 1] = '\0'; - } - - // If the input packet was non-empty, do not pass through - // an empty output packet (even if the subtitle was empty), - // as this would be interpreted as an end-of-stream - if ( in->size > 0 && out->size == 0 ) { - hb_buffer_close(&out); - continue; - } - } else if ( w->subtitle->config.dest == RENDERSUB ) { - out = ssa_decode_line_to_mkv_ssa( w, (uint8_t *) curLine, strlen( curLine ), in->sequence ); - if ( out == NULL ) - continue; - } - + out = ssa_decode_line_to_mkv_ssa(w, (uint8_t *)curLine, strlen(curLine), in->sequence); + if ( out == NULL ) + continue; + // Append 'out' to 'out_list' *nextPtr = out; nextPtr = &out->next; } - // For point-to-point encoding, when the start time of the stream + // For point-to-point encoding, when the start time of the stream // may be offset, the timestamps of the subtitles must be offset as well. // // HACK: Here we are making the assumption that, under normal circumstances, // the output display time of the first output packet is equal to the // display time of the input packet. - // - // During point-to-point encoding, the display time of the input + // + // During point-to-point encoding, the display time of the input // packet will be offset to compensate. - // - // Therefore we offset all of the output packets by a slip amount - // such that first output packet's display time aligns with the - // input packet's display time. This should give the correct time + // + // Therefore we offset all of the output packets by a slip amount + // such that first output packet's display time aligns with the + // input packet's display time. This should give the correct time // when point-to-point encoding is in effect. if (out_list && out_list->s.start > in->s.start) { @@ -203,13 +292,13 @@ static hb_buffer_t *ssa_decode_packet( hb_work_object_t * w, hb_buffer_t *in ) out = out->next; } } - + return out_list; } /* * Parses the start and stop time from the specified SSA packet. - * + * * Returns true if parsing failed; false otherwise. */ static int parse_timing_from_ssa_packet( char *in_data, int64_t *in_start, int64_t *in_stop ) @@ -223,7 +312,7 @@ static int parse_timing_from_ssa_packet( char *in_data, int64_t *in_start, int64 // format specifier "%*128[^,]" will not match on a bare ','. There // must be at least one non ',' character in the match. So the format // specifier is placed directly next to the ':' so that the next - // expected ' ' after the ':' will be the character it matches on + // expected ' ' after the ':' will be the character it matches on // when there is no layer field. int numPartsRead = sscanf( (char *) in_data, "Dialogue:%*128[^,]," "%d:%d:%d.%d," // Start @@ -232,10 +321,10 @@ static int parse_timing_from_ssa_packet( char *in_data, int64_t *in_start, int64 &end_hr, &end_min, &end_sec, &end_centi ); if ( numPartsRead != 8 ) return 1; - + *in_start = SSA_2_HB_TIME(start_hr, start_min, start_sec, start_centi); *in_stop = SSA_2_HB_TIME( end_hr, end_min, end_sec, end_centi); - + return 0; } @@ -258,137 +347,7 @@ static uint8_t *find_field( uint8_t *pos, uint8_t *end, int fieldNum ) * SSA line format: * Dialogue: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text '\0' * 1 2 3 4 5 6 7 8 9 10 - */ -static hb_buffer_t *ssa_decode_line_to_utf8( uint8_t *in_data, int in_size, int in_sequence ) -{ - uint8_t *pos = in_data; - uint8_t *end = in_data + in_size; - - // Parse values for in->s.start and in->s.stop - int64_t in_start, in_stop; - if ( parse_timing_from_ssa_packet( (char *) in_data, &in_start, &in_stop ) ) - goto fail; - - uint8_t *textFieldPos = find_field( pos, end, 10 ); - if ( textFieldPos == NULL ) - goto fail; - - // Count the number of style overrides in the Text field - int numStyleOverrides = 0; - pos = textFieldPos; - while ( pos < end ) - { - if (*pos++ == '{') - { - numStyleOverrides++; - } - } - - int maxOutputSize = (end - textFieldPos) + ((numStyleOverrides + 1) * MAX_OVERHEAD_PER_OVERRIDE); - hb_buffer_t *out = hb_buffer_init( maxOutputSize ); - if ( out == NULL ) - return NULL; - - /* - * The Text field contains plain text marked up with: - * (1) '\n' -> space - * (2) '\N' -> newline - * (3) curly-brace control codes like '{\k44}' -> HTML tags / strip - * - * Perform the above conversions and copy it to the output packet - */ - StyleSet prevStyles = 0; - uint8_t *dst = out->data; - pos = textFieldPos; - while ( pos < end ) - { - if ( pos[0] == '\\' && pos[1] == 'n' ) - { - *dst++ = ' '; - pos += 2; - } - else if ( pos[0] == '\\' && pos[1] == 'N' ) - { - *dst++ = '\n'; - pos += 2; - } - else if ( pos[0] == '{' ) - { - // Parse SSA style overrides and append appropriate HTML style tags - StyleSet nextStyles = ssa_parse_style_override( pos, prevStyles ); - ssa_append_html_tags_for_style_change( &dst, prevStyles, nextStyles ); - prevStyles = nextStyles; - - // Skip past SSA control code - while ( pos < end && *pos != '}' ) pos++; - if ( pos < end && *pos == '}' ) pos++; - } - else - { - // Copy raw character - *dst++ = *pos++; - } - } - - // Append closing HTML style tags - ssa_append_html_tags_for_style_change( &dst, prevStyles, 0 ); - - // Trim output buffer to the actual amount of data written - out->size = dst - out->data; - - // Copy metadata from the input packet to the output packet - out->s.frametype = HB_FRAME_SUBTITLE; - out->s.start = in_start; - out->s.stop = in_stop; - out->sequence = in_sequence; - - return out; - -fail: - hb_log( "decssasub: malformed SSA subtitle packet: %.*s\n", in_size, in_data ); - return NULL; -} - -static hb_buffer_t * ssa_to_mkv_ssa( hb_work_object_t * w, hb_buffer_t * in ) -{ - hb_buffer_t * out_last = NULL; - hb_buffer_t * out_first = NULL; - - // Store NULL after the end of the buffer to make using string processing safe - hb_buffer_realloc(in, ++in->size); - in->data[in->size - 1] = '\0'; - - const char *EOL = "\r\n"; - char *curLine, *curLine_parserData; - for ( curLine = strtok_r( (char *) in->data, EOL, &curLine_parserData ); - curLine; - curLine = strtok_r( NULL, EOL, &curLine_parserData ) ) - { - hb_buffer_t * out; - - out = ssa_decode_line_to_mkv_ssa( w, (uint8_t *) curLine, strlen( curLine ), in->sequence ); - if( out ) - { - if ( out_last == NULL ) - { - out_last = out_first = out; - } - else - { - out_last->next = out; - out_last = out; - } - } - } - - return out_first; -} - -/* - * SSA line format: - * Dialogue: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text '\0' - * 1 2 3 4 5 6 7 8 9 10 - * + * * MKV-SSA packet format: * ReadOrder,Marked, Style,Name,MarginL,MarginR,MarginV,Effect,Text '\0' * 1 2 3 4 5 6 7 8 9 @@ -397,22 +356,11 @@ static hb_buffer_t *ssa_decode_line_to_mkv_ssa( hb_work_object_t * w, uint8_t *i { hb_work_private_t * pv = w->private_data; hb_buffer_t * out; - + // Parse values for in->s.start and in->s.stop int64_t in_start, in_stop; if ( parse_timing_from_ssa_packet( (char *) in_data, &in_start, &in_stop ) ) goto fail; - - if (pv->raw) - { - out = hb_buffer_init(in_size + 3); - snprintf((char*)out->data, in_size + 3, "%s\r\n", in_data); - out->s.frametype = HB_FRAME_SUBTITLE; - out->s.start = in_start; - out->s.stop = in_stop; - out->sequence = in_sequence; - return out; - } // Convert the SSA packet to MKV-SSA format, which is what libass expects char *mkvIn; @@ -424,18 +372,18 @@ static hb_buffer_t *ssa_decode_line_to_mkv_ssa( hb_work_object_t * w, uint8_t *i // format specifier "%*128[^,]" will not match on a bare ','. There // must be at least one non ',' character in the match. So the format // specifier is placed directly next to the ':' so that the next - // expected ' ' after the ':' will be the character it matches on + // expected ' ' after the ':' will be the character it matches on // when there is no layer field. numPartsRead = sscanf( (char *)in_data, "Dialogue:%128[^,],", layerField ); if ( numPartsRead != 1 ) goto fail; - + styleToTextFields = (char *)find_field( in_data, in_data + in_size, 4 ); if ( styleToTextFields == NULL ) { free( layerField ); goto fail; } - + // The sscanf conversion above will result in an extra space // before the layerField. Strip the space. char *stripLayerField = layerField; @@ -449,9 +397,9 @@ static hb_buffer_t *ssa_decode_line_to_mkv_ssa( hb_work_object_t * w, uint8_t *i strcat( mkvIn, "," ); strcat( mkvIn, stripLayerField ); strcat( mkvIn, "," ); - strcat( mkvIn, (char *) styleToTextFields ); - - out->size = strlen(mkvIn); + strcat( mkvIn, (char *)styleToTextFields ); + + out->size = strlen(mkvIn) + 1; out->s.frametype = HB_FRAME_SUBTITLE; out->s.start = in_start; out->s.stop = in_stop; @@ -461,11 +409,11 @@ static hb_buffer_t *ssa_decode_line_to_mkv_ssa( hb_work_object_t * w, uint8_t *i { hb_buffer_close(&out); } - + free( layerField ); - + return out; - + fail: hb_log( "decssasub: malformed SSA subtitle packet: %.*s\n", in_size, in_data ); return NULL; @@ -479,24 +427,18 @@ static int decssaInit( hb_work_object_t * w, hb_job_t * job ) w->private_data = pv; pv->job = job; - if (job->mux & HB_MUX_MASK_AV && w->subtitle->config.dest != RENDERSUB ) - { - pv->raw = 1; - } - return 0; } static int decssaWork( hb_work_object_t * w, hb_buffer_t ** buf_in, hb_buffer_t ** buf_out ) { - hb_work_private_t * pv = w->private_data; hb_buffer_t * in = *buf_in; - + #if SSA_VERBOSE_PACKETS printf("\nPACKET(%"PRId64",%"PRId64"): %.*s\n", in->s.start/90, in->s.stop/90, in->size, in->data); #endif - + if ( in->size <= 0 ) { *buf_out = in; @@ -504,15 +446,7 @@ static int decssaWork( hb_work_object_t * w, hb_buffer_t ** buf_in, return HB_WORK_DONE; } - if (w->subtitle->config.dest == PASSTHRUSUB && - (pv->job->mux & HB_MUX_MASK_MKV)) - { - *buf_out = ssa_to_mkv_ssa(w, in); - } - else - { - *buf_out = ssa_decode_packet(w, in); - } + *buf_out = ssa_decode_packet(w, in); return HB_WORK_OK; } |