summaryrefslogtreecommitdiffstats
path: root/libhb/decssasub.c
diff options
context:
space:
mode:
Diffstat (limited to 'libhb/decssasub.c')
-rw-r--r--libhb/decssasub.c462
1 files changed, 198 insertions, 264 deletions
diff --git a/libhb/decssasub.c b/libhb/decssasub.c
index de2c3c672..3cac39934 100644
--- a/libhb/decssasub.c
+++ b/libhb/decssasub.c
@@ -11,42 +11,35 @@
* Converts SSA subtitles to either:
* (1) TEXTSUB format: UTF-8 subtitles with limited HTML-style markup (<b>, <i>, <u>), or
* (2) PICTURESUB format, using libass.
- *
+ *
* SSA format references:
* http://www.matroska.org/technical/specs/subtitles/ssa.html
* http://moodub.free.fr/video/ass-specs.doc
* vlc-1.0.4/modules/codec/subtitles/subsass.c:ParseSSAString
- *
+ *
* libass references:
* libass-0.9.9/ass.h
* vlc-1.0.4/modules/codec/libass.c
- *
+ *
* @author David Foster (davidfstr)
*/
#include <stdlib.h>
#include <stdio.h>
+#include <ctype.h>
#include "hb.h"
#include <ass/ass.h>
+#include "decssasub.h"
+#include "colormap.h"
struct hb_work_private_s
{
// If decoding to PICTURESUB format:
int readOrder;
- int raw;
hb_job_t *job;
};
-typedef enum {
- BOLD = 0x01,
- ITALIC = 0x02,
- UNDERLINE = 0x04
-} StyleSet;
-
-// "<b></b>".len + "<i></i>".len + "<u></u>".len
-#define MAX_OVERHEAD_PER_OVERRIDE (7 * 3)
-
#define SSA_2_HB_TIME(hr,min,sec,centi) \
( 90L * ( hr * 1000L * 60 * 60 +\
min * 1000L * 60 +\
@@ -55,71 +48,189 @@ typedef enum {
#define SSA_VERBOSE_PACKETS 0
-static StyleSet ssa_parse_style_override( uint8_t *pos, StyleSet prevStyles )
+static int ssa_update_style(char *ssa, hb_subtitle_style_t *style)
{
- StyleSet nextStyles = prevStyles;
- for (;;)
+ int pos, end, index;
+
+ if (ssa[0] != '{')
+ return 0;
+
+ pos = 1;
+ while (ssa[pos] != '}' && ssa[pos] != '\0')
{
- // Skip over leading '{' or last '\\'
+ index = -1;
+
+ // Skip any malformed markup junk
+ while (strchr("\\}", ssa[pos]) == NULL) pos++;
pos++;
-
- // Scan for next \code
- while ( *pos != '\\' && *pos != '}' && *pos != '\0' ) pos++;
- if ( *pos != '\\' )
+ // Check for an index that is in some markup (e.g. font color)
+ if (isdigit(ssa[pos]))
+ {
+ index = ssa[pos++] - 0x30;
+ }
+ // Find the end of this markup clause
+ end = pos;
+ while (strchr("\\}", ssa[end]) == NULL) end++;
+ // Handle simple integer valued attributes
+ if (strchr("ibu", ssa[pos]) != NULL && isdigit(ssa[pos+1]))
{
- // End of style override block
- break;
+ int val = strtol(ssa + pos + 1, NULL, 0);
+ switch (ssa[pos])
+ {
+ case 'i':
+ style->flags = (style->flags & ~HB_STYLE_FLAG_ITALIC) |
+ !!val * HB_STYLE_FLAG_ITALIC;
+ break;
+ case 'b':
+ style->flags = (style->flags & ~HB_STYLE_FLAG_BOLD) |
+ !!val * HB_STYLE_FLAG_BOLD;
+ break;
+ case 'u':
+ style->flags = (style->flags & ~HB_STYLE_FLAG_UNDERLINE) |
+ !!val * HB_STYLE_FLAG_UNDERLINE;
+ break;
+ }
}
-
- // If next chars are \[biu][01], interpret it
- if ( strchr("biu", pos[1]) && strchr("01", pos[2]) )
+ if (ssa[pos] == 'c' && ssa[pos+1] == '&' && ssa[pos+2] == 'H')
{
- StyleSet styleID =
- pos[1] == 'b' ? BOLD :
- pos[1] == 'i' ? ITALIC :
- pos[1] == 'u' ? UNDERLINE : 0;
- int enabled = (pos[2] == '1');
-
- if (enabled)
+ // Font color markup
+ char *endptr;
+ uint32_t bgr;
+
+ bgr = strtol(ssa + pos + 3, &endptr, 16);
+ if (*endptr == '&')
{
- nextStyles |= styleID;
+ switch (index)
+ {
+ case -1:
+ case 1:
+ style->fg_rgb = HB_BGR_TO_RGB(bgr);
+ break;
+ case 2:
+ style->alt_rgb = HB_BGR_TO_RGB(bgr);
+ break;
+ case 3:
+ style->ol_rgb = HB_BGR_TO_RGB(bgr);
+ break;
+ case 4:
+ style->bg_rgb = HB_BGR_TO_RGB(bgr);
+ break;
+ default:
+ // Unknown color index, ignore
+ break;
+ }
}
- else
+ }
+ if ((ssa[pos] == 'a' && ssa[pos+1] == '&' && ssa[pos+2] == 'H') ||
+ (!strcmp(ssa+pos, "alpha") && ssa[pos+5] == '&' && ssa[pos+6] == 'H'))
+ {
+ // Font alpha markup
+ char *endptr;
+ uint8_t alpha;
+ int alpha_pos = 3;
+
+ if (ssa[1] == 'l')
+ alpha_pos = 7;
+
+ alpha = strtol(ssa + pos + alpha_pos, &endptr, 16);
+ if (*endptr == '&')
{
- nextStyles &= ~styleID;
+ // SSA alpha is inverted 0 is opaque
+ alpha = 255 - alpha;
+ switch (index)
+ {
+ case -1:
+ case 1:
+ style->fg_alpha = alpha;
+ break;
+ case 2:
+ style->alt_alpha = alpha;
+ break;
+ case 3:
+ style->ol_alpha = alpha;
+ break;
+ case 4:
+ style->bg_alpha = alpha;
+ break;
+ default:
+ // Unknown alpha index, ignore
+ break;
+ }
}
}
+ pos = end;
}
- return nextStyles;
+ if (ssa[pos] == '}')
+ pos++;
+ return pos;
}
-static void ssa_append_html_tags_for_style_change(
- uint8_t **dst, StyleSet prevStyles, StyleSet nextStyles )
+char * hb_ssa_to_text(char *in, int *consumed, hb_subtitle_style_t *style)
{
- #define APPEND(str) { \
- char *src = str; \
- while (*src) { *(*dst)++ = *src++; } \
+ int markup_len = 0;
+ int in_pos = 0;
+ int out_pos = 0;
+ char *out = malloc(strlen(in) + 1); // out will never be longer than in
+
+ for (in_pos = 0; in[in_pos] != '\0'; in_pos++)
+ {
+ if ((markup_len = ssa_update_style(in + in_pos, style)))
+ {
+ *consumed = in_pos + markup_len;
+ out[out_pos++] = '\0';
+ return out;
+ }
+ // Check escape codes
+ if (in[in_pos] == '\\')
+ {
+ in_pos++;
+ switch (in[in_pos])
+ {
+ case '\0':
+ in_pos--;
+ break;
+ case 'N':
+ case 'n':
+ out[out_pos++] = '\n';
+ break;
+ case 'h':
+ out[out_pos++] = ' ';
+ break;
+ default:
+ out[out_pos++] = in[in_pos];
+ break;
+ }
+ }
+ else
+ {
+ out[out_pos++] = in[in_pos];
+ }
}
+ *consumed = in_pos;
+ out[out_pos++] = '\0';
+ return out;
+}
+
+void hb_ssa_style_init(hb_subtitle_style_t *style)
+{
+ style->flags = 0;
+
+ style->fg_rgb = 0x00FFFFFF;
+ style->alt_rgb = 0x00FFFFFF;
+ style->ol_rgb = 0x000F0F0F;
+ style->bg_rgb = 0x000F0F0F;
- // Reverse-order close all previous styles
- if (prevStyles & UNDERLINE) APPEND("</u>");
- if (prevStyles & ITALIC) APPEND("</i>");
- if (prevStyles & BOLD) APPEND("</b>");
-
- // Forward-order open all next styles
- if (nextStyles & BOLD) APPEND("<b>");
- if (nextStyles & ITALIC) APPEND("<i>");
- if (nextStyles & UNDERLINE) APPEND("<u>");
-
- #undef APPEND
+ style->fg_alpha = 0xFF;
+ style->alt_alpha = 0xFF;
+ style->ol_alpha = 0xFF;
+ style->bg_alpha = 0xFF;
}
-static hb_buffer_t *ssa_decode_line_to_utf8( uint8_t *in_data, int in_size, int in_sequence );
static hb_buffer_t *ssa_decode_line_to_mkv_ssa( hb_work_object_t * w, uint8_t *in_data, int in_size, int in_sequence );
/*
* Decodes a single SSA packet to one or more TEXTSUB or PICTURESUB subtitle packets.
- *
+ *
* SSA packet format:
* ( Dialogue: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text CR LF ) +
* 1 2 3 4 5 6 7 8 9 10
@@ -129,7 +240,7 @@ static hb_buffer_t *ssa_decode_packet( hb_work_object_t * w, hb_buffer_t *in )
// Store NULL after the end of the buffer to make using string processing safe
hb_buffer_realloc(in, ++in->size);
in->data[in->size - 1] = '\0';
-
+
hb_buffer_t *out_list = NULL;
hb_buffer_t **nextPtr = &out_list;
@@ -142,53 +253,31 @@ static hb_buffer_t *ssa_decode_packet( hb_work_object_t * w, hb_buffer_t *in )
// Skip empty lines and spaces between adjacent CR and LF
if (curLine[0] == '\0')
continue;
-
+
// Decode an individual SSA line
hb_buffer_t *out;
- if ( w->subtitle->config.dest == PASSTHRUSUB ) {
- out = ssa_decode_line_to_utf8( (uint8_t *) curLine, strlen( curLine ), in->sequence );
- if ( out == NULL )
- continue;
-
- // We shouldn't be storing the extra NULL character,
- // but the MP4 muxer expects this, unfortunately.
- if (out->size > 0 && out->data[out->size - 1] != '\0')
- {
- hb_buffer_realloc(out, ++out->size);
- out->data[out->size - 1] = '\0';
- }
-
- // If the input packet was non-empty, do not pass through
- // an empty output packet (even if the subtitle was empty),
- // as this would be interpreted as an end-of-stream
- if ( in->size > 0 && out->size == 0 ) {
- hb_buffer_close(&out);
- continue;
- }
- } else if ( w->subtitle->config.dest == RENDERSUB ) {
- out = ssa_decode_line_to_mkv_ssa( w, (uint8_t *) curLine, strlen( curLine ), in->sequence );
- if ( out == NULL )
- continue;
- }
-
+ out = ssa_decode_line_to_mkv_ssa(w, (uint8_t *)curLine, strlen(curLine), in->sequence);
+ if ( out == NULL )
+ continue;
+
// Append 'out' to 'out_list'
*nextPtr = out;
nextPtr = &out->next;
}
- // For point-to-point encoding, when the start time of the stream
+ // For point-to-point encoding, when the start time of the stream
// may be offset, the timestamps of the subtitles must be offset as well.
//
// HACK: Here we are making the assumption that, under normal circumstances,
// the output display time of the first output packet is equal to the
// display time of the input packet.
- //
- // During point-to-point encoding, the display time of the input
+ //
+ // During point-to-point encoding, the display time of the input
// packet will be offset to compensate.
- //
- // Therefore we offset all of the output packets by a slip amount
- // such that first output packet's display time aligns with the
- // input packet's display time. This should give the correct time
+ //
+ // Therefore we offset all of the output packets by a slip amount
+ // such that first output packet's display time aligns with the
+ // input packet's display time. This should give the correct time
// when point-to-point encoding is in effect.
if (out_list && out_list->s.start > in->s.start)
{
@@ -203,13 +292,13 @@ static hb_buffer_t *ssa_decode_packet( hb_work_object_t * w, hb_buffer_t *in )
out = out->next;
}
}
-
+
return out_list;
}
/*
* Parses the start and stop time from the specified SSA packet.
- *
+ *
* Returns true if parsing failed; false otherwise.
*/
static int parse_timing_from_ssa_packet( char *in_data, int64_t *in_start, int64_t *in_stop )
@@ -223,7 +312,7 @@ static int parse_timing_from_ssa_packet( char *in_data, int64_t *in_start, int64
// format specifier "%*128[^,]" will not match on a bare ','. There
// must be at least one non ',' character in the match. So the format
// specifier is placed directly next to the ':' so that the next
- // expected ' ' after the ':' will be the character it matches on
+ // expected ' ' after the ':' will be the character it matches on
// when there is no layer field.
int numPartsRead = sscanf( (char *) in_data, "Dialogue:%*128[^,],"
"%d:%d:%d.%d," // Start
@@ -232,10 +321,10 @@ static int parse_timing_from_ssa_packet( char *in_data, int64_t *in_start, int64
&end_hr, &end_min, &end_sec, &end_centi );
if ( numPartsRead != 8 )
return 1;
-
+
*in_start = SSA_2_HB_TIME(start_hr, start_min, start_sec, start_centi);
*in_stop = SSA_2_HB_TIME( end_hr, end_min, end_sec, end_centi);
-
+
return 0;
}
@@ -258,137 +347,7 @@ static uint8_t *find_field( uint8_t *pos, uint8_t *end, int fieldNum )
* SSA line format:
* Dialogue: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text '\0'
* 1 2 3 4 5 6 7 8 9 10
- */
-static hb_buffer_t *ssa_decode_line_to_utf8( uint8_t *in_data, int in_size, int in_sequence )
-{
- uint8_t *pos = in_data;
- uint8_t *end = in_data + in_size;
-
- // Parse values for in->s.start and in->s.stop
- int64_t in_start, in_stop;
- if ( parse_timing_from_ssa_packet( (char *) in_data, &in_start, &in_stop ) )
- goto fail;
-
- uint8_t *textFieldPos = find_field( pos, end, 10 );
- if ( textFieldPos == NULL )
- goto fail;
-
- // Count the number of style overrides in the Text field
- int numStyleOverrides = 0;
- pos = textFieldPos;
- while ( pos < end )
- {
- if (*pos++ == '{')
- {
- numStyleOverrides++;
- }
- }
-
- int maxOutputSize = (end - textFieldPos) + ((numStyleOverrides + 1) * MAX_OVERHEAD_PER_OVERRIDE);
- hb_buffer_t *out = hb_buffer_init( maxOutputSize );
- if ( out == NULL )
- return NULL;
-
- /*
- * The Text field contains plain text marked up with:
- * (1) '\n' -> space
- * (2) '\N' -> newline
- * (3) curly-brace control codes like '{\k44}' -> HTML tags / strip
- *
- * Perform the above conversions and copy it to the output packet
- */
- StyleSet prevStyles = 0;
- uint8_t *dst = out->data;
- pos = textFieldPos;
- while ( pos < end )
- {
- if ( pos[0] == '\\' && pos[1] == 'n' )
- {
- *dst++ = ' ';
- pos += 2;
- }
- else if ( pos[0] == '\\' && pos[1] == 'N' )
- {
- *dst++ = '\n';
- pos += 2;
- }
- else if ( pos[0] == '{' )
- {
- // Parse SSA style overrides and append appropriate HTML style tags
- StyleSet nextStyles = ssa_parse_style_override( pos, prevStyles );
- ssa_append_html_tags_for_style_change( &dst, prevStyles, nextStyles );
- prevStyles = nextStyles;
-
- // Skip past SSA control code
- while ( pos < end && *pos != '}' ) pos++;
- if ( pos < end && *pos == '}' ) pos++;
- }
- else
- {
- // Copy raw character
- *dst++ = *pos++;
- }
- }
-
- // Append closing HTML style tags
- ssa_append_html_tags_for_style_change( &dst, prevStyles, 0 );
-
- // Trim output buffer to the actual amount of data written
- out->size = dst - out->data;
-
- // Copy metadata from the input packet to the output packet
- out->s.frametype = HB_FRAME_SUBTITLE;
- out->s.start = in_start;
- out->s.stop = in_stop;
- out->sequence = in_sequence;
-
- return out;
-
-fail:
- hb_log( "decssasub: malformed SSA subtitle packet: %.*s\n", in_size, in_data );
- return NULL;
-}
-
-static hb_buffer_t * ssa_to_mkv_ssa( hb_work_object_t * w, hb_buffer_t * in )
-{
- hb_buffer_t * out_last = NULL;
- hb_buffer_t * out_first = NULL;
-
- // Store NULL after the end of the buffer to make using string processing safe
- hb_buffer_realloc(in, ++in->size);
- in->data[in->size - 1] = '\0';
-
- const char *EOL = "\r\n";
- char *curLine, *curLine_parserData;
- for ( curLine = strtok_r( (char *) in->data, EOL, &curLine_parserData );
- curLine;
- curLine = strtok_r( NULL, EOL, &curLine_parserData ) )
- {
- hb_buffer_t * out;
-
- out = ssa_decode_line_to_mkv_ssa( w, (uint8_t *) curLine, strlen( curLine ), in->sequence );
- if( out )
- {
- if ( out_last == NULL )
- {
- out_last = out_first = out;
- }
- else
- {
- out_last->next = out;
- out_last = out;
- }
- }
- }
-
- return out_first;
-}
-
-/*
- * SSA line format:
- * Dialogue: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text '\0'
- * 1 2 3 4 5 6 7 8 9 10
- *
+ *
* MKV-SSA packet format:
* ReadOrder,Marked, Style,Name,MarginL,MarginR,MarginV,Effect,Text '\0'
* 1 2 3 4 5 6 7 8 9
@@ -397,22 +356,11 @@ static hb_buffer_t *ssa_decode_line_to_mkv_ssa( hb_work_object_t * w, uint8_t *i
{
hb_work_private_t * pv = w->private_data;
hb_buffer_t * out;
-
+
// Parse values for in->s.start and in->s.stop
int64_t in_start, in_stop;
if ( parse_timing_from_ssa_packet( (char *) in_data, &in_start, &in_stop ) )
goto fail;
-
- if (pv->raw)
- {
- out = hb_buffer_init(in_size + 3);
- snprintf((char*)out->data, in_size + 3, "%s\r\n", in_data);
- out->s.frametype = HB_FRAME_SUBTITLE;
- out->s.start = in_start;
- out->s.stop = in_stop;
- out->sequence = in_sequence;
- return out;
- }
// Convert the SSA packet to MKV-SSA format, which is what libass expects
char *mkvIn;
@@ -424,18 +372,18 @@ static hb_buffer_t *ssa_decode_line_to_mkv_ssa( hb_work_object_t * w, uint8_t *i
// format specifier "%*128[^,]" will not match on a bare ','. There
// must be at least one non ',' character in the match. So the format
// specifier is placed directly next to the ':' so that the next
- // expected ' ' after the ':' will be the character it matches on
+ // expected ' ' after the ':' will be the character it matches on
// when there is no layer field.
numPartsRead = sscanf( (char *)in_data, "Dialogue:%128[^,],", layerField );
if ( numPartsRead != 1 )
goto fail;
-
+
styleToTextFields = (char *)find_field( in_data, in_data + in_size, 4 );
if ( styleToTextFields == NULL ) {
free( layerField );
goto fail;
}
-
+
// The sscanf conversion above will result in an extra space
// before the layerField. Strip the space.
char *stripLayerField = layerField;
@@ -449,9 +397,9 @@ static hb_buffer_t *ssa_decode_line_to_mkv_ssa( hb_work_object_t * w, uint8_t *i
strcat( mkvIn, "," );
strcat( mkvIn, stripLayerField );
strcat( mkvIn, "," );
- strcat( mkvIn, (char *) styleToTextFields );
-
- out->size = strlen(mkvIn);
+ strcat( mkvIn, (char *)styleToTextFields );
+
+ out->size = strlen(mkvIn) + 1;
out->s.frametype = HB_FRAME_SUBTITLE;
out->s.start = in_start;
out->s.stop = in_stop;
@@ -461,11 +409,11 @@ static hb_buffer_t *ssa_decode_line_to_mkv_ssa( hb_work_object_t * w, uint8_t *i
{
hb_buffer_close(&out);
}
-
+
free( layerField );
-
+
return out;
-
+
fail:
hb_log( "decssasub: malformed SSA subtitle packet: %.*s\n", in_size, in_data );
return NULL;
@@ -479,24 +427,18 @@ static int decssaInit( hb_work_object_t * w, hb_job_t * job )
w->private_data = pv;
pv->job = job;
- if (job->mux & HB_MUX_MASK_AV && w->subtitle->config.dest != RENDERSUB )
- {
- pv->raw = 1;
- }
-
return 0;
}
static int decssaWork( hb_work_object_t * w, hb_buffer_t ** buf_in,
hb_buffer_t ** buf_out )
{
- hb_work_private_t * pv = w->private_data;
hb_buffer_t * in = *buf_in;
-
+
#if SSA_VERBOSE_PACKETS
printf("\nPACKET(%"PRId64",%"PRId64"): %.*s\n", in->s.start/90, in->s.stop/90, in->size, in->data);
#endif
-
+
if ( in->size <= 0 )
{
*buf_out = in;
@@ -504,15 +446,7 @@ static int decssaWork( hb_work_object_t * w, hb_buffer_t ** buf_in,
return HB_WORK_DONE;
}
- if (w->subtitle->config.dest == PASSTHRUSUB &&
- (pv->job->mux & HB_MUX_MASK_MKV))
- {
- *buf_out = ssa_to_mkv_ssa(w, in);
- }
- else
- {
- *buf_out = ssa_decode_packet(w, in);
- }
+ *buf_out = ssa_decode_packet(w, in);
return HB_WORK_OK;
}