summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--libhb/decssasub.c269
1 files changed, 269 insertions, 0 deletions
diff --git a/libhb/decssasub.c b/libhb/decssasub.c
new file mode 100644
index 000000000..e4ff0dc59
--- /dev/null
+++ b/libhb/decssasub.c
@@ -0,0 +1,269 @@
+/*
+ This file is part of the HandBrake source code.
+ Homepage: <http://handbrake.fr/>.
+ It may be used under the terms of the GNU General Public License. */
+
+/*
+ * Converts SSA subtitles to UTF-8 subtitles with limited HTML-style markup (<b>, <i>, <u>).
+ *
+ * SSA format references:
+ * http://www.matroska.org/technical/specs/subtitles/ssa.html
+ * http://moodub.free.fr/video/ass-specs.doc
+ * vlc-1.0.4/modules/codec/subtitles/subsass.c:ParseSSAString
+ *
+ * @author David Foster (davidfstr)
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include "hb.h"
+
+/*
+ * Bit flags for the inline text styles tracked while converting a subtitle.
+ * Values may be OR-ed together to describe the set of currently active styles.
+ */
+typedef enum {
+    BOLD      = 1 << 0,
+    ITALIC    = 1 << 1,
+    UNDERLINE = 1 << 2
+} StyleSet;
+
+// Worst-case number of bytes emitted for one style change, which happens when
+// all three styles are closed (12 bytes) and reopened (9 bytes):
+// "<b></b>".len + "<i></i>".len + "<u></u>".len
+#define MAX_OVERHEAD_PER_OVERRIDE (7 * 3)
+
+/*
+ * Converts an SSA timestamp given as hours, minutes, seconds and centiseconds
+ * into the value stored in a buffer's start/stop fields: the time in
+ * milliseconds multiplied by 90.
+ *
+ * Every argument is parenthesized so that expressions such as 'a + b' expand
+ * correctly, and the arithmetic is carried out in 'long long' so that the
+ * result cannot overflow on platforms where 'long' is only 32 bits wide
+ * (one hour is already 324,000,000 units).
+ */
+#define SSA_2_HB_TIME(hr,min,sec,centi) \
+    ( 90LL * ( (hr)    * 1000LL * 60 * 60 +\
+               (min)   * 1000LL * 60 +\
+               (sec)   * 1000LL +\
+               (centi) * 10LL ) )
+
+/*
+ * Parses one SSA style override block and returns the resulting style set.
+ *
+ * pos        - points at the opening '{' of the override block; the text must
+ *              be NUL-terminated, since scanning stops at the first '}' or NUL
+ * prevStyles - styles that were active before this block
+ *
+ * Only the codes \b0, \b1, \i0, \i1, \u0 and \u1 are interpreted; every other
+ * code inside the block is skipped. Returns prevStyles with the recognized
+ * codes applied in order.
+ */
+static StyleSet ssa_parse_style_override( char *pos, StyleSet prevStyles )
+{
+    StyleSet nextStyles = prevStyles;
+    for (;;)
+    {
+        StyleSet styleID;
+
+        // Skip over leading '{' or last '\\'
+        pos++;
+
+        // Scan for next \code
+        while ( *pos != '\\' && *pos != '}' && *pos != '\0' ) pos++;
+        if ( *pos != '\\' )
+        {
+            // End of style override block
+            break;
+        }
+
+        // Identify the style letter with explicit comparisons. strchr() must
+        // not be used here: it also matches the terminating NUL of its search
+        // string, which would make us read pos[2] beyond the end of the text
+        // when the backslash is the last character before the NUL.
+        switch ( pos[1] )
+        {
+            case 'b': styleID = BOLD;      break;
+            case 'i': styleID = ITALIC;    break;
+            case 'u': styleID = UNDERLINE; break;
+            default:
+                // Not a code we interpret (or NUL); the next iteration
+                // advances by one character and stops on a NUL by itself.
+                continue;
+        }
+
+        // pos[1] is non-NUL here, so pos[2] is at worst the terminator.
+        // Only an explicit '0' or '1' changes the style; a truncated code
+        // such as "\b" followed by NUL is ignored rather than read as "\b0".
+        if ( pos[2] == '1' )
+        {
+            nextStyles |= styleID;
+        }
+        else if ( pos[2] == '0' )
+        {
+            nextStyles &= ~styleID;
+        }
+    }
+    return nextStyles;
+}
+
+/*
+ * Writes the HTML tags needed to switch from one style set to another.
+ *
+ * dst        - in/out cursor into the output text; advanced past the bytes
+ *              written. No terminator is appended and no bounds are checked,
+ *              so the caller must have reserved enough room.
+ * prevStyles - styles currently open; all of them are closed, innermost first
+ * nextStyles - styles to open afterwards, in the order bold, italic, underline
+ */
+static void ssa_append_html_tags_for_style_change(
+    char **dst, StyleSet prevStyles, StyleSet nextStyles )
+{
+    // Listed in opening order; closing walks the table backwards
+    static const struct {
+        StyleSet    flag;
+        const char *openTag;
+        const char *closeTag;
+    } tags[] = {
+        { BOLD,      "<b>", "</b>" },
+        { ITALIC,    "<i>", "</i>" },
+        { UNDERLINE, "<u>", "</u>" }
+    };
+    const int numTags = (int) ( sizeof(tags) / sizeof(tags[0]) );
+    char *out = *dst;
+    const char *s;
+    int i;
+
+    // Reverse-order close all previous styles
+    for ( i = numTags - 1; i >= 0; i-- )
+    {
+        if ( prevStyles & tags[i].flag )
+        {
+            for ( s = tags[i].closeTag; *s != '\0'; s++ )
+                *out++ = *s;
+        }
+    }
+
+    // Forward-order open all next styles
+    for ( i = 0; i < numTags; i++ )
+    {
+        if ( nextStyles & tags[i].flag )
+        {
+            for ( s = tags[i].openTag; *s != '\0'; s++ )
+                *out++ = *s;
+        }
+    }
+
+    *dst = out;
+}
+
+/*
+ * Decodes one SSA packet into a newly allocated buffer holding the subtitle
+ * text with limited HTML-style markup (<b>, <i>, <u>).
+ *
+ * SSA packet format:
+ *   Dialogue: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text
+ *             1      2     3   4     5    6       7       8       9      10
+ *
+ * in - the input packet. It is grown by one byte so that a NUL sentinel can be
+ *      stored after its data, and its start/stop fields are overwritten with
+ *      the times parsed from the Start and End fields.
+ *
+ * Returns the output buffer (start/stop copied from 'in'), or NULL if the
+ * packet is malformed or the output buffer could not be allocated. The input
+ * buffer is not closed here.
+ */
+static hb_buffer_t *ssa_decode_to_utf8( hb_buffer_t *in )
+{
+    // Store a NUL after the end of the buffer to make using sscanf safe and to
+    // bound the one-character look-ahead reads (pos[1]) performed below
+    hb_buffer_realloc( in, in->size + 1 );
+    in->data[in->size] = '\0';
+
+    // Take these pointers only AFTER the realloc above: growing the buffer may
+    // relocate in->data, which would leave earlier pointers dangling
+    uint8_t *pos = in->data;
+    uint8_t *end = in->data + in->size;
+
+    /*
+     * Parse Start and End fields for timing information
+     */
+    int start_hr, start_min, start_sec, start_centi;
+    int   end_hr,   end_min,   end_sec,   end_centi;
+    int numPartsRead = sscanf( (char *) in->data, "%*128[^,],"
+        "%d:%d:%d.%d,"     // Start
+        "%d:%d:%d.%d,",    // End
+        &start_hr, &start_min, &start_sec, &start_centi,
+          &end_hr,   &end_min,   &end_sec,   &end_centi );
+    if ( numPartsRead != 8 )
+        goto fail;
+
+    in->start = SSA_2_HB_TIME(start_hr, start_min, start_sec, start_centi);
+    in->stop  = SSA_2_HB_TIME(  end_hr,   end_min,   end_sec,   end_centi);
+
+    /*
+     * Advance 'pos' to the beginning of the Text field
+     */
+    int curFieldID = 1;
+    while ( pos < end )
+    {
+        if ( *pos++ == ',' )
+        {
+            curFieldID++;
+            if ( curFieldID == 10 )    // Text
+                break;
+        }
+    }
+    if ( curFieldID != 10 )
+        goto fail;
+    uint8_t *textFieldPos = pos;
+
+    // Count the number of style overrides in the Text field
+    int numStyleOverrides = 0;
+    while ( pos < end )
+    {
+        if ( *pos++ == '{' )
+        {
+            numStyleOverrides++;
+        }
+    }
+
+    // Size the output from the start of the Text field. 'pos' has already
+    // been advanced to 'end' by the counting loop, so it must not be used
+    // here. The "+ 1" accounts for the closing tags appended after the text.
+    int maxOutputSize = (int) ( end - textFieldPos ) +
+        ( ( numStyleOverrides + 1 ) * MAX_OVERHEAD_PER_OVERRIDE );
+    hb_buffer_t *out = hb_buffer_init( maxOutputSize );
+    if ( out == NULL )
+        return NULL;
+
+    /*
+     * The Text field contains plain text marked up with:
+     * (1) '\n' -> space
+     * (2) '\N' -> newline
+     * (3) curly-brace control codes like '{\k44}' -> empty (strip them)
+     *
+     * Perform the above conversions and copy it to the output packet
+     */
+    StyleSet prevStyles = 0;
+    uint8_t *dst = out->data;
+    pos = textFieldPos;
+    while ( pos < end )
+    {
+        if ( pos[0] == '\\' && pos[1] == 'n' )
+        {
+            *dst++ = ' ';
+            pos += 2;
+        }
+        else if ( pos[0] == '\\' && pos[1] == 'N' )
+        {
+            *dst++ = '\n';
+            pos += 2;
+        }
+        else if ( pos[0] == '{' )
+        {
+            // Parse SSA style overrides and append appropriate HTML style tags.
+            // The helpers take char pointers, so convert explicitly instead of
+            // relying on an implicit uint8_t* -> char* conversion.
+            StyleSet nextStyles =
+                ssa_parse_style_override( (char *) pos, prevStyles );
+            ssa_append_html_tags_for_style_change(
+                (char **) &dst, prevStyles, nextStyles );
+            prevStyles = nextStyles;
+
+            // Skip past SSA control code
+            while ( pos < end && *pos != '}' ) pos++;
+            if    ( pos < end && *pos == '}' ) pos++;
+        }
+        else
+        {
+            // Copy raw character
+            *dst++ = *pos++;
+        }
+    }
+
+    // Append closing HTML style tags
+    ssa_append_html_tags_for_style_change( (char **) &dst, prevStyles, 0 );
+
+    // Trim output buffer to the actual amount of data written
+    out->size = dst - out->data;
+
+    // Copy metadata from the input packet to the output packet
+    out->start = in->start;
+    out->stop = in->stop;
+
+    return out;
+
+fail:
+    hb_log( "decssasub: malformed SSA subtitle packet: %.*s\n", in->size, in->data );
+    return NULL;
+}
+
+/*
+ * Init callback of the SSA decoder work object.
+ * The decoder keeps no state, so there is nothing to set up; always returns 0.
+ */
+static int decssaInit( hb_work_object_t * w, hb_job_t * job )
+{
+    // Neither argument is needed
+    (void) w;
+    (void) job;
+
+    return 0;
+}
+
+/*
+ * Work callback of the SSA decoder: consumes one input packet and produces at
+ * most one output packet.
+ *
+ * buf_in  - in/out; the packet it points to is closed and the slot is set to
+ *           NULL before returning
+ * buf_out - receives the decoded packet, an empty packet when the input was
+ *           empty, or NULL when decoding failed or produced no text
+ *
+ * Always returns HB_WORK_OK.
+ */
+static int decssaWork( hb_work_object_t * w, hb_buffer_t ** buf_in,
+                       hb_buffer_t ** buf_out )
+{
+    hb_buffer_t * packet = *buf_in;
+
+    // A non-empty packet is decoded; an empty one is answered with a fresh
+    // empty packet
+    hb_buffer_t * result = ( packet->size > 0 )
+        ? ssa_decode_to_utf8( packet )
+        : hb_buffer_init( 0 );
+
+    if ( result != NULL )
+    {
+        // We shouldn't be storing the extra NUL character,
+        // but the MP4 muxer expects this, unfortunately.
+        if ( result->size > 0 && result->data[result->size - 1] != '\0' )
+        {
+            // NOTE: result->size remains unchanged
+            hb_buffer_realloc( result, result->size + 1 );
+            result->data[result->size] = '\0';
+        }
+
+        // An empty output packet would be interpreted as an end-of-stream,
+        // so when the input packet was non-empty, drop an empty result
+        // (even if the subtitle itself was empty) instead of passing it on
+        if ( packet->size > 0 && result->size == 0 )
+        {
+            hb_buffer_close( &result );
+        }
+    }
+
+    // The input packet is no longer needed
+    hb_buffer_close( &packet );
+
+    *buf_in = NULL;
+    *buf_out = result;
+    return HB_WORK_OK;
+}
+
+/*
+ * Close callback of the SSA decoder work object.
+ * Nothing was set up by decssaInit, so there is nothing to release.
+ */
+static void decssaClose( hb_work_object_t * w )
+{
+    // Intentionally empty
+    (void) w;
+}
+
+hb_work_object_t hb_decssasub = // Non-static descriptor bundling this decoder's callbacks; fields are positional
+{
+ WORK_DECSSASUB, // presumably the work-object identifier; confirm against hb_work_object_t
+ "SSA Subtitle Decoder", // presumably a human-readable name; confirm against hb_work_object_t
+ decssaInit, // defined above: does no setup and returns 0
+ decssaWork, // defined above: converts one input packet per call
+ decssaClose // defined above: does nothing
+};