/*
This file is part of the HandBrake source code.
Homepage: <http://handbrake.fr/>.
It may be used under the terms of the GNU General Public License. */
/*
* Converts SSA subtitles to UTF-8 subtitles with limited HTML-style markup (<b>, <i>, <u>).
*
* SSA format references:
* http://www.matroska.org/technical/specs/subtitles/ssa.html
* http://moodub.free.fr/video/ass-specs.doc
* vlc-1.0.4/modules/codec/subtitles/subsass.c:ParseSSAString
*
* @author David Foster (davidfstr)
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "hb.h"
/*
 * Text styles tracked while converting a subtitle line.
 * Each style occupies its own bit so that several of them can be OR-ed
 * together into a single StyleSet value.
 */
typedef enum {
    BOLD      = 1 << 0,
    ITALIC    = 1 << 1,
    UNDERLINE = 1 << 2
} StyleSet;
// Worst-case number of markup bytes emitted for one style override:
// for each of the three styles, one closing tag plus one opening tag,
// i.e. "</b><b>".len == 7 bytes per style.
#define MAX_OVERHEAD_PER_OVERRIDE (3 * 7)
/*
 * Converts a timestamp given as hours, minutes, seconds and centiseconds
 * into a tick count at 90 ticks per millisecond.
 *
 * Every argument is parenthesized so that expressions such as `h + 1` expand
 * correctly, and the arithmetic is carried out in int64_t so the result does
 * not overflow on platforms where `long` is only 32 bits wide.
 */
#define SSA_2_HB_TIME(hr,min,sec,centi) \
    ( (int64_t) 90 * ( (int64_t) (hr)    * 1000 * 60 * 60 + \
                       (int64_t) (min)   * 1000 * 60 + \
                       (int64_t) (sec)   * 1000 + \
                       (int64_t) (centi) * 10 ) )
/*
 * Parses one SSA style override block and returns the resulting style set.
 *
 * pos        - points at the '{' that opens the override block; the text
 *              must be NUL-terminated.
 * prevStyles - styles in effect before the block.
 *
 * Scanning stops at the first '}' or at the terminating NUL, whichever comes
 * first. Within the block only the codes \b, \i and \u immediately followed
 * by '0' or '1' are interpreted (style off / style on); every other code is
 * ignored.
 */
static StyleSet ssa_parse_style_override( uint8_t *pos, StyleSet prevStyles )
{
    StyleSet nextStyles = prevStyles;

    for (;;)
    {
        // Skip over the leading '{' or the previously examined '\\'
        pos++;

        // Scan for the next \code
        while ( *pos != '\\' && *pos != '}' && *pos != '\0' )
            pos++;

        if ( *pos != '\\' )
        {
            // End of style override block
            break;
        }

        // Identify the style letter with explicit comparisons. strchr()
        // would also match the terminating NUL, which would make the code
        // below read pos[2] beyond the end of the string.
        StyleSet styleID;
        switch ( pos[1] )
        {
            case 'b': styleID = BOLD;      break;
            case 'i': styleID = ITALIC;    break;
            case 'u': styleID = UNDERLINE; break;
            default:
                // Not a style code (or end of string): look for the next one
                continue;
        }

        // pos[1] is a non-NUL character here, so pos[2] is still within the
        // NUL-terminated string.
        if ( pos[2] == '1' )
        {
            nextStyles |= styleID;
        }
        else if ( pos[2] == '0' )
        {
            nextStyles &= ~styleID;
        }
    }

    return nextStyles;
}
/*
 * Copies the characters of the NUL-terminated string 'str' to *dst and
 * advances *dst past them. The terminating NUL is not copied.
 */
static void ssa_append_str( uint8_t **dst, const char *str )
{
    while ( *str )
    {
        *(*dst)++ = (uint8_t) *str++;
    }
}

/*
 * Writes the HTML-style tags needed to switch from 'prevStyles' to
 * 'nextStyles' at *dst, advancing *dst past the written bytes.
 *
 * All previously active styles are closed (in reverse order) and all newly
 * active styles are opened (in forward order), even for styles present in
 * both sets, so the emitted tags always nest properly.
 *
 * At most 21 bytes are written (three 4-byte closing tags plus three 3-byte
 * opening tags); the caller must guarantee that much room at *dst.
 * No NUL terminator is written.
 */
static void ssa_append_html_tags_for_style_change(
    uint8_t **dst, StyleSet prevStyles, StyleSet nextStyles )
{
    // Reverse-order close all previous styles
    if ( prevStyles & UNDERLINE ) ssa_append_str( dst, "</u>" );
    if ( prevStyles & ITALIC )    ssa_append_str( dst, "</i>" );
    if ( prevStyles & BOLD )      ssa_append_str( dst, "</b>" );

    // Forward-order open all next styles
    if ( nextStyles & BOLD )      ssa_append_str( dst, "<b>" );
    if ( nextStyles & ITALIC )    ssa_append_str( dst, "<i>" );
    if ( nextStyles & UNDERLINE ) ssa_append_str( dst, "<u>" );
}
// Forward declaration: ssa_decode_to_utf8() calls this per-line decoder
// before its definition appears further down in this file.
static hb_buffer_t *ssa_decode_to_utf8_line( uint8_t *in_data, int in_size );
/*
 * SSA packet format:
 * ( Dialogue: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text CR LF ) +
 *   (fields 1 through 10, in the order listed; Text is field 10)
 *
 * Splits the packet into lines at CR/LF characters, decodes every line with
 * ssa_decode_to_utf8_line() and returns the decoded buffers chained through
 * their 'next' pointers in input order.
 *
 * The input buffer is modified in place: a NUL is stored just past its data
 * and strtok_r() overwrites the line delimiters.
 *
 * Returns NULL when no line produced a non-empty output buffer. Lines that
 * cannot be decoded are skipped.
 */
static hb_buffer_t *ssa_decode_to_utf8( hb_buffer_t *in )
{
    // Store NULL after the end of the buffer to make using string processing safe
    hb_buffer_realloc( in, in->size + 1 );
    in->data[in->size] = '\0';

    hb_buffer_t *out_list = NULL;
    hb_buffer_t **nextPtr = &out_list;

    const char *EOL = "\r\n";
    char *curLine, *curLine_parserData;
    for ( curLine = strtok_r( (char *) in->data, EOL, &curLine_parserData );
          curLine;
          curLine = strtok_r( NULL, EOL, &curLine_parserData ) )
    {
        // Skip empty lines and spaces between adjacent CR and LF
        if ( curLine[0] == '\0' )
            continue;

        // Decode an individual SSA line
        hb_buffer_t *out = ssa_decode_to_utf8_line( (uint8_t *) curLine, (int) strlen( curLine ) );

        // The line decoder returns NULL for a malformed line or when no
        // output buffer could be allocated. Skip such lines instead of
        // dereferencing a NULL pointer below.
        if ( out == NULL )
            continue;

        // We shouldn't be storing the extra NULL character,
        // but the MP4 muxer expects this, unfortunately.
        if ( out->size > 0 && out->data[out->size - 1] != '\0' )
        {
            // NOTE: out->size remains unchanged
            hb_buffer_realloc( out, out->size + 1 );
            out->data[out->size] = '\0';
        }

        // If the input packet was non-empty, do not pass through
        // an empty output packet (even if the subtitle was empty),
        // as this would be interpreted as an end-of-stream
        if ( in->size > 0 && out->size == 0 )
        {
            hb_buffer_close( &out );
            continue;
        }

        // Append 'out' to 'out_list'
        *nextPtr = out;
        nextPtr = &out->next;
    }

    return out_list;
}
/*
 * SSA line format:
 * Dialogue: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text '\0'
 *   (fields 1 through 10, in the order listed; Text is field 10)
 *
 * Decodes one such line. 'in_data' must be NUL-terminated (it is handed to
 * sscanf() and scanned with one byte of look-ahead); 'in_size' is its length
 * without the terminator.
 *
 * On success returns a buffer obtained from hb_buffer_init() that holds the
 * converted Text field, with 'start' and 'stop' computed from the Start and
 * End fields. Returns NULL if the timing fields or the Text field cannot be
 * located (the line is then logged) or if hb_buffer_init() returns NULL.
 */
static hb_buffer_t *ssa_decode_to_utf8_line( uint8_t *in_data, int in_size )
{
    uint8_t * const lineEnd = in_data + in_size;

    /*
     * Timing information: skip the first field, then read Start and End,
     * each written as hours:minutes:seconds.centiseconds
     */
    int startH, startM, startS, startCs;
    int stopH, stopM, stopS, stopCs;
    int parsed = sscanf( (char *) in_data, "%*128[^,],"
                         "%d:%d:%d.%d,"   // Start
                         "%d:%d:%d.%d,",  // End
                         &startH, &startM, &startS, &startCs,
                         &stopH, &stopM, &stopS, &stopCs );
    if ( parsed != 8 )
        goto fail;

    int64_t startTime = SSA_2_HB_TIME(startH, startM, startS, startCs);
    int64_t stopTime  = SSA_2_HB_TIME(stopH, stopM, stopS, stopCs);

    /*
     * The Text field starts right after the ninth comma of the line
     */
    uint8_t *cursor = in_data;
    int commasLeft = 9;
    for ( ; cursor < lineEnd && commasLeft > 0; cursor++ )
    {
        if ( *cursor == ',' )
            commasLeft--;
    }
    if ( commasLeft != 0 )
        goto fail;

    uint8_t * const textStart = cursor;

    // Every '{' in the Text field may introduce a style override
    int overrideCount = 0;
    for ( const uint8_t *scan = textStart; scan < lineEnd; scan++ )
    {
        if ( *scan == '{' )
            overrideCount++;
    }

    // Text length plus worst-case markup for every override and for the
    // closing tags appended after the last character
    int maxOutputSize = (lineEnd - textStart) + ((overrideCount + 1) * MAX_OVERHEAD_PER_OVERRIDE);
    hb_buffer_t *out = hb_buffer_init( maxOutputSize );
    if ( out == NULL )
        return NULL;

    /*
     * Copy the Text field to the output, converting as we go:
     *   (1) '\n'                            -> space
     *   (2) '\N'                            -> newline
     *   (3) curly-brace codes like '{\k44}' -> removed; bold, italic and
     *       underline changes inside them become HTML-style tags
     */
    StyleSet activeStyles = 0;
    uint8_t *writePtr = out->data;
    uint8_t *readPtr = textStart;
    while ( readPtr < lineEnd )
    {
        if ( readPtr[0] == '\\' && ( readPtr[1] == 'n' || readPtr[1] == 'N' ) )
        {
            *writePtr++ = ( readPtr[1] == 'n' ) ? ' ' : '\n';
            readPtr += 2;
            continue;
        }

        if ( readPtr[0] != '{' )
        {
            // Ordinary character: copy verbatim
            *writePtr++ = *readPtr++;
            continue;
        }

        // Style override block: emit the tags for whatever style changes it requests
        StyleSet updatedStyles = ssa_parse_style_override( readPtr, activeStyles );
        ssa_append_html_tags_for_style_change( &writePtr, activeStyles, updatedStyles );
        activeStyles = updatedStyles;

        // Drop the block itself, including its closing '}' when present
        while ( readPtr < lineEnd && *readPtr != '}' )
            readPtr++;
        if ( readPtr < lineEnd )
            readPtr++;
    }

    // Close whatever styles are still open at the end of the text
    ssa_append_html_tags_for_style_change( &writePtr, activeStyles, 0 );

    // Record how many bytes were actually produced
    out->size = writePtr - out->data;

    // Attach the timing parsed from the line
    out->start = startTime;
    out->stop  = stopTime;

    return out;

fail:
    hb_log( "decssasub: malformed SSA subtitle packet: %.*s\n", in_size, in_data );
    return NULL;
}
/*
 * Init callback of the SSA decoder work object.
 * There is nothing to set up, so both arguments are ignored and 0 is
 * always returned.
 */
static int decssaInit( hb_work_object_t * w, hb_job_t * job )
{
    (void) w;
    (void) job;

    return 0;
}
/*
 * Work callback of the SSA decoder work object.
 *
 * Consumes the packet in *buf_in: a non-empty packet is decoded with
 * ssa_decode_to_utf8(), an empty one yields a fresh zero-size buffer.
 * The input packet is then closed and *buf_in is set to NULL; the result
 * (possibly NULL, possibly a chain of buffers) is stored in *buf_out.
 *
 * Always returns HB_WORK_OK.
 */
static int decssaWork( hb_work_object_t * w, hb_buffer_t ** buf_in,
                       hb_buffer_t ** buf_out )
{
    hb_buffer_t * packet = *buf_in;
    hb_buffer_t * decoded = ( packet->size > 0 )
                          ? ssa_decode_to_utf8( packet )
                          : hb_buffer_init( 0 );

    // The input packet has been fully consumed; release it
    hb_buffer_close( &packet );
    *buf_in = NULL;

    *buf_out = decoded;
    return HB_WORK_OK;
}
/*
 * Close callback of the SSA decoder work object.
 * Nothing needs to be released here, so the argument is ignored.
 */
static void decssaClose( hb_work_object_t * w )
{
    (void) w;
}
/*
 * Work object for the SSA subtitle decoder.
 * The initializer is positional; the member names of hb_work_object_t are
 * declared outside this file (presumably in hb.h - confirm there).
 */
hb_work_object_t hb_decssasub =
{
WORK_DECSSASUB,         // identifier constant, defined outside this file
"SSA Subtitle Decoder", // human-readable name
decssaInit,             // init callback (always returns 0)
decssaWork,             // per-packet decode callback
decssaClose             // close callback (no-op)
};