/* decssasub.c
Copyright (c) 2003-2019 HandBrake Team
This file is part of the HandBrake source code
Homepage: <http://handbrake.fr/>.
It may be used under the terms of the GNU General Public License v2.
For full terms see the COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html
*/
/*
* Converts SSA subtitles to either:
* (1) TEXTSUB format: UTF-8 subtitles with limited HTML-style markup (<b>, <i>, <u>), or
* (2) PICTURESUB format, using libass.
*
* SSA format references:
* http://www.matroska.org/technical/specs/subtitles/ssa.html
* http://moodub.free.fr/video/ass-specs.doc
* vlc-1.0.4/modules/codec/subtitles/subsass.c:ParseSSAString
*
* libass references:
* libass-0.9.9/ass.h
* vlc-1.0.4/modules/codec/libass.c
*
* @author David Foster (davidfstr)
*/
#include
#include
#include
#include "hb.h"
#include
#include "decssasub.h"
#include "colormap.h"
// Private state of one SSA subtitle decoder instance (allocated in
// decssaInit and stored in the work object's private_data).
struct hb_work_private_s
{
// Job handed to decssaInit.
hb_job_t * job;
// Subtitle track being decoded (copied from w->subtitle); receives the
// SSA header as extradata when a file is imported.
hb_subtitle_t * subtitle;
// SSA Import
// Handle on subtitle->config.src_filename. Stays NULL when the work object
// has an input fifo or the subtitle has no source filename.
FILE * file;
// Value written as the ReadOrder field of the next packet built from the
// file; incremented once per packet.
int readOrder;
};
// Debug switch: when non-zero, decssaWork printf()s every packet it forwards
// (start/stop divided by 90, followed by the packet text).
#define SSA_VERBOSE_PACKETS 0
/*
 * Reads the SSA script header from pv->file and stores it as the subtitle's
 * extradata.
 *
 * Lines of non-zero length are appended verbatim until the "Format:" line
 * that must directly follow the "[Events]" section tag (both tags are matched
 * case-insensitively at the start of a line). On success
 * pv->subtitle->extradata owns the NUL-terminated header text and
 * extradata_size includes the terminator.
 *
 * Returns 0 on success. Returns 1 when the input ends before the header is
 * complete, when the line after "[Events]" is not a "Format:" line, or when
 * the header text cannot be allocated. Nothing is stored in the subtitle on
 * failure.
 */
static int extradataInit( hb_work_private_t * pv )
{
    const char * events_tag = "[Events]";
    const char * format_tag = "Format:";
    int          events_len = strlen(events_tag);
    int          format_len = strlen(format_tag);
    int          events     = 0;    // set once "[Events]" has been seen
    int          complete   = 0;    // set once the "Format:" line is stored
    char       * header     = NULL;

    while (!complete)
    {
        char    * line = NULL;
        size_t    size = 0;
        ssize_t   len  = hb_getline(&line, &size, pv->file);

        if (len < 0)
        {
            // Incomplete SSA header. The line buffer may have been
            // allocated even though the read failed, so release it too.
            free(line);
            free(header);
            return 1;
        }
        if (len > 0 && line != NULL)
        {
            char * joined;

            if (header != NULL)
            {
                joined = hb_strdup_printf("%s%s", header, line);
            }
            else
            {
                joined = strdup(line);
            }
            free(header);
            header = joined;
            if (header == NULL)
            {
                // Out of memory: do not carry on with a truncated header.
                free(line);
                return 1;
            }

            if (!events)
            {
                if (len >= events_len &&
                    !strncasecmp(line, events_tag, events_len))
                {
                    events = 1;
                }
            }
            else if (len >= format_len &&
                     !strncasecmp(line, format_tag, format_len))
            {
                complete = 1;
            }
            else
            {
                // Improperly formatted SSA header
                free(line);
                free(header);
                return 1;
            }
        }
        free(line);
    }

    pv->subtitle->extradata      = (uint8_t*)header;
    pv->subtitle->extradata_size = strlen(header) + 1;
    return 0;
}
/*
 * Work-object init callback: allocates the decoder's private state and, for
 * a standalone SSA file, opens it and loads its header into the subtitle's
 * extradata.
 *
 * A file is opened only when the work object has no input fifo and the
 * subtitle carries a source filename; otherwise no file is touched here.
 *
 * Returns 0 on success. Returns 1, with w->private_data set to NULL, when
 * the private state cannot be allocated, the file cannot be opened, or
 * extradataInit rejects its header.
 */
static int decssaInit( hb_work_object_t * w, hb_job_t * job )
{
    hb_work_private_t * pv = calloc( 1, sizeof( *pv ) );

    w->private_data = pv;
    if (pv == NULL)
    {
        // Must be checked before the first dereference below.
        hb_error("decssasub: could not allocate decoder state\n");
        return 1;
    }
    pv->job      = job;
    pv->subtitle = w->subtitle;

    if (w->fifo_in == NULL && pv->subtitle->config.src_filename != NULL)
    {
        pv->file = hb_fopen(pv->subtitle->config.src_filename, "r");
        if (pv->file == NULL)
        {
            hb_error("Could not open the SSA subtitle file '%s'\n",
                     pv->subtitle->config.src_filename);
            goto fail;
        }

        // Read SSA header and store in subtitle extradata
        if (extradataInit(pv))
        {
            goto fail;
        }
    }
    return 0;

fail:
    if (pv->file != NULL)
    {
        fclose(pv->file);
    }
    free(pv);
    w->private_data = NULL;
    return 1;
}
/*
 * Converts an SSA timestamp given as hours, minutes, seconds and
 * centiseconds into a count of 1/90 ms units (90 per millisecond).
 * Every argument is parenthesized so that expressions such as `h + 1` can be
 * passed safely; the arithmetic is carried out in long long.
 */
#define SSA_2_HB_TIME(hr,min,sec,centi) \
    ( 90LL * ( (hr)    * 1000LL * 60 * 60 +\
               (min)   * 1000LL * 60 +\
               (sec)   * 1000LL +\
               (centi) * 10LL ) )
/*
 * Extracts the Start and End timestamps of an SSA "Dialogue:" line.
 *
 * On success the two times are converted with SSA_2_HB_TIME, stored in
 * *start and *stop, and 0 is returned. When the line does not match the
 * expected layout, 1 is returned and neither output is written.
 */
static int parse_timing( char *line, int64_t *start, int64_t *stop )
{
    // Index 0..3: hours, minutes, seconds, centiseconds.
    int begin[4];
    int finish[4];
    int matched;

    // The first field is skipped with "%*128[^,]", which needs at least one
    // non-comma character to match. SSA scripts may leave that field empty,
    // so the specifier sits directly after the ':' and consumes the space
    // that follows it when nothing else precedes the first ','.
    matched = sscanf(line, "Dialogue:%*128[^,],"
                           "%d:%d:%d.%d,"     // Start
                           "%d:%d:%d.%d,",    // End
                     &begin[0],  &begin[1],  &begin[2],  &begin[3],
                     &finish[0], &finish[1], &finish[2], &finish[3]);

    if (matched == 8)
    {
        *start = SSA_2_HB_TIME(begin[0],  begin[1],  begin[2],  begin[3]);
        *stop  = SSA_2_HB_TIME(finish[0], finish[1], finish[2], finish[3]);
        return 0;
    }
    return 1;
}
/*
 * Locates the start of a comma-separated field within the range [pos, end).
 *
 * Fields are numbered from 1: field 1 begins at pos and every ',' starts the
 * next field. The scan is bounded by end only; it does not stop at '\0'.
 *
 * Returns a pointer to the first character of field fieldNum (this equals
 * end when that field is empty and last), or NULL when fieldNum is less than
 * 1 or the range contains fewer than fieldNum fields.
 */
static char * find_field( char * pos, char * end, int fieldNum )
{
    int curFieldID = 1;

    if (fieldNum < 1)
    {
        return NULL;
    }
    if (fieldNum == 1)
    {
        // Field 1 has no preceding comma, so the scan below could never
        // report it; it starts at the beginning of the range.
        return pos <= end ? pos : NULL;
    }

    for (; pos < end; pos++)
    {
        if (*pos == ',' && ++curFieldID == fieldNum)
        {
            return pos + 1;
        }
    }
    return NULL;
}
/*
 * Converts one "Dialogue:" line of an SSA script into an MKV-SSA packet
 * (the form libass expects).
 *
 * SSA line format:
 *   Dialogue: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text '\0'
 *             1      2     3   4     5    6       7       8       9      10
 *
 * MKV-SSA packet format:
 *   ReadOrder,Marked,Style,Name,MarginL,MarginR,MarginV,Effect,Text '\0'
 *   1         2      3     4    5       6       7       8      9
 *
 * pv   - decoder state; pv->readOrder supplies the ReadOrder field and is
 *        advanced once per packet produced.
 * line - NUL-terminated text of the line.
 * size - number of characters of line to consider.
 *
 * Returns a new buffer holding the NUL-terminated packet text, with start,
 * stop and duration taken from the line's timestamps (start and stop are
 * shifted by subtitle->config.offset * 90). Returns NULL, after logging,
 * when the line cannot be parsed or the buffer cannot be allocated.
 */
static hb_buffer_t *
decode_line_to_mkv_ssa( hb_work_private_t * pv, char * line, int size )
{
    hb_buffer_t * out;
    int64_t       start, stop;
    // "%128[^,]" stores at most 128 characters plus the terminator, so a
    // fixed buffer is sufficient and no heap allocation is needed.
    char          layerField[128 + 1];
    char        * stripLayerField;
    char        * styleToTextFields;
    int           capacity;
    int           written;

    if (parse_timing(line, &start, &stop))
    {
        goto fail;
    }

    // SSA subtitles may have an empty first field (bare ','). "%128[^,]"
    // needs at least one non-',' character to match, so the specifier is
    // placed directly after the ':' and picks up the space that follows it
    // when the field is empty.
    if (sscanf(line, "Dialogue:%128[^,],", layerField) != 1)
    {
        goto fail;
    }

    styleToTextFields = find_field(line, line + size, 4);
    if (styleToTextFields == NULL)
    {
        goto fail;
    }

    // The sscanf conversion above keeps the space(s) that precede the
    // field's value. Strip them.
    stripLayerField = layerField;
    while (*stripLayerField == ' ')
    {
        stripLayerField++;
    }

    // Convert the SSA packet to MKV-SSA format, which is what libass expects
    capacity = size + 1;
    out = hb_buffer_init(capacity);
    if (out == NULL)
    {
        hb_log("decssasub: could not allocate subtitle packet buffer\n");
        return NULL;
    }

    // Bounded write: never run past the buffer even for unexpected input.
    written = snprintf((char *)out->data, capacity, "%d,%s,%s",
                       pv->readOrder, stripLayerField, styleToTextFields);
    if (written < 0 || written >= capacity)
    {
        hb_buffer_close(&out);
        goto fail;
    }
    pv->readOrder++;

    out->size        = written + 1;   // include the terminating '\0'
    out->s.frametype = HB_FRAME_SUBTITLE;
    out->s.start     = start + pv->subtitle->config.offset * 90;
    out->s.duration  = stop - start;
    out->s.stop      = stop + pv->subtitle->config.offset * 90;

    return out;

fail:
    hb_log( "decssasub: malformed SSA subtitle packet: %.*s\n", size, line );
    return NULL;
}
/*
 * Reads lines from pv->file until one of them decodes into a subtitle
 * packet, and returns that packet.
 *
 * Lines that decode_line_to_mkv_ssa rejects are skipped. When the stream is
 * at end-of-file, or hb_getline reports an error or EOF, an EOF buffer from
 * hb_buffer_eof_init is returned instead.
 */
static hb_buffer_t * ssa_read( hb_work_private_t * pv )
{
    for (;;)
    {
        char        * text   = NULL;
        size_t        cap    = 0;
        ssize_t       nread;
        hb_buffer_t * packet = NULL;

        if (feof(pv->file))
        {
            break;
        }

        nread = hb_getline(&text, &cap, pv->file);
        if (nread > 0 && text != NULL)
        {
            packet = decode_line_to_mkv_ssa(pv, text, nread);
        }
        // The packet owns its own copy of the text, so the line buffer is
        // released whether or not decoding succeeded.
        free(text);

        if (packet != NULL)
        {
            return packet;
        }
        if (nread < 0)
        {
            // Error or EOF
            break;
        }
    }

    return hb_buffer_eof_init();
}
/*
 * Work callback: forwards one SSA packet per call.
 *
 * The packet is the buffer found in *buf_in or, when that is NULL and a
 * subtitle file is open, the next packet read from the file. *buf_in is
 * cleared and the packet is handed out through *buf_out.
 *
 * Returns HB_WORK_DONE when the packet carries HB_BUF_FLAG_EOF, otherwise
 * HB_WORK_OK after growing the packet by one byte and storing '\0' there.
 *
 * NOTE(review): the packet is dereferenced unconditionally; if *buf_in is
 * NULL and no file is open this would dereference NULL. Confirm the caller
 * guarantees one of the two.
 */
static int decssaWork( hb_work_object_t * w, hb_buffer_t ** buf_in,
                       hb_buffer_t ** buf_out )
{
    hb_work_private_t * pv = w->private_data;
    hb_buffer_t       * in = *buf_in;

    *buf_in = NULL;

    if (in == NULL && pv->file != NULL)
    {
        in = ssa_read(pv);
    }
    *buf_out = in;

    if (in->s.flags & HB_BUF_FLAG_EOF)
    {
        return HB_WORK_DONE;
    }

    // Not much to do here. ffmpeg already supplies SSA subtitles in the
    // required matroska packet format.
    //
    // We require string termination of the buffer
    in->size += 1;
    hb_buffer_realloc(in, in->size);
    in->data[in->size - 1] = '\0';

#if SSA_VERBOSE_PACKETS
    printf("\nPACKET(%"PRId64",%"PRId64"): %.*s\n", in->s.start/90, in->s.stop/90, in->size, in->data);
#endif

    return HB_WORK_OK;
}
/*
 * Work-object close callback: releases everything decssaInit acquired.
 *
 * Closes the subtitle file if one was opened, frees the private state and
 * clears w->private_data. Does nothing when w->private_data is already NULL
 * (for example after a failed decssaInit).
 */
static void decssaClose( hb_work_object_t * w )
{
    hb_work_private_t * pv = w->private_data;

    if (pv == NULL)
    {
        return;
    }
    if (pv->file != NULL)
    {
        // Opened in decssaInit and not closed anywhere else.
        fclose(pv->file);
        pv->file = NULL;
    }
    free(pv);
    w->private_data = NULL;
}
// Work object describing the SSA subtitle decoder. The initializer is
// positional: an identifier, a display name, then the three callbacks defined
// in this file.
// NOTE(review): the order must match the field order of hb_work_object_t,
// which is declared outside this file — verify before reordering.
hb_work_object_t hb_decssasub =
{
WORK_DECSSASUB,
"SSA Subtitle Decoder",
decssaInit,
decssaWork,
decssaClose
};