diff options
author | jstebbins <[email protected]> | 2009-11-16 17:37:32 +0000 |
---|---|---|
committer | jstebbins <[email protected]> | 2009-11-16 17:37:32 +0000 |
commit | 234471f57b92b167f15a6cd99e81465337302d56 (patch) | |
tree | 91cea958059ac16ef3f5d451705bc869106501de | |
parent | e66c8fbeba618eb7b810a8865965edd107a762ae (diff) |
Add some input error resiliency to the SRT parser.
Previously, an extra blank line between entries threw everything off for the
entire remainder of the file.
Added additional logic for processing blank lines.
Added detection of bad timestamps and resynchronization when a bad timestamp is found.
git-svn-id: svn://svn.handbrake.fr/HandBrake/trunk@2939 b64f7644-9d1e-0410-96f1-a4d463321fa5
-rw-r--r-- | libhb/decsrtsub.c | 219 |
1 files changed, 140 insertions, 79 deletions
diff --git a/libhb/decsrtsub.c b/libhb/decsrtsub.c index 15015aae2..7f1602b74 100644 --- a/libhb/decsrtsub.c +++ b/libhb/decsrtsub.c @@ -17,6 +17,7 @@ struct start_and_end { enum { k_state_inEntry, + k_state_inEntry_or_new, k_state_potential_new_entry, k_state_timecode, }; @@ -43,6 +44,7 @@ struct hb_work_private_s int utf8_end; unsigned long current_time; unsigned long number_of_entries; + unsigned long last_entry_number; unsigned long current_state; srt_entry_t current_entry; iconv_t *iconv_context; @@ -51,20 +53,27 @@ struct hb_work_private_s uint64_t stop_time; // In HB time }; -static struct start_and_end read_time_from_string( const char* timeString ) +static int +read_time_from_string( const char* timeString, struct start_and_end *result ) { // for ex. 00:00:15,248 --> 00:00:16,545 long houres1, minutes1, seconds1, milliseconds1, - houres2, minutes2, seconds2, milliseconds2; + houres2, minutes2, seconds2, milliseconds2; + int scanned; - sscanf(timeString, "%ld:%ld:%ld,%ld --> %ld:%ld:%ld,%ld\n", &houres1, &minutes1, &seconds1, &milliseconds1, - &houres2, &minutes2, &seconds2, &milliseconds2); - - struct start_and_end result = { - milliseconds1 + seconds1*1000 + minutes1*60*1000 + houres1*60*60*1000, - milliseconds2 + seconds2*1000 + minutes2*60*1000 + houres2*60*60*1000}; - return result; + scanned = sscanf(timeString, "%ld:%ld:%ld,%ld --> %ld:%ld:%ld,%ld\n", + &houres1, &minutes1, &seconds1, &milliseconds1, + &houres2, &minutes2, &seconds2, &milliseconds2); + if (scanned != 8) + { + return 0; + } + result->start = + milliseconds1 + seconds1*1000 + minutes1*60*1000 + houres1*60*60*1000; + result->end = + milliseconds2 + seconds2*1000 + minutes2*60*1000 + houres2*60*60*1000; + return 1; } static int utf8_fill( hb_work_private_t * pv ) @@ -174,19 +183,30 @@ static int get_line( hb_work_private_t * pv, char *buf, int size ) static hb_buffer_t *srt_read( hb_work_private_t *pv ) { char line_buffer[1024]; + int reprocess = 0, resync = 0; if( !pv->file ) { return 
NULL; } - while( get_line( pv, line_buffer, sizeof( line_buffer ) ) ) + while( reprocess || get_line( pv, line_buffer, sizeof( line_buffer ) ) ) { + reprocess = 0; switch (pv->current_state) { case k_state_timecode: { - struct start_and_end timing = read_time_from_string( line_buffer ); + struct start_and_end timing; + int result; + + result = read_time_from_string( line_buffer, &timing ); + if (!result) + { + resync = 1; + pv->current_state = k_state_potential_new_entry; + continue; + } pv->current_entry.duration = timing.end - timing.start; pv->current_entry.offset = timing.start - pv->current_time; @@ -196,9 +216,36 @@ static hb_buffer_t *srt_read( hb_work_private_t *pv ) pv->current_entry.stop = timing.end; pv->current_state = k_state_inEntry; - continue; + continue; + } + + case k_state_inEntry_or_new: + { + char *endpoint; + long entry_number; + /* + * Is this really new next entry begin? + */ + entry_number = strtol(line_buffer, &endpoint, 10); + if (endpoint == line_buffer || + (endpoint && *endpoint != '\n' && *endpoint != '\r')) + { + /* + * Doesn't resemble an entry number + * must still be in an entry + */ + if (!resync) + { + reprocess = 1; + pv->current_state = k_state_inEntry; + } + continue; + } + reprocess = 1; + pv->current_state = k_state_potential_new_entry; + break; } - + case k_state_inEntry: { char *q; @@ -218,101 +265,114 @@ static hb_buffer_t *srt_read( hb_work_private_t *pv ) memcpy(q, line_buffer, size); pv->current_entry.pos += size; pv->current_entry.text[pv->current_entry.pos] = '\0'; - break; + break; } - + case k_state_potential_new_entry: { - const char endpoint[] = "\0"; - const unsigned long potential_entry_number = strtol(line_buffer, (char**)&endpoint, 10); + char *endpoint; + long entry_number; hb_buffer_t *buffer = NULL; /* * Is this really new next entry begin? 
*/ - if (potential_entry_number == pv->number_of_entries + 1) + entry_number = strtol(line_buffer, &endpoint, 10); + if (!resync && (*line_buffer == '\n' || *line_buffer == '\r')) { /* - * We found the next entry - or a really rare error condition + * Well.. looks like we are in the wrong mode.. lets add the + * newline we misinterpreted... */ - if( *pv->current_entry.text ) + strncat(pv->current_entry.text, " ", 1024); + pv->current_state = k_state_inEntry_or_new; + continue; + } + if (endpoint == line_buffer || + (endpoint && *endpoint != '\n' && *endpoint != '\r')) + { + /* + * Well.. looks like we are in the wrong mode.. lets add the + * line we misinterpreted... + */ + if (!resync) { - long length; - char *p, *q; - int line = 1; - uint64_t start_time = ( pv->current_entry.start + - pv->subtitle->config.offset ) * 90; - uint64_t stop_time = ( pv->current_entry.stop + - pv->subtitle->config.offset ) * 90; - - if( !( start_time > pv->start_time && stop_time < pv->stop_time ) ) - { - hb_deep_log( 3, "Discarding SRT at time start %"PRId64", stop %"PRId64, start_time, stop_time); - memset( &pv->current_entry, 0, sizeof( srt_entry_t ) ); - ++(pv->number_of_entries); - pv->current_state = k_state_timecode; - continue; - } + reprocess = 1; + pv->current_state = k_state_inEntry; + } + continue; + } + /* + * We found the next entry - or a really rare error condition + */ + pv->last_entry_number = entry_number; + resync = 0; + if( *pv->current_entry.text ) + { + long length; + char *p, *q; + int line = 1; + uint64_t start_time = ( pv->current_entry.start + + pv->subtitle->config.offset ) * 90; + uint64_t stop_time = ( pv->current_entry.stop + + pv->subtitle->config.offset ) * 90; + + if( !( start_time > pv->start_time && stop_time < pv->stop_time ) ) + { + hb_deep_log( 3, "Discarding SRT at time start %"PRId64", stop %"PRId64, start_time, stop_time); + memset( &pv->current_entry, 0, sizeof( srt_entry_t ) ); + ++(pv->number_of_entries); + pv->current_state = 
k_state_timecode; + continue; + } - length = strlen( pv->current_entry.text ); + length = strlen( pv->current_entry.text ); - for( q = p = pv->current_entry.text; *p; p++) + for( q = p = pv->current_entry.text; *p; p++) + { + if( *p == '\n' ) { - if( *p == '\n' ) - { - if ( line == 1 ) - { - *q = *p; - line = 2; - } - else - { - *q = ' '; - } - q++; - } - else if( *p != '\r' ) + if ( line == 1 ) { *q = *p; - q++; + line = 2; } else { - length--; + *q = ' '; } + q++; } - *q = '\0'; - - buffer = hb_buffer_init( length + 1 ); - - if( buffer ) + else if( *p != '\r' ) { - buffer->start = start_time - pv->start_time; - buffer->stop = stop_time - pv->start_time; - - memcpy( buffer->data, pv->current_entry.text, length + 1 ); + *q = *p; + q++; + } + else + { + length--; } } - memset( &pv->current_entry, 0, sizeof( srt_entry_t ) ); - ++(pv->number_of_entries); - pv->current_state = k_state_timecode; + *q = '\0'; + + buffer = hb_buffer_init( length + 1 ); + if( buffer ) { - return buffer; + buffer->start = start_time - pv->start_time; + buffer->stop = stop_time - pv->start_time; + + memcpy( buffer->data, pv->current_entry.text, length + 1 ); } - continue; - } - else + } + memset( &pv->current_entry, 0, sizeof( srt_entry_t ) ); + ++(pv->number_of_entries); + pv->current_state = k_state_timecode; + if( buffer ) { - /* - * Well.. looks like we are in the wrong mode.. lets add the - * newline we misinterpreted... - */ - strncat(pv->current_entry.text, " ", 1024); - pv->current_state = k_state_inEntry; + return buffer; } - - break; - } + continue; + } } } @@ -405,6 +465,7 @@ static int decsrtInit( hb_work_object_t * w, hb_job_t * job ) pv->current_state = k_state_potential_new_entry; pv->number_of_entries = 0; + pv->last_entry_number = 0; pv->current_time = 0; pv->subtitle = w->subtitle; |