/* dectx3gsub.c Copyright (c) 2003-2014 HandBrake Team This file is part of the HandBrake source code Homepage: . It may be used under the terms of the GNU General Public License v2. For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html */ /* * Converts TX3G subtitles to UTF-8 subtitles with limited HTML-style markup (, , ). * * TX3G == MPEG 4, Part 17 (ISO/IEC 14496-17) == 3GPP Timed Text (26.245) * A full reference to the format can be found here: * http://www.3gpp.org/ftp/Specs/html-info/26245.htm * * @author David Foster (davidfstr) */ #include #include #include "hb.h" #include "colormap.h" struct hb_work_private_s { int line; // SSA line number }; typedef enum { BOLD = 0x1, ITALIC = 0x2, UNDERLINE = 0x4 } FaceStyleFlag; #define MAX_MARKUP_LEN 40 #define SSA_PREAMBLE_LEN 24 typedef struct { uint16_t startChar; // NOTE: indices in terms of *character* (not: byte) positions uint16_t endChar; uint16_t fontID; uint8_t faceStyleFlags; // FaceStyleFlag uint8_t fontSize; uint32_t textColorRGBA; } StyleRecord; // NOTE: None of these macros check for buffer overflow #define READ_U8() *pos; pos += 1; #define READ_U16() (pos[0] << 8) | pos[1]; pos += 2; #define READ_U32() (pos[0] << 24) | (pos[1] << 16) | (pos[2] << 8) | pos[3]; pos += 4; #define READ_ARRAY(n) pos; pos += n; #define SKIP_ARRAY(n) pos += n; #define WRITE_CHAR(c) {dst[0]=c; dst += 1;} #define FOURCC(str) ((((uint32_t) str[0]) << 24) | \ (((uint32_t) str[1]) << 16) | \ (((uint32_t) str[2]) << 8) | \ (((uint32_t) str[3]) << 0)) #define IS_10xxxxxx(c) ((c & 0xC0) == 0x80) static int write_ssa_markup(char *dst, StyleRecord *style) { if (style == NULL) { sprintf(dst, "{\\r}"); return strlen(dst); } sprintf(dst, "{\\i%d\\b%d\\u%d\\1c&H%X&\\1a&H%02X&}", !!(style->faceStyleFlags & ITALIC), !!(style->faceStyleFlags & BOLD), !!(style->faceStyleFlags & UNDERLINE), HB_RGB_TO_BGR(style->textColorRGBA >> 8), 255 - (style->textColorRGBA & 0xFF)); // SSA alpha is inverted 0==opaque return strlen(dst); } static hb_buffer_t *tx3g_decode_to_ssa(hb_work_private_t *pv, hb_buffer_t *in) { uint8_t *pos = in->data; uint8_t *end = in->data + in->size; uint16_t numStyleRecords = 0; StyleRecord *styleRecords = NULL; /* * Parse the packet as a TX3G TextSample. * * Look for a single StyleBox ('styl') and read all contained StyleRecords. * Ignore all other box types. * * NOTE: Buffer overflows on read are not checked. */ uint16_t textLength = READ_U16(); uint8_t *text = READ_ARRAY(textLength); while ( pos < end ) { /* * Read TextSampleModifierBox */ uint32_t size = READ_U32(); if ( size == 0 ) { size = pos - end; // extends to end of packet } if ( size == 1 ) { hb_log( "dectx3gsub: TextSampleModifierBox has unsupported large size" ); break; } uint32_t type = READ_U32(); if (type == FOURCC("uuid")) { hb_log( "dectx3gsub: TextSampleModifierBox has unsupported extended type" ); break; } if (type == FOURCC("styl")) { // Found a StyleBox. Parse the contained StyleRecords if ( numStyleRecords != 0 ) { hb_log( "dectx3gsub: found additional StyleBoxes on subtitle; skipping" ); SKIP_ARRAY(size); continue; } numStyleRecords = READ_U16(); if (numStyleRecords > 0) styleRecords = calloc(numStyleRecords, sizeof(StyleRecord)); int i; for (i = 0; i < numStyleRecords; i++) { styleRecords[i].startChar = READ_U16(); styleRecords[i].endChar = READ_U16(); styleRecords[i].fontID = READ_U16(); styleRecords[i].faceStyleFlags = READ_U8(); styleRecords[i].fontSize = READ_U8(); styleRecords[i].textColorRGBA = READ_U32(); } } else { // Found some other kind of TextSampleModifierBox. Skip it. SKIP_ARRAY(size); } } /* * Copy text to output buffer, and add HTML markup for the style records */ int maxOutputSize = textLength + SSA_PREAMBLE_LEN + (numStyleRecords * MAX_MARKUP_LEN); hb_buffer_t *out = hb_buffer_init( maxOutputSize ); if ( out == NULL ) goto fail; uint8_t *dst = out->data; uint8_t *start; int charIndex = 0; int styleIndex = 0; sprintf((char*)dst, "%d,,Default,,0,0,0,,", pv->line); dst += strlen((char*)dst); start = dst; for (pos = text, end = text + textLength; pos < end; pos++) { if (IS_10xxxxxx(*pos)) { // Is a non-first byte of a multi-byte UTF-8 character WRITE_CHAR(*pos); continue; // ...without incrementing 'charIndex' } if (styleIndex < numStyleRecords) { if (styleRecords[styleIndex].endChar == charIndex) { if (styleIndex + 1 >= numStyleRecords || styleRecords[styleIndex+1].startChar > charIndex) { dst += write_ssa_markup((char*)dst, NULL); } styleIndex++; } if (styleRecords[styleIndex].startChar == charIndex) { dst += write_ssa_markup((char*)dst, &styleRecords[styleIndex]); } } if (*pos == '\n') { WRITE_CHAR('\\'); WRITE_CHAR('N'); } else { WRITE_CHAR(*pos); } charIndex++; } if (start == dst) { // No text in the subtitle. This sub is just filler, drop it. free(styleRecords); hb_buffer_close(&out); return NULL; } *dst = '\0'; dst++; // Trim output buffer to the actual amount of data written out->size = dst - out->data; // Copy metadata from the input packet to the output packet out->s.frametype = HB_FRAME_SUBTITLE; out->s.start = in->s.start; out->s.stop = in->s.stop; fail: free(styleRecords); return out; } #undef READ_U8 #undef READ_U16 #undef READ_U32 #undef READ_ARRAY #undef SKIP_ARRAY #undef WRITE_CHAR #undef WRITE_START_TAG #undef WRITE_END_TAG static int dectx3gInit( hb_work_object_t * w, hb_job_t * job ) { hb_work_private_t * pv; pv = calloc( 1, sizeof( hb_work_private_t ) ); if (pv == NULL) return 1; w->private_data = pv; // TODO: // parse w->subtitle->extradata txg3 sample description into // SSA format and replace extradata. // For now we just create a generic SSA Script Info. int height = job->title->height - job->crop[0] - job->crop[1]; int width = job->title->width - job->crop[2] - job->crop[3]; hb_subtitle_add_ssa_header(w->subtitle, width, height); return 0; } static int dectx3gWork( hb_work_object_t * w, hb_buffer_t ** buf_in, hb_buffer_t ** buf_out ) { hb_work_private_t * pv = w->private_data; hb_buffer_t * in = *buf_in; if ( in->s.stop == 0 ) { hb_log( "dectx3gsub: subtitle packet lacks duration" ); } if (in->size == 0) { *buf_out = in; *buf_in = NULL; return HB_WORK_DONE; } *buf_out = tx3g_decode_to_ssa(pv, in); return HB_WORK_OK; } static void dectx3gClose( hb_work_object_t * w ) { free(w->private_data); } hb_work_object_t hb_dectx3gsub = { WORK_DECTX3GSUB, "TX3G Subtitle Decoder", dectx3gInit, dectx3gWork, dectx3gClose };