/* muxcommon.c

   Copyright (c) 2003-2014 HandBrake Team
   This file is part of the HandBrake source code
   Homepage: <http://handbrake.fr/>.
   It may be used under the terms of the GNU General Public License v2.
   For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html
 */
#include "hb.h"
#include "decssasub.h"

// Unless every track is at EOF, muxWork only keeps emitting interleaved
// chunks while the total bytes held in the internal fifos exceed this value.
#define MIN_BUFFERING (1024*1024*10)
// Per-track byte limit: once a track's internal fifo holds more than this,
// mf_full() reports it full and all continuous tracks are forced ready.
#define MAX_BUFFERING (1024*1024*50)

// Container-specific muxer object. All of its members come from the
// HB_MUX_COMMON macro, which is defined outside this file; this file
// calls its init, mux and end members.
struct hb_mux_object_s
{
    HB_MUX_COMMON;
};

// Growable bit vector. Bit n lives in word (n >> 5) at bit position
// (n & 0x1F); 'vec' holds (size + 31) >> 5 words.
typedef struct
{
    int         size;   // Size in bits
    uint32_t  * vec;    // Backing storage, one uint32_t per 32 bits
} hb_bitvec_t;

// Ring buffer of buffer pointers used as a track's internal fifo.
// 'in' and 'out' are running counters; they are masked with (flen - 1)
// to obtain an index into 'fifo'. The fifo is empty when in == out.
typedef struct
{
    hb_buffer_t **fifo;
    uint32_t    in;     // number of bufs put into fifo
    uint32_t    out;    // number of bufs taken out of fifo
    uint32_t    flen;   // fifo length (must be power of two)
} mux_fifo_t;

// Per-track muxing state.
typedef struct
{
    hb_mux_data_t * mux_data;       // handed to the container muxer with each buffer
    uint64_t        frames;         // number of buffers passed to the container muxer
    uint64_t        bytes;          // total size of the buffers passed to the container muxer
    mux_fifo_t      mf;             // internal fifo of buffers awaiting output
    int             buffered_size;  // sum of the sizes of the buffers currently in 'mf'
} hb_track_t;

// Muxer state shared by all muxer work objects of a job (one work object
// per track). Accesses in muxWork and muxClose are made with 'mutex' held.
typedef struct
{
    hb_lock_t       * mutex;
    int               ref;        // number of work objects referencing this mux
    int               done;       // set once all tracks hit eof and all fifos are drained
    hb_mux_object_t * m;          // container-specific muxer (may be NULL)
    double            pts;        // end time of next muxing chunk
    double            interleave; // size in 90KHz ticks of media chunks we mux
    uint32_t          max_tracks; // total number of tracks allocated
    uint32_t          ntracks;    // total number of tracks we're muxing
    hb_bitvec_t     * eof;        // bitmask of track with eof
    hb_bitvec_t     * rdy;        // bitmask of tracks ready to output
    hb_bitvec_t     * allEof;     // valid bits in eof (all tracks)
    hb_bitvec_t     * allRdy;     // valid bits in rdy (audio & video tracks)
    hb_track_t     ** track;      // tracks to mux 'max_tracks' elements
    int               buffered_size; // total bytes held in all tracks' internal fifos
} hb_mux_t;

// Private data of one muxer work object: the job, the index of the track
// this work object feeds, and the mux state shared with the other tracks.
struct hb_work_private_s
{
    hb_job_t * job;
    int        track;   // index into mux->track[] and the mux bit vectors
    hb_mux_t * mux;
};


// Extend 'bv' by 'bits' bits. The word array is reallocated only when the
// new size needs more 32-bit words than the old one; newly added words are
// cleared. Returns 0 on success, or -1 (leaving 'bv' untouched) when the
// reallocation fails.
static int hb_bitvec_add_bits(hb_bitvec_t *bv, int bits)
{
    const int old_words = (bv->size + 31) >> 5;
    const int new_words = (bv->size + bits + 31) >> 5;

    if (new_words > old_words)
    {
        uint32_t *grown = realloc(bv->vec, new_words * sizeof(uint32_t));
        int idx = old_words;

        if (grown == NULL)
        {
            return -1;
        }
        // Only the freshly appended words need clearing.
        while (idx < new_words)
        {
            grown[idx++] = 0;
        }
        bv->vec = grown;
    }
    bv->size += bits;
    return 0;
}

// Allocate a bit vector holding 'size' bits, all cleared.
// Returns NULL if either the descriptor or its word storage cannot be
// allocated.
static hb_bitvec_t* hb_bitvec_new(int size)
{
    hb_bitvec_t *bv = calloc(1, sizeof(*bv));
    if (bv == NULL)
    {
        return NULL;
    }
    if (hb_bitvec_add_bits(bv, size) < 0)
    {
        // hb_bitvec_add_bits leaves bv->vec untouched (NULL) on failure,
        // so only the descriptor needs releasing.
        free(bv);
        return NULL;
    }
    return bv;
}

// Release a bit vector and its storage, then set the caller's pointer to
// NULL. Like free(), this is a no-op when there is nothing to release.
static void hb_bitvec_free(hb_bitvec_t **_bv)
{
    if (_bv == NULL || *_bv == NULL)
    {
        return;
    }

    hb_bitvec_t *bv = *_bv;
    free(bv->vec);
    free(bv);
    *_bv = NULL;
}

// Set bit 'n' of 'bv'. Out-of-range indices (negative or >= size) are
// silently ignored.
static void hb_bitvec_set(hb_bitvec_t *bv, int n)
{
    if (n < 0 || n >= bv->size)
        return; // Error.  Should never happen.

    int word = n >> 5;
    // Shift an unsigned 1: shifting a signed 1 into bit 31 is undefined.
    uint32_t bit = (uint32_t)1 << (n & 0x1F);
    bv->vec[word] |= bit;
}

// Clear bit 'n' of 'bv'. Out-of-range indices (negative or >= size) are
// silently ignored.
static void hb_bitvec_clr(hb_bitvec_t *bv, int n)
{
    if (n < 0 || n >= bv->size)
        return; // Error.  Should never happen.

    int word = n >> 5;
    // Shift an unsigned 1: shifting a signed 1 into bit 31 is undefined.
    uint32_t bit = (uint32_t)1 << (n & 0x1F);
    bv->vec[word] &= ~bit;
}

// Clear every bit of 'bv' by zeroing all of its storage words.
static void hb_bitvec_zero(hb_bitvec_t *bv)
{
    const int word_count = (bv->size + 31) >> 5;
    const size_t nbytes = word_count * sizeof(uint32_t);

    memset(bv->vec, 0, nbytes);
}

// Return 1 if bit 'n' of 'bv' is set, 0 otherwise. Out-of-range indices
// (negative or >= size) read as 0.
static int hb_bitvec_bit(hb_bitvec_t *bv, int n)
{
    if (n < 0 || n >= bv->size)
        return 0; // Error.  Should never happen.

    int word = n >> 5;
    // Shift an unsigned 1: shifting a signed 1 into bit 31 is undefined.
    uint32_t bit = (uint32_t)1 << (n & 0x1F);
    return !!(bv->vec[word] & bit);
}

// Return 1 if at least one storage word of 'bv' is non-zero, else 0.
static int hb_bitvec_any(hb_bitvec_t *bv)
{
    const int word_count = (bv->size + 31) >> 5;
    int idx;

    for (idx = 0; idx < word_count; idx++)
    {
        if (bv->vec[idx] != 0)
        {
            return 1;
        }
    }
    return 0;
}

// Return 1 when both vectors have the same size and identical storage
// words, 0 otherwise.
static int hb_bitvec_cmp(hb_bitvec_t *bv1, hb_bitvec_t *bv2)
{
    int idx = 0;
    int word_count;

    if (bv1->size != bv2->size)
    {
        return 0;
    }

    word_count = (bv1->size + 31) >> 5;
    while (idx < word_count)
    {
        if (bv1->vec[idx] != bv2->vec[idx])
        {
            return 0;
        }
        idx++;
    }
    return 1;
}

// Return 1 when (bv1 & bv2) == bv3 word for word, 0 otherwise.
// All three vectors must have the same size; any mismatch yields 0 so
// that no vector is read past its storage.
static int hb_bitvec_and_cmp(hb_bitvec_t *bv1, hb_bitvec_t *bv2, hb_bitvec_t *bv3)
{
    if (bv1->size != bv2->size || bv1->size != bv3->size)
        return 0;

    int ii;
    int words = (bv1->size + 31) >> 5;
    for (ii = 0; ii < words; ii++)
    {
        if ((bv1->vec[ii] & bv2->vec[ii]) != bv3->vec[ii])
            return 0;
    }
    return 1;
}

// Copy the bits of 'bv2' into 'bv1'. 'bv1' is grown first when it is the
// smaller of the two. When 'bv1' is the larger one, only the words that
// exist in 'bv2' are copied and the remaining words of 'bv1' are cleared,
// so 'bv2' is never read past its storage.
// Returns 0 on success or a negative value if growing 'bv1' failed.
static int hb_bitvec_cpy(hb_bitvec_t *bv1, hb_bitvec_t *bv2)
{
    if (bv1->size < bv2->size)
    {
        int result = hb_bitvec_add_bits(bv1, bv2->size - bv1->size);
        if (result < 0)
            return result;
    }

    // After the optional grow above, dst_words >= src_words always holds.
    int dst_words = (bv1->size + 31) >> 5;
    int src_words = (bv2->size + 31) >> 5;

    if (src_words > 0)
    {
        memcpy(bv1->vec, bv2->vec, src_words * sizeof(uint32_t));
    }
    if (dst_words > src_words)
    {
        memset(bv1->vec + src_words, 0,
               (dst_words - src_words) * sizeof(uint32_t));
    }

    return 0;
}

// The muxer handles two different kinds of media: Video and audio tracks
// are continuous: once they start they generate continuous, consecutive
// sequence of bufs until they end. The muxer will time align all continuous
// media tracks so that their data will be well interleaved in the output file.
// (Smooth, low latency playback with minimal player buffering requires that
// data that's going to be presented close together in time also be close
// together in the output file). Since HB's audio and video encoders run at
// different speeds, the time-aligning involves buffering *all* the continuous
// media tracks until a frame with a timestamp beyond the current alignment
// point arrives on the slowest fifo (usually the video encoder).
//
// The other kind of media, subtitles, close-captions, vobsubs and
// similar tracks, are intermittent. They generate frames sporadically or on
// human time scales (seconds) rather than near the video frame rate (milliseconds).
// If intermittent sources were treated like continuous sources huge sections of
// audio and video would get buffered waiting for the next subtitle to show up.
// To keep this from happening the muxer doesn't wait for intermittent tracks
// (essentially it assumes that they will always go through the HB processing
// pipeline faster than the associated video). They are still time aligned and
// interleaved at the appropriate point in the output file.

// This routine adds another track for the muxer to process. Buffers pushed
// onto the new track's internal fifo are time-aligned with all the other
// media streams and then passed to the container-specific 'mux' routine
// with argument 'mux_data' (see routine OutputTrackChunk). 'is_continuous'
// must be 1 for an audio or video track and 0 otherwise (see above).
//
// The new track gets index mux->ntracks (before the increment). Its bit is
// set in allEof, and also in allRdy when the track is continuous.
//
// On any allocation failure an error is logged and the track is NOT added;
// mux->ntracks is left unchanged.
// NOTE(review): callers in this file record mux->ntracks as the track index
// before calling and cannot detect such a failure - confirm that is
// acceptable.
static void add_mux_track( hb_mux_t *mux, hb_mux_data_t *mux_data,
                           int is_continuous )
{
    if ( mux->ntracks + 1 > mux->max_tracks )
    {
        // Grow the track pointer array: start at 32 entries, then double.
        int max_tracks = mux->max_tracks ? mux->max_tracks * 2 : 32;
        hb_track_t **tmp;
        tmp = realloc(mux->track, max_tracks * sizeof(hb_track_t*));
        if (tmp == NULL)
        {
            hb_error("add_mux_track: realloc failed, too many tracks (>%d)",
                     max_tracks);
            return;
        }
        mux->track = tmp;
        mux->max_tracks = max_tracks;
    }

    hb_track_t *track = calloc( 1, sizeof( *track ) );
    if (track == NULL)
    {
        hb_error("add_mux_track: track allocation failed");
        return;
    }
    track->mux_data = mux_data;
    track->mf.flen = 8;     // initial fifo length, must be a power of two
    track->mf.fifo = calloc( track->mf.flen, sizeof(track->mf.fifo[0]) );
    if (track->mf.fifo == NULL)
    {
        hb_error("add_mux_track: fifo allocation failed");
        free(track);
        return;
    }

    int t = mux->ntracks++;
    mux->track[t] = track;
    hb_bitvec_set(mux->allEof, t);
    if (is_continuous)
        hb_bitvec_set(mux->allRdy, t);
}

// Report whether a track's internal fifo holds more than MAX_BUFFERING
// bytes. Returns 1 when it does, 0 otherwise.
static int mf_full( hb_track_t * track )
{
    return ( track->buffered_size > MAX_BUFFERING ) ? 1 : 0;
}

// Append 'buf' to the internal fifo of track 'tk', growing the fifo when
// it is full, and add the buffer's size to the per-track and total
// buffered byte counts.
//
// If the track already holds more than MAX_BUFFERING bytes, every
// continuous track is forced ready (rdy = allRdy) so output can drain it.
static void mf_push( hb_mux_t * mux, int tk, hb_buffer_t *buf )
{
    hb_track_t * track = mux->track[tk];
    uint32_t mask = track->mf.flen - 1;
    uint32_t in = track->mf.in;

    // presumably trims the buffer's allocation to its payload size -
    // hb_buffer_reduce is defined outside this file; TODO confirm
    hb_buffer_reduce( buf, buf->size );
    if ( track->buffered_size > MAX_BUFFERING )
    {
        hb_bitvec_cpy(mux->rdy, mux->allRdy);
    }
    if ( ( ( in + 1 ) & mask ) == ( track->mf.out & mask ) )
    {
        // fifo is full - expand it to double the current size.
        // This is a bit tricky because when we change the size
        // it changes the modulus (mask) used to convert the in
        // and out counters to fifo indices. Since existing items
        // will be referenced at a new location after the expand
        // we can't just realloc the fifo. If there were
        // hundreds of fifo entries it would be worth it to have code
        // for each of the four possible before/after configurations
        // but these fifos are small so we just allocate a new chunk
        // of memory then do element by element copies using the old &
        // new masks then free the old fifo's memory.
        track->mf.flen *= 2;
        uint32_t nmask = track->mf.flen - 1;
        // NOTE(review): this allocation is not checked; a failure would be
        // dereferenced below. Handling it needs a way to report the error
        // to the caller, which this interface does not have.
        hb_buffer_t **nfifo = malloc( track->mf.flen * sizeof(*nfifo) );
        // Walk with the same unsigned type as the in/out counters so the
        // index wraps exactly like they do.
        uint32_t indx = track->mf.out;
        while ( indx != track->mf.in )
        {
            nfifo[indx & nmask] = track->mf.fifo[indx & mask];
            ++indx;
        }
        free( track->mf.fifo );
        track->mf.fifo = nfifo;
        mask = nmask;
    }
    track->mf.fifo[in & mask] = buf;
    track->mf.in = in + 1;
    track->buffered_size += buf->size;
    mux->buffered_size += buf->size;
}

// Remove and return the oldest buffer of track 'tk', or NULL when that
// track's internal fifo is empty. The buffer's size is subtracted from the
// per-track and total buffered byte counts.
static hb_buffer_t *mf_pull( hb_mux_t * mux, int tk )
{
    hb_track_t *track = mux->track[tk];
    mux_fifo_t *mf = &track->mf;
    hb_buffer_t *head;

    if ( mf->out == mf->in )
    {
        // nothing queued
        return NULL;
    }

    head = mf->fifo[mf->out & (mf->flen - 1)];
    mf->out++;

    track->buffered_size -= head->size;
    mux->buffered_size -= head->size;
    return head;
}

// Return the oldest buffer of the track's internal fifo without removing
// it, or NULL when the fifo is empty.
static hb_buffer_t *mf_peek( hb_track_t *track )
{
    if ( track->mf.out == track->mf.in )
    {
        return NULL;
    }
    return track->mf.fifo[track->mf.out & (track->mf.flen - 1)];
}

// Queue 'buf' on the internal fifo of track 'tk'. Keeping the data in our
// own fifos means (a) the upstream reader cannot deadlock on us and (b) we
// decide how the tracks are interleaved in the output file.
// A buffer that starts at or after the current interleave point marks the
// track as ready for output.
static void MoveToInternalFifos( int tk, hb_mux_t *mux, hb_buffer_t * buf )
{
    mf_push( mux, tk, buf );

    if ( buf->s.start >= mux->pts )
    {
        hb_bitvec_set(mux->rdy, tk);
    }
}

// Hand every queued buffer of track 'tk' that starts before the current
// interleave point (mux->pts) to the container muxer 'm', updating the
// track's frame and byte counters as it goes.
static void OutputTrackChunk( hb_mux_t *mux, int tk, hb_mux_object_t *m )
{
    hb_track_t *track = mux->track[tk];

    for (;;)
    {
        hb_buffer_t *head = mf_peek( track );

        // stop at an empty fifo or at the first buffer that does not
        // start before the interleave point
        if ( head == NULL || !( head->s.start < mux->pts ) )
        {
            break;
        }

        head = mf_pull( mux, tk );
        track->frames += 1;
        track->bytes  += head->size;
        m->mux( m, track->mux_data, head );
    }
}

// Work function shared by all muxer work objects (one per track).
//
// Takes ownership of *buf_in in every case except when the mux is already
// done (then *buf_in is left for the caller to release):
//   - an empty buffer (size <= 0) marks this work object's track as eof
//     and ready;
//   - buffers arriving on passes other than 0 or 2, or after the track's
//     eof, are discarded;
//   - anything else is queued on the track's internal fifo.
// Then, if all continuous tracks are ready or all tracks are at eof, the
// queued data is written out in 'interleave'-sized chunks.
//
// 'buf_out' is unused. Returns HB_WORK_DONE once every track is at eof and
// every internal fifo has been drained (or if that already happened),
// otherwise HB_WORK_OK.
static int muxWork( hb_work_object_t * w, hb_buffer_t ** buf_in,
                     hb_buffer_t ** buf_out )
{
    hb_work_private_t * pv = w->private_data;
    hb_job_t    * job = pv->job;
    hb_mux_t    * mux = pv->mux;
    hb_track_t  * track;
    int           i;
    hb_buffer_t * buf = *buf_in;

    hb_lock( mux->mutex );
    if ( mux->done )
    {
        hb_unlock( mux->mutex );
        return HB_WORK_DONE;
    }

    if ( buf->size <= 0 )
    {
        // EOF - mark this track as done
        hb_buffer_close( &buf );
        hb_bitvec_set(mux->eof, pv->track);
        hb_bitvec_set(mux->rdy, pv->track);
    }
    else if ((job->pass != 0 && job->pass != 2) ||
             hb_bitvec_bit(mux->eof, pv->track))
    {
        hb_buffer_close( &buf );
    }
    else
    {
        MoveToInternalFifos( pv->track, mux, buf );
    }
    *buf_in = NULL;

    if (!hb_bitvec_and_cmp(mux->rdy, mux->allRdy, mux->allRdy) &&
        !hb_bitvec_and_cmp(mux->eof, mux->allEof, mux->allEof))
    {
        // Neither "all continuous tracks ready" nor "all tracks at eof":
        // nothing can be output yet.
        hb_unlock( mux->mutex );
        return HB_WORK_OK;
    }

    // 'more' tracks which tracks still have queued data after a round.
    hb_bitvec_t *more;
    more = hb_bitvec_new(0);
    hb_bitvec_cpy(more, mux->rdy);
    // all tracks have at least 'interleave' ticks of data. Output
    // all that we can in 'interleave' size chunks.
    while ((hb_bitvec_and_cmp(mux->rdy, mux->allRdy, mux->allRdy) &&
            hb_bitvec_any(more) && mux->buffered_size > MIN_BUFFERING ) ||
           (hb_bitvec_cmp(mux->eof, mux->allEof)))
    {
        hb_bitvec_zero(more);
        for ( i = 0; i < mux->ntracks; ++i )
        {
            track = mux->track[i];
            OutputTrackChunk( mux, i, mux->m );
            if ( mf_full( track ) )
            {
                // If the track's fifo is still full, advance
                // the current interleave point and try again.
                hb_bitvec_cpy(mux->rdy, mux->allRdy);
                break;
            }

            // if the track is at eof or still has data that's past
            // our next interleave point then leave it marked as rdy.
            // Otherwise clear rdy.
            // (The eof test must be negated: a track that is NOT at eof
            // and has nothing queued past the next interleave point has
            // to wait for more input before it is ready again.)
            if (!hb_bitvec_bit(mux->eof, i) &&
                (track->mf.out == track->mf.in ||
                 track->mf.fifo[(track->mf.in-1) & (track->mf.flen-1)]->s.start
                     < mux->pts + mux->interleave))
            {
                hb_bitvec_clr(mux->rdy, i);
            }
            if ( track->mf.out != track->mf.in )
            {
                hb_bitvec_set(more, i);
            }
        }

        // if all the tracks are at eof we're just purging their
        // remaining data -- keep going until all internal fifos are empty.
        if (hb_bitvec_cmp(mux->eof, mux->allEof))
        {
            for ( i = 0; i < mux->ntracks; ++i )
            {
                if ( mux->track[i]->mf.out != mux->track[i]->mf.in )
                {
                    break;
                }
            }
            if ( i >= mux->ntracks )
            {
                mux->done = 1;
                hb_unlock( mux->mutex );
                hb_bitvec_free(&more);
                return HB_WORK_DONE;
            }
        }
        mux->pts += mux->interleave;
    }
    hb_bitvec_free(&more);

    hb_unlock( mux->mutex );
    return HB_WORK_OK;
}

// Close one muxer work object. Each work object holds a reference on the
// shared mux state; the one that drops the last reference finalizes the
// container muxer, logs per-track statistics (passes 0 and 2 only) and
// frees all shared state. The work object's private data is always freed.
void muxClose( hb_work_object_t * w )
{
    hb_work_private_t * pv = w->private_data;
    hb_mux_t    * mux = pv->mux;
    hb_job_t    * job = pv->job;
    hb_track_t  * track;
    int           i;

    hb_lock( mux->mutex );
    if ( --mux->ref == 0 )
    {
        // Update state before closing muxer.  Closing the muxer
        // may initiate optimization which can take a while and
        // we want the muxing state to be visible while this is
        // happening.
        if( job->pass == 0 || job->pass == 2 )
        {
            /* Update the UI */
            hb_state_t state;
            state.state = HB_STATE_MUXING;
            state.param.muxing.progress = 0;
            hb_set_state( job->h, &state );
        }

        if( mux->m )
        {
            mux->m->end( mux->m );
            free( mux->m );
        }

        // we're all done muxing -- print final stats and cleanup.
        if( job->pass == 0 || job->pass == 2 )
        {
            hb_stat_t sb;
            uint64_t bytes_total, frames_total;

            if (!hb_stat(job->file, &sb))
            {
                hb_deep_log( 2, "mux: file size, %"PRId64" bytes", (int64_t) sb.st_size );

                bytes_total  = 0;
                frames_total = 0;
                for( i = 0; i < mux->ntracks; ++i )
                {
                    track = mux->track[i];
                    // frames/bytes are uint64_t and flen is uint32_t, so
                    // use the matching unsigned conversion specifiers.
                    hb_log( "mux: track %d, %"PRIu64" frames, %"PRIu64" bytes, %.2f kbps, fifo %"PRIu32,
                            i, track->frames, track->bytes,
                            90000.0 * track->bytes / mux->pts / 125,
                            track->mf.flen );
                    if( !i && job->vquality < 0 )
                    {
                        /* Video */
                        hb_deep_log( 2, "mux: video bitrate error, %+"PRId64" bytes",
                                (int64_t)(track->bytes - mux->pts * job->vbitrate * 125 / 90000) );
                    }
                    bytes_total  += track->bytes;
                    frames_total += track->frames;
                }

                if( bytes_total && frames_total )
                {
                    // Subtract as signed values so a file smaller than the
                    // payload total does not wrap to a huge unsigned number.
                    hb_deep_log( 2, "mux: overhead, %.2f bytes per frame",
                            (float) ( (int64_t) sb.st_size - (int64_t) bytes_total ) /
                            frames_total );
                }
            }
        }

        for( i = 0; i < mux->ntracks; ++i )
        {
            hb_buffer_t * b;
            track = mux->track[i];
            // discard anything still queued on the internal fifo
            while ( (b = mf_pull( mux, i )) != NULL )
            {
                hb_buffer_close( &b );
            }
            free( track->mux_data );
            // The fifo array is allocated for every track, so release it
            // whether or not the track has mux_data.
            free( track->mf.fifo );
            free( track );
        }
        free(mux->track);
        hb_unlock( mux->mutex );
        hb_lock_close( &mux->mutex );
        hb_bitvec_free(&mux->eof);
        hb_bitvec_free(&mux->rdy);
        hb_bitvec_free(&mux->allEof);
        hb_bitvec_free(&mux->allRdy);
        free( mux );
    }
    else
    {
        hb_unlock( mux->mutex );
    }
    free( pv );
    w->private_data = NULL;
}

// Thread entry point for the audio and subtitle muxer work objects.
// Repeatedly waits for a buffer on the work object's input fifo and feeds
// it to the work function, until the job is told to die, the work function
// reports HB_WORK_DONE, or the shared mux is marked done.
// Any buffer fetched but not consumed is closed here.
static void mux_loop( void * _w )
{
    hb_work_object_t  * w = _w;
    hb_work_private_t * pv = w->private_data;
    hb_job_t          * job = pv->job;
    hb_buffer_t       * buf_in;

    while ( !*job->die && w->status != HB_WORK_DONE )
    {
        buf_in = hb_fifo_get_wait( w->fifo_in );
        if ( pv->mux->done )
        {
            // The mux finished while we were waiting; don't leak the
            // buffer we may just have been handed.
            if ( buf_in != NULL )
            {
                hb_buffer_close( &buf_in );
            }
            break;
        }
        if ( buf_in == NULL )
            continue;
        if ( *job->die )
        {
            hb_buffer_close( &buf_in );
            break;
        }

        w->status = w->work( w, &buf_in, NULL );
        // the work function clears buf_in when it took ownership
        if( buf_in )
        {
            hb_buffer_close( &buf_in );
        }
    }
}

// Create the shared mux state and one muxer work object per track.
//
// Track 0 is the video track (input job->fifo_mpeg4); its work object is
// returned to the caller and is NOT started here. One further work object
// is created for every audio track and every pass-through subtitle track;
// each is added to job->list_work and gets its own mux_loop thread.
//
// On passes 0 and 2 the container muxer is created and initialized first.
// Returns NULL, after setting *job->done_error and *job->die, when the mux
// state cannot be allocated or no supported container is selected.
hb_work_object_t * hb_muxer_init( hb_job_t * job )
{
    int           i;
    hb_mux_t    * mux = calloc( sizeof( hb_mux_t ), 1 );
    hb_work_object_t  * w;
    hb_work_object_t  * muxer;

    if ( mux == NULL )
    {
        hb_error( "hb_muxer_init: mux allocation failed" );
        *job->done_error = HB_ERROR_INIT;
        *job->die = 1;
        return NULL;
    }

    // The bit vectors must be allocated before hb_thread_init for the
    // audio and subtitle muxer jobs below.
    int bit_vec_size = 1 + hb_list_count(job->list_audio) +
                           hb_list_count(job->list_subtitle);
    mux->rdy = hb_bitvec_new(bit_vec_size);
    mux->eof = hb_bitvec_new(bit_vec_size);
    mux->allRdy = hb_bitvec_new(bit_vec_size);
    mux->allEof = hb_bitvec_new(bit_vec_size);

    mux->mutex = hb_lock_init();

    // set up to interleave track data in blocks of 1 video frame time.
    // (the best case for buffering and playout latency). The container-
    // specific muxers can reblock this into bigger chunks if necessary.
    mux->interleave = 90000. * (double)job->vrate_base / (double)job->vrate;
    mux->pts = mux->interleave;

    /* Get a real muxer */
    if( job->pass == 0 || job->pass == 2)
    {
        switch( job->mux )
        {
        case HB_MUX_AV_MP4:
        case HB_MUX_AV_MKV:
            mux->m = hb_mux_avformat_init( job );
            break;
        default:
            hb_error( "No muxer selected, exiting" );
            *job->done_error = HB_ERROR_INIT;
            *job->die = 1;
            // Nothing references the mux state yet, so release everything
            // allocated above instead of leaking it.
            hb_lock_close( &mux->mutex );
            hb_bitvec_free(&mux->rdy);
            hb_bitvec_free(&mux->eof);
            hb_bitvec_free(&mux->allRdy);
            hb_bitvec_free(&mux->allEof);
            free( mux );
            return NULL;
        }
        /* Create file, write headers */
        if( mux->m )
        {
            mux->m->init( mux->m );
        }
    }

    /* Initialize the work objects that will receive fifo data */

    // Video: always track 0, a continuous track.
    muxer = hb_get_work( WORK_MUX );
    muxer->private_data = calloc( sizeof( hb_work_private_t ), 1 );
    muxer->private_data->job = job;
    muxer->private_data->mux = mux;
    mux->ref++;
    muxer->private_data->track = mux->ntracks;
    muxer->fifo_in = job->fifo_mpeg4;
    add_mux_track( mux, job->mux_data, 1 );
    muxer->done = &muxer->private_data->mux->done;

    // Audio: continuous tracks, each with its own thread.
    for( i = 0; i < hb_list_count( job->list_audio ); i++ )
    {
        hb_audio_t  *audio = hb_list_item( job->list_audio, i );

        w = hb_get_work( WORK_MUX );
        w->private_data = calloc( sizeof( hb_work_private_t ), 1 );
        w->private_data->job = job;
        w->private_data->mux = mux;
        mux->ref++;
        w->private_data->track = mux->ntracks;
        w->fifo_in = audio->priv.fifo_out;
        add_mux_track( mux, audio->priv.mux_data, 1 );
        w->done = &job->done;
        hb_list_add( job->list_work, w );
        w->thread = hb_thread_init( w->name, mux_loop, w, HB_NORMAL_PRIORITY );
    }

    // Subtitles: only pass-through subtitles are muxed; they are
    // intermittent (non-continuous) tracks.
    for( i = 0; i < hb_list_count( job->list_subtitle ); i++ )
    {
        hb_subtitle_t  *subtitle = hb_list_item( job->list_subtitle, i );

        if (subtitle->config.dest != PASSTHRUSUB)
            continue;

        w = hb_get_work( WORK_MUX );
        w->private_data = calloc( sizeof( hb_work_private_t ), 1 );
        w->private_data->job = job;
        w->private_data->mux = mux;
        mux->ref++;
        w->private_data->track = mux->ntracks;
        w->fifo_in = subtitle->fifo_out;
        add_mux_track( mux, subtitle->mux_data, 0 );
        w->done = &job->done;
        hb_list_add( job->list_work, w );
        w->thread = hb_thread_init( w->name, mux_loop, w, HB_NORMAL_PRIORITY );
    }
    return muxer;
}

// All muxer work objects are set up by the dedicated initializer
// hb_muxer_init(), so this init hook has nothing left to do and
// simply reports success.
static int muxInit( hb_work_object_t * w, hb_job_t * job )
{
    (void) w;
    (void) job;
    return 0;
}

// Work object descriptor for the muxer, initialized positionally with the
// WORK_MUX id, the display name, and this file's init / work / close
// functions.
hb_work_object_t hb_muxer =
{
    WORK_MUX,
    "Muxer",
    muxInit,
    muxWork,
    muxClose
};

// Mask of the style flags that update_style() compares when deciding
// whether a new style entry is needed; update_style_atoms() encodes these
// same three flags in the face-style byte.
#define TX3G_STYLES (HB_STYLE_FLAG_BOLD   |   \
                     HB_STYLE_FLAG_ITALIC |   \
                     HB_STYLE_FLAG_UNDERLINE)

// Working state used while building the 'styl' atom for one subtitle.
typedef struct style_context_s
{
    uint8_t             * style_atoms;      // output buffer: 10 byte header, then 12 byte style entries
    int                   style_atom_count; // number of style entries written so far
    hb_subtitle_style_t   current_style;    // style in effect since 'style_start'
    int                   style_start;      // character position where current_style began (INT_MAX until first set)
} style_context_t;

// Append one 12-byte style entry describing the context's current style,
// covering characters ctx->style_start .. 'stop', and bump the entry count.
// Entries are written after the 10-byte atom header.
static void update_style_atoms(style_context_t *ctx, int stop)
{
    const hb_subtitle_style_t *cur = &ctx->current_style;
    uint8_t *entry = ctx->style_atoms + 10 + (12 * ctx->style_atom_count);

    // face-style-flags: bit 0 bold, bit 1 italic, bit 2 underline
    uint8_t face = 0;
    face |= (cur->flags & HB_STYLE_FLAG_BOLD)      ? 1 : 0;
    face |= (cur->flags & HB_STYLE_FLAG_ITALIC)    ? 2 : 0;
    face |= (cur->flags & HB_STYLE_FLAG_UNDERLINE) ? 4 : 0;

    // startChar, big endian
    entry[0]  = (ctx->style_start >> 8) & 0xff;
    entry[1]  = ctx->style_start & 0xff;
    // endChar, big endian
    entry[2]  = (stop >> 8) & 0xff;
    entry[3]  = stop & 0xff;
    // font-ID, big endian
    entry[4]  = 0;
    entry[5]  = 1;
    entry[6]  = face;
    // font-size
    entry[7]  = 24;
    // text color: r, g, b, a
    entry[8]  = (cur->fg_rgb >> 16) & 0xff;
    entry[9]  = (cur->fg_rgb >> 8)  & 0xff;
    entry[10] = (cur->fg_rgb)       & 0xff;
    entry[11] = cur->fg_alpha;

    ctx->style_atom_count += 1;
}

// Record that 'style' applies from character position 'pos' onwards.
// If the current style already covers some text (style_start < pos) and
// differs from 'style' in a tx3g-relevant way (bold/italic/underline,
// foreground color or alpha), a style entry is emitted for the text up to
// pos - 1 before switching. If it covers text but does not differ, nothing
// changes. If it covers no text yet, the new style simply replaces it.
static void update_style(style_context_t *ctx,
                         hb_subtitle_style_t *style, int pos)
{
    if (ctx->style_start < pos)
    {
        const hb_subtitle_style_t *cur = &ctx->current_style;
        int differs = ((cur->flags ^ style->flags) & TX3G_STYLES) ||
                      cur->fg_rgb   != style->fg_rgb              ||
                      cur->fg_alpha != style->fg_alpha;

        if (!differs)
        {
            // same look, keep extending the current run
            return;
        }
        update_style_atoms(ctx, pos - 1);
    }

    ctx->current_style = *style;
    ctx->style_start = pos;
}

// Reset a style context: everything zeroed, output directed at
// 'style_atoms', and style_start set to INT_MAX so that the first
// update_style() call just adopts its style without emitting an entry.
static void style_context_init(style_context_t *ctx, uint8_t *style_atoms)
{
    memset(ctx, 0, sizeof(style_context_t));
    ctx->style_start = INT_MAX;
    ctx->style_atoms = style_atoms;
}

/*
 * Convert one SSA subtitle line in 'input' to plain text in 'output',
 * removing markup and recording style changes in the 'styl' atom that is
 * built in 'style_atoms'.
 *
 * '*stylesize' receives the total size of the 'styl' atom in bytes, or 0
 * when no style entry was produced. 'output' is always null terminated.
 * Style positions are counted in characters: bytes matching the UTF-8
 * continuation pattern (10xxxxxx) are subtracted from the byte offset.
 */
void hb_muxmp4_process_subtitle_style(uint8_t *input,
                                      uint8_t *output,
                                      uint8_t *style_atoms, uint16_t *stylesize)
{
    uint16_t continuation_bytes = 0; // UTF-8 continuation bytes seen so far
    int in_pos, out_pos = 0;
    int field;
    char *cursor;
    style_context_t ctx;
    hb_subtitle_style_t style;

    *stylesize = 0;
    style_context_init(&ctx, style_atoms);

    hb_ssa_style_init(&style);

    // Skip past the SSA preamble: up to 8 comma separated fields
    cursor = (char*)input;
    field = 0;
    while (field < 8)
    {
        char *comma = strchr(cursor, ',');
        if (comma == NULL)
            break;
        cursor = comma + 1;
        field++;
    }
    in_pos = cursor - (char*)input;

    while (input[in_pos] != '\0')
    {
        int consumed, seg_len;
        int line_count = 1;
        char *ch;
        char *segment = hb_ssa_to_text((char*)input + in_pos, &consumed,
                                       &style);

        if (segment == NULL)
            break;

        // Walk the segment once: count UTF-8 continuation bytes, limit
        // the number of lines, and find the segment's length.
        for (ch = segment; *ch != '\0'; ch++)
        {
            if ((*ch & 0xc0) == 0x80)
            {
                continuation_bytes++;
                hb_deep_log( 3, "mux: Counted %d UTF-8 chrs within subtitle",
                                 continuation_bytes);
            }
            // By default tx3g only supports 2 lines of text.
            // To support more lines, we must enable the vertical placement
            // flag in the tx3g atom and add tbox atoms to the sample
            // data to set the vertical placement for each subtitle.
            // Although tbox defines a rectangle, the QT spec says
            // that only the vertical placement is honored (bummer).
            // So any line break beyond the first becomes a space.
            if (*ch == '\n' && ++line_count > 2)
            {
                *ch = ' ';
            }
        }
        seg_len = ch - segment;

        strcpy((char*)output + out_pos, segment);
        free(segment);

        out_pos += seg_len;
        in_pos += consumed;
        update_style(&ctx, &style, out_pos - continuation_bytes);
    }

    // Return to default style at end of line, flushes any pending
    // style changes
    hb_ssa_style_init(&style);
    update_style(&ctx, &style, out_pos - continuation_bytes);

    // null terminate output string
    output[out_pos] = 0;

    if (ctx.style_atom_count > 0)
    {
        // 10 byte header (size, 'styl', entry count) + 12 bytes per entry
        *stylesize = 10 + (ctx.style_atom_count * 12);

        // atom size, big endian
        style_atoms[0] = 0;
        style_atoms[1] = 0;
        style_atoms[2] = (*stylesize >> 8) & 0xff;
        style_atoms[3] = *stylesize & 0xff;
        // atom type
        memcpy(style_atoms + 4, "styl", 4);
        // entry count, big endian
        style_atoms[8] = (ctx.style_atom_count >> 8) & 0xff;
        style_atoms[9] = ctx.style_atom_count & 0xff;
    }
}