2 files changed, 178 insertions, 0 deletions
diff --git a/contrib/ffmpeg/A10-mp4-aac-roll.patch b/contrib/ffmpeg/A10-mp4-aac-roll.patch
new file mode 100644
index 000000000..66fb59e00
--- /dev/null
+++ b/contrib/ffmpeg/A10-mp4-aac-roll.patch
@@ -0,0 +1,76 @@
+diff --git a/libavformat/movenc.c b/libavformat/movenc.c
+index 840190d..92d923e 100644
+--- a/libavformat/movenc.c
++++ b/libavformat/movenc.c
+@@ -136,6 +136,54 @@ static int mov_write_stco_tag(AVIOContext *pb, MOVTrack *track)
+     return update_size(pb, pos);
+ }
+ 
++static int mov_write_sgpd_tag(AVIOContext *pb, int16_t roll)
++{
++    int64_t pos = avio_tell(pb); /* atom start, handed to update_size() */
++    avio_wb32(pb, 0); /* size placeholder, presumably fixed up by update_size() */
++
++    ffio_wfourcc(pb, "sgpd"); /* atom name */
++    avio_w8(pb, 1); /* version */
++    avio_w8(pb, 0); /* flags, first byte of three */
++    avio_wb16(pb, 0); /* flags, remaining two bytes */
++    ffio_wfourcc(pb, "roll"); /* grouping type */
++    avio_wb32(pb, 2); /* table entry length in bytes (one 16-bit value) */
++    avio_wb32(pb, 1); /* table entry count */
++
++    /* table data: the single entry is the roll distance,
++     * i.e. number of audio frames to pre-roll after a seek */
++    avio_wb16(pb, roll);
++
++    return update_size(pb, pos);
++}
++
++static int mov_write_sbgp_tag(AVIOContext *pb, MOVTrack *track)
++{
++    int count = 0; /* sum of cluster[i].entries over the whole track */
++    int i;
++    int64_t pos; /* atom start, handed to update_size() */
++
++    for (i = 0; i < track->entry; i++)
++    {
++        count += track->cluster[i].entries;
++    }
++
++    pos = avio_tell(pb);
++    avio_wb32(pb, 0); /* size placeholder, presumably fixed up by update_size() */
++
++    ffio_wfourcc(pb, "sbgp"); /* atom name */
++    avio_wb32(pb, 0); /* version & flags */
++    ffio_wfourcc(pb, "roll"); /* grouping type, same as written in the sgpd atom */
++    avio_wb32(pb, 1); /* table entry count */
++
++    /* table data: the single entry covers all 'count' samples summed above */
++    avio_wb32(pb, count);
++    /* sgpd table index, index values are 1 based;
++     * we write the 'roll' sample group at index 1 */
++    avio_wb32(pb, 1);
++
++    return update_size(pb, pos);
++}
++
+ /* Sample size atom */
+ static int mov_write_stsz_tag(AVIOContext *pb, MOVTrack *track)
+ {
+@@ -1277,6 +1325,16 @@ static int mov_write_stbl_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *tra
+     mov_write_stsc_tag(pb, track);
+     mov_write_stsz_tag(pb, track);
+     mov_write_stco_tag(pb, track);
++
++    /* Add sgpd and sbgp tags for AAC tracks
++     * Apple documentation says they use this as a flag to indicate
++     * that AAC encoder delay is explicitly set in the edit list */
++    if (track->par->codec_id == AV_CODEC_ID_AAC)
++    {
++        mov_write_sgpd_tag(pb, -1);
++        mov_write_sbgp_tag(pb, track);
++    }
++
+     return update_size(pb, pos);
+ }
+ 
diff --git a/contrib/ffmpeg/A11-elst-audio-pad.patch b/contrib/ffmpeg/A11-elst-audio-pad.patch
new file mode 100644
index 000000000..9992e6944
--- /dev/null
+++ b/contrib/ffmpeg/A11-elst-audio-pad.patch
@@ -0,0 +1,102 @@
+From 2cd88516eee1a7588383dcff21f1b8e04a4b05d9 Mon Sep 17 00:00:00 2001
+From: John Stebbins <[email protected]>
+Date: Sun, 19 Feb 2017 14:02:24 -0700
+Subject: [PATCH] movenc: factor initial_padding into edit lists
+
+initial_padding was getting added to the edit list indirectly due to
+initial negative dts.  But in cases where the audio is delayed,
+all or part of initial_padding would be unaccounted for.  This patch
+makes initial_padding explicit.
+---
+ libavformat/movenc.c | 53 +++++++++++++++++++++++++++++++++++++---------------
+ 1 file changed, 38 insertions(+), 15 deletions(-)
+
+diff --git a/libavformat/movenc.c b/libavformat/movenc.c
+index 689291d..b2c0c92 100644
+--- a/libavformat/movenc.c
++++ b/libavformat/movenc.c
+@@ -1698,9 +1698,28 @@ static int mov_write_edts_tag(AVIOContext *pb, MOVMuxContext *mov,
+                                       track->timescale, AV_ROUND_UP);
+     int version = duration < INT32_MAX ? 0 : 1;
+     int entry_size, entry_count, size;
+-    int64_t delay, start_ct = track->start_cts;
+-    delay = av_rescale_rnd(track->start_dts + start_ct, MOV_TIMESCALE,
++    int64_t delay;
++    int64_t mediatime;
++    int64_t skip = 0;
++
++    delay = av_rescale_rnd(track->start_dts + track->start_cts, MOV_TIMESCALE,
+                            track->timescale, AV_ROUND_DOWN);
++
++    if (track->par->codec_type == AVMEDIA_TYPE_AUDIO &&
++        track->par->initial_padding > 0) {
++        /* Adjust delay so that initial_padding gets recorded in the
++         * MediaTime of an edit list entry even in the case that
++         * delay is positive. I.e. we don't want initial_padding to be
++         * absorbed and hidden in the delay. MediaTime must contain
++         * initial_padding in order to know where the actual media
++         * timeline begins. A player should drop samples until MediaTime
++         * is reached */
++        delay += av_rescale_rnd(track->par->initial_padding, MOV_TIMESCALE,
++                                track->par->sample_rate, AV_ROUND_DOWN);
++        skip = av_rescale_rnd(track->par->initial_padding,
++                              track->timescale,
++                              track->par->sample_rate, AV_ROUND_DOWN);
++    }
+     version |= delay < INT32_MAX ? 0 : 1;
+ 
+     entry_size = (version == 1) ? 20 : 12;
+@@ -1731,33 +1750,37 @@ static int mov_write_edts_tag(AVIOContext *pb, MOVMuxContext *mov,
+         }
+         avio_wb32(pb, 0x00010000);
+     } else {
+-        /* Avoid accidentally ending up with start_ct = -1 which has got a
+-         * special meaning. Normally start_ct should end up positive or zero
+-         * here, but use FFMIN in case dts is a a small positive integer
+-         * rounded to 0 when represented in MOV_TIMESCALE units. */
+-        start_ct  = -FFMIN(track->start_dts, 0);
+-        /* Note, this delay is calculated from the pts of the first sample,
+-         * ensuring that we don't reduce the duration for cases with
+-         * dts<0 pts=0. */
+-        duration += delay;
++        /* Avoid accidentally ending up with mediatime = -1 which has got a
++         * special meaning. skip and -track->start_dts are guaranteed to be
++         * positive here, so it is not possible for mediatime to be -1 */
++        skip = FFMAX(skip, -track->start_dts - track->start_cts);
+     }
++    mediatime = skip + track->start_cts;
++
++    /* skip is the duration of the media segment that will be dropped
++     * during playback when an edit entry is applied.  The edit entry
++     * duration must be reduced by this amount. */
++    duration -= av_rescale_rnd(skip, MOV_TIMESCALE,
++                               track->timescale, AV_ROUND_UP);
+ 
+     /* For fragmented files, we don't know the full length yet. Setting
+      * duration to 0 allows us to only specify the offset, including
+      * the rest of the content (from all future fragments) without specifying
+      * an explicit duration. */
+-    if (mov->flags & FF_MOV_FLAG_FRAGMENT)
++    if (mov->flags & FF_MOV_FLAG_FRAGMENT || duration < 0)
+         duration = 0;
+ 
+-    /* duration */
++    /* add edit entry that defines the presentation time of the first
++     * sample to render during playback and the duration of the segment */
+     if (version == 1) {
+         avio_wb64(pb, duration);
+-        avio_wb64(pb, start_ct);
++        avio_wb64(pb, mediatime);
+     } else {
+         avio_wb32(pb, duration);
+-        avio_wb32(pb, start_ct);
++        avio_wb32(pb, mediatime);
+     }
+     avio_wb32(pb, 0x00010000);
++
+     return size;
+ }
+ 
+-- 
+2.9.3
+