libav: explicitly write encoder delay and preroll to mp4

This makes it possible to read the encoder delay back and drop the corresponding samples. Writing the preroll ('roll') sample group to the mp4 also fixes post-processing of the file with Apple tools: if the roll sample group is not present, Apple tools apply an implicit rule to remove the encoder delay, which results in the delay being dropped twice.
author: John Stebbins <[email protected]> 2017-02-20 10:49:44 -0700
committer: John Stebbins <[email protected]> 2017-02-20 10:49:44 -0700
commit: 2e75f3ab41f8f4510fe7f1c035b2eed9ba88db66 (patch)
tree: 98bbe9dc650a492b62e3c1f04e8ecd53cd229deb /contrib/ffmpeg
parent: 4d3b79bc0ae3575cdd9aff5483be613ef5a99bbc (diff)
2 files changed, 178 insertions, 0 deletions
diff --git a/contrib/ffmpeg/A10-mp4-aac-roll.patch b/contrib/ffmpeg/A10-mp4-aac-roll.patch
new file mode 100644
index 000000000..66fb59e00
--- /dev/null
+++ b/contrib/ffmpeg/A10-mp4-aac-roll.patch
@@ -0,0 +1,76 @@
+diff --git a/libavformat/movenc.c b/libavformat/movenc.c
+index 840190d..92d923e 100644
+--- a/libavformat/movenc.c
++++ b/libavformat/movenc.c
+@@ -136,6 +136,54 @@ static int mov_write_stco_tag(AVIOContext *pb, MOVTrack *track)
+     return update_size(pb, pos);
+ }
+ 
++static int mov_write_sgpd_tag(AVIOContext *pb, int16_t roll)
++{
++    int64_t pos = avio_tell(pb);
++    avio_wb32(pb, 0); /* size */
++
++    ffio_wfourcc(pb, "sgpd");
++    avio_w8(pb, 1); /* version */
++    avio_w8(pb, 0); /* flags (1) */
++    avio_wb16(pb, 0); /* flags (2) */
++    ffio_wfourcc(pb, "roll"); /* grouping type */
++    avio_wb32(pb, 2); /* table entry length */
++    avio_wb32(pb, 1); /* table entry count */
++
++    /* table data, roll distance
++     * i.e. number of audio frames to pre-roll after a seek */
++    avio_wb16(pb, roll);
++
++    return update_size(pb, pos);
++}
++
++static int mov_write_sbgp_tag(AVIOContext *pb, MOVTrack *track)
++{
++    int count = 0;
++    int i;
++    int64_t pos;
++
++    for (i = 0; i < track->entry; i++)
++    {
++        count += track->cluster[i].entries;
++    }
++
++    pos = avio_tell(pb);
++    avio_wb32(pb, 0); /* size */
++
++    ffio_wfourcc(pb, "sbgp"); /* atom name */
++    avio_wb32(pb, 0); /* version & flags */
++    ffio_wfourcc(pb, "roll"); /* grouping type */
++    avio_wb32(pb, 1); /* table entry count */
++
++    /* table data */
++    avio_wb32(pb, count);
++    /* sgpd table index, index values are 1 based
++     * we write 'roll' sample group at index 1 */
++    avio_wb32(pb, 1);
++
++    return update_size(pb, pos);
++}
++
+ /* Sample size atom */
+ static int mov_write_stsz_tag(AVIOContext *pb, MOVTrack *track)
+ {
+@@ -1277,6 +1325,16 @@ static int mov_write_stbl_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *tra
+     mov_write_stsc_tag(pb, track);
+     mov_write_stsz_tag(pb, track);
+     mov_write_stco_tag(pb, track);
++
++    /* Add sgpd and sbgp tags for AAC tracks
++     * Apple documentation says they use this as a flag to indicate
++     * that AAC encoder delay is explicitly set in the edit list */
++    if (track->par->codec_id == AV_CODEC_ID_AAC)
++    {
++        mov_write_sgpd_tag(pb, -1);
++        mov_write_sbgp_tag(pb, track);
++    }
++
+     return update_size(pb, pos);
+ }
+ 
diff --git a/contrib/ffmpeg/A11-elst-audio-pad.patch b/contrib/ffmpeg/A11-elst-audio-pad.patch
new file mode 100644
index 000000000..9992e6944
--- /dev/null
+++ b/contrib/ffmpeg/A11-elst-audio-pad.patch
@@ -0,0 +1,102 @@
+From 2cd88516eee1a7588383dcff21f1b8e04a4b05d9 Mon Sep 17 00:00:00 2001
+From: John Stebbins <[email protected]>
+Date: Sun, 19 Feb 2017 14:02:24 -0700
+Subject: [PATCH] movenc: factor initial_padding into edit lists
+
+initial_padding was getting added to the edit list indirectly due to
+initial negative dts.  But in cases where the audio is delayed,
+all or part of initial_padding would be unaccounted for.  This patch
+makes initial_padding explicit.
+---
+ libavformat/movenc.c | 53 +++++++++++++++++++++++++++++++++++++---------------
+ 1 file changed, 38 insertions(+), 15 deletions(-)
+
+diff --git a/libavformat/movenc.c b/libavformat/movenc.c
+index 689291d..b2c0c92 100644
+--- a/libavformat/movenc.c
++++ b/libavformat/movenc.c
+@@ -1698,9 +1698,28 @@ static int mov_write_edts_tag(AVIOContext *pb, MOVMuxContext *mov,
+                                       track->timescale, AV_ROUND_UP);
+     int version = duration < INT32_MAX ? 0 : 1;
+     int entry_size, entry_count, size;
+-    int64_t delay, start_ct = track->start_cts;
+-    delay = av_rescale_rnd(track->start_dts + start_ct, MOV_TIMESCALE,
++    int64_t delay;
++    int64_t mediatime;
++    int64_t skip = 0;
++
++    delay = av_rescale_rnd(track->start_dts + track->start_cts, MOV_TIMESCALE,
+                            track->timescale, AV_ROUND_DOWN);
++
++    if (track->par->codec_type == AVMEDIA_TYPE_AUDIO &&
++        track->par->initial_padding > 0) {
++        /* Adjust delay so that initial_padding gets recorded in the
++         * MediaTime of an edit list entry even in the case that
++         * delay is positive. I.e. we don't want initial_padding to be
++         * absorbed and hidden in the delay. MediaTime must contain
++         * initial_padding in order to know where the actual media
++         * timeline begins. A player should drop samples until MediaTime
++         * is reached */
++        delay += av_rescale_rnd(track->par->initial_padding, MOV_TIMESCALE,
++                                track->par->sample_rate, AV_ROUND_DOWN);
++        skip = av_rescale_rnd(track->par->initial_padding,
++                              track->timescale,
++                              track->par->sample_rate, AV_ROUND_DOWN);
++    }
+     version |= delay < INT32_MAX ? 0 : 1;
+ 
+     entry_size = (version == 1) ? 20 : 12;
+@@ -1731,33 +1750,37 @@ static int mov_write_edts_tag(AVIOContext *pb, MOVMuxContext *mov,
+         }
+         avio_wb32(pb, 0x00010000);
+     } else {
+-        /* Avoid accidentally ending up with start_ct = -1 which has got a
+-         * special meaning. Normally start_ct should end up positive or zero
+-         * here, but use FFMIN in case dts is a a small positive integer
+-         * rounded to 0 when represented in MOV_TIMESCALE units. */
+-        start_ct  = -FFMIN(track->start_dts, 0);
+-        /* Note, this delay is calculated from the pts of the first sample,
+-         * ensuring that we don't reduce the duration for cases with
+-         * dts<0 pts=0. */
+-        duration += delay;
++        /* Avoid accidentally ending up with mediatime = -1 which has got a
++         * special meaning. skip and -track->start_dts are guaranteed to be
++         * positive here, so it is not possible for mediatime to be -1 */
++        skip = FFMAX(skip, -track->start_dts - track->start_cts);
+     }
++    mediatime = skip + track->start_cts;
++
++    /* skip is the duration of the media segment that will be dropped
++     * during playback when an edit entry is applied.  The edit entry
++     * duration must be reduced by this amount. */
++    duration -= av_rescale_rnd(skip, MOV_TIMESCALE,
++                               track->timescale, AV_ROUND_UP);
+ 
+     /* For fragmented files, we don't know the full length yet. Setting
+      * duration to 0 allows us to only specify the offset, including
+      * the rest of the content (from all future fragments) without specifying
+      * an explicit duration. */
+-    if (mov->flags & FF_MOV_FLAG_FRAGMENT)
++    if (mov->flags & FF_MOV_FLAG_FRAGMENT || duration < 0)
+         duration = 0;
+ 
+-    /* duration */
++    /* add edit entry that defines the presentation time of the first
++     * sample to render during playback and the duration of the segment */
+     if (version == 1) {
+         avio_wb64(pb, duration);
+-        avio_wb64(pb, start_ct);
++        avio_wb64(pb, mediatime);
+     } else {
+         avio_wb32(pb, duration);
+-        avio_wb32(pb, start_ct);
++        avio_wb32(pb, mediatime);
+     }
+     avio_wb32(pb, 0x00010000);
++
+     return size;
+ }
+ 
+-- 
+2.9.3
+
author	John Stebbins <[email protected]>	2017-02-20 10:49:44 -0700
committer	John Stebbins <[email protected]>	2017-02-20 10:49:44 -0700
commit	2e75f3ab41f8f4510fe7f1c035b2eed9ba88db66 (patch)
tree	98bbe9dc650a492b62e3c1f04e8ecd53cd229deb /contrib/ffmpeg
parent	4d3b79bc0ae3575cdd9aff5483be613ef5a99bbc (diff)