/* encca_aac.c

   Copyright (c) 2003-2012 HandBrake Team
   This file is part of the HandBrake source code
   Homepage: <http://handbrake.fr/>.
   It may be used under the terms of the GNU General Public License v2.
   For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html
 */

#include "hb.h"
#include "downmix.h"
/* NOTE(review): the two system header names were missing in the reviewed copy;
 * these provide the AudioConverter* API and the CoreAudio base types used
 * below -- confirm against the upstream file. */
#include <AudioToolbox/AudioToolbox.h>
#include <CoreAudio/CoreAudio.h>

/* AAC profile requested from the CoreAudio encoder. */
enum AAC_MODE
{
    AAC_MODE_LC,
    AAC_MODE_HE
};

int  encCoreAudioInitLC(hb_work_object_t*, hb_job_t*);
int  encCoreAudioInitHE(hb_work_object_t*, hb_job_t*);
int  encCoreAudioInit(hb_work_object_t*, hb_job_t*, enum AAC_MODE mode);
int  encCoreAudioWork(hb_work_object_t*, hb_buffer_t**, hb_buffer_t**);
void encCoreAudioClose(hb_work_object_t*);

hb_work_object_t hb_encca_aac =
{
    WORK_ENC_CA_AAC,
    "AAC encoder (Apple)",
    encCoreAudioInitLC,
    encCoreAudioWork,
    encCoreAudioClose
};

hb_work_object_t hb_encca_haac =
{
    WORK_ENC_CA_HAAC,
    "HE-AAC encoder (Apple)",
    encCoreAudioInitHE,
    encCoreAudioWork,
    encCoreAudioClose
};

/* Per-instance encoder state, stored in hb_work_object_t::private_data. */
struct hb_work_private_s
{
    uint8_t *buf;               // staging buffer handed to the converter by inInputDataProc
    hb_job_t *job;
    hb_list_t *list;            // queued input buffers awaiting encoding
    AudioConverterRef converter;
    unsigned long isamples;     // frames per output packet (output.mFramesPerPacket)
    unsigned long isamplesiz;   // bytes per input packet (input.mBytesPerPacket)
    unsigned long omaxpacket;   // maximum output packet size reported by the converter
    unsigned long nchannels;    // discrete channel count of the mixdown
    uint64_t pts;               // running output timestamp (90 kHz units)
    uint64_t ibytes;            // input bytes still available to the current Encode call
    Float64 osamplerate;        // output sample rate reported by the converter
    uint64_t layout;            // AV_CH_LAYOUT_* value used for channel remapping
    hb_chan_map_t *ichanmap;    // channel map of the incoming audio
};

#define MP4ESDescrTag          0x03
#define MP4DecConfigDescrTag   0x04
#define MP4DecSpecificDescrTag 0x05

// based off of mov_mp4_read_descr_len from mov.c in ffmpeg's libavformat
// Reads a variable-length descriptor size (at most 4 bytes, 7 bits each,
// high bit = "more bytes follow") and advances *buffer past it.
static int readDescrLen(UInt8 **buffer)
{
    int len = 0;
    int count = 4;

    while (count--)
    {
        int c = *(*buffer)++;
        len = (len << 7) | (c & 0x7f);
        if (!(c & 0x80))
            break;
    }
    return len;
}

// based off of mov_mp4_read_descr from mov.c in ffmpeg's libavformat
// Reads one descriptor header: stores the tag byte in *tag, returns the
// descriptor length and leaves *buffer pointing at the descriptor payload.
static int readDescr(UInt8 **buffer, int *tag)
{
    *tag = *(*buffer)++;
    return readDescrLen(buffer);
}

// based off of mov_read_esds from mov.c in ffmpeg's libavformat
// Extracts the DecoderSpecific info from an ESDS blob.
//
//   descExt      - start of the ESDS data
//   buffer       - on success receives a calloc()ed copy of the
//                  DecoderSpecific payload; the caller owns it and must
//                  free() it. Left untouched when no payload is found.
//   size         - receives the payload length, or 0 when nothing was copied
//   versionFlags - non-zero if descExt starts with a 4-byte version/flags field
//
// Always returns noErr; callers must check *size / *buffer.
// The input length is not known here, so the descriptor is trusted to be
// well formed.
static long ReadESDSDescExt(void* descExt, UInt8 **buffer, UInt32 *size, int versionFlags)
{
    UInt8 *esds = (UInt8*)descExt;
    int tag, len;

    *size = 0;

    if (versionFlags)
        esds += 4; // version + flags

    readDescr(&esds, &tag);
    esds += 2; // ID
    if (tag == MP4ESDescrTag)
        esds++; // priority

    readDescr(&esds, &tag);
    if (tag == MP4DecConfigDescrTag)
    {
        esds++;    // object type id
        esds++;    // stream type
        esds += 3; // buffer size db
        esds += 4; // max bitrate
        esds += 4; // average bitrate

        len = readDescr(&esds, &tag);
        if (tag == MP4DecSpecificDescrTag)
        {
            *buffer = calloc(1, len + 8);
            if (*buffer)
            {
                memcpy(*buffer, esds, len);
                *size = len;
            }
        }
    }

    return noErr;
}

/***********************************************************************
 * hb_work_encCoreAudio_init switches
 ***********************************************************************
 *
 **********************************************************************/
int encCoreAudioInitLC(hb_work_object_t *w, hb_job_t *job)
{
    return encCoreAudioInit(w, job, AAC_MODE_LC);
}

int encCoreAudioInitHE(hb_work_object_t *w, hb_job_t *job)
{
    return encCoreAudioInit(w, job, AAC_MODE_HE);
}

/***********************************************************************
 * hb_work_encCoreAudio_init
 ***********************************************************************
 * Creates the AudioConverter for the requested AAC mode, configures
 * bitrate or VBR quality, records the negotiated stream parameters in
 * the private data and stores the DecoderSpecific info as extradata.
 *
 * Returns 0 on success, -1 if the converter cannot be created (the job
 * is also flagged to die) and 1 on an inconsistent bitrate/quality
 * configuration.
 **********************************************************************/
int encCoreAudioInit(hb_work_object_t *w, hb_job_t *job, enum AAC_MODE mode)
{
    hb_work_private_t *pv = calloc(1, sizeof(hb_work_private_t));
    hb_audio_t *audio = w->audio;
    AudioStreamBasicDescription input, output;
    UInt32 tmp, tmpsiz = sizeof(tmp);
    UInt32 formatID;
    OSStatus err;

    w->private_data = pv;
    pv->job = job;

    // pass the number of channels used into the private work data
    pv->nchannels =
        hb_mixdown_get_discrete_channel_count(audio->config.out.mixdown);

    // input: interleaved native-endian 32-bit float PCM
    bzero(&input, sizeof(AudioStreamBasicDescription));
    input.mSampleRate       = (Float64)audio->config.out.samplerate;
    input.mFormatID         = kAudioFormatLinearPCM;
    input.mFormatFlags      = (kLinearPCMFormatFlagIsFloat|kAudioFormatFlagsNativeEndian);
    input.mBytesPerPacket   = 4 * pv->nchannels;
    input.mFramesPerPacket  = 1;
    input.mBytesPerFrame    = input.mBytesPerPacket * input.mFramesPerPacket;
    input.mChannelsPerFrame = pv->nchannels;
    input.mBitsPerChannel   = 32;

    switch (mode)
    {
        case AAC_MODE_HE:
            formatID = kAudioFormatMPEG4AAC_HE;
            break;
        case AAC_MODE_LC:
        default:
            formatID = kAudioFormatMPEG4AAC;
            break;
    }

    bzero(&output, sizeof(AudioStreamBasicDescription));
    output.mFormatID         = formatID;
    output.mSampleRate       = (Float64)audio->config.out.samplerate;
    output.mChannelsPerFrame = pv->nchannels;
    // let CoreAudio decide the rest

    // initialise encoder
    err = AudioConverterNew(&input, &output, &pv->converter);
    if (err != noErr)
    {
        // Retry without the samplerate
        bzero(&output, sizeof(AudioStreamBasicDescription));
        output.mFormatID         = formatID;
        output.mChannelsPerFrame = pv->nchannels;

        err = AudioConverterNew(&input, &output, &pv->converter);
        if (err != noErr)
        {
            hb_log("Error creating an AudioConverter err=%"PRId64" output.mBytesPerFrame=%"PRIu64"",
                   (int64_t)err, (uint64_t)output.mBytesPerFrame);
            *job->die = 1;
            return -1;
        }
    }

    // set encoder quality to maximum
    tmp = kAudioConverterQuality_Max;
    AudioConverterSetProperty(pv->converter, kAudioConverterCodecQuality,
                              sizeof(tmp), &tmp);

    if (audio->config.out.bitrate > 0)
    {
        // set encoder bitrate control mode to constrained variable
        tmp = kAudioCodecBitRateControlMode_VariableConstrained;
        AudioConverterSetProperty(pv->converter,
                                  kAudioCodecPropertyBitRateControlMode,
                                  sizeof(tmp), &tmp);

        // get available bitrates; on any failure the list stays empty and
        // the requested bitrate is passed through unclamped
        AudioValueRange *bitrates = NULL;
        size_t bitrateCounts = 0;
        err = AudioConverterGetPropertyInfo(pv->converter,
                                            kAudioConverterApplicableEncodeBitRates,
                                            &tmpsiz, NULL);
        if (err == noErr && tmpsiz >= sizeof(AudioValueRange))
        {
            bitrates = malloc(tmpsiz);
            if (bitrates != NULL)
            {
                err = AudioConverterGetProperty(pv->converter,
                                                kAudioConverterApplicableEncodeBitRates,
                                                &tmpsiz, bitrates);
                if (err == noErr)
                {
                    bitrateCounts = tmpsiz / sizeof(AudioValueRange);
                }
            }
        }

        // set bitrate
        tmp = audio->config.out.bitrate * 1000;
        if (bitrateCounts > 0)
        {
            if (tmp < bitrates[0].mMinimum)
                tmp = bitrates[0].mMinimum;
            if (tmp > bitrates[bitrateCounts-1].mMinimum)
                tmp = bitrates[bitrateCounts-1].mMinimum;
        }
        free(bitrates);

        if (tmp != audio->config.out.bitrate * 1000)
            hb_log("encca_aac: sanitizing track %d audio bitrate %d to %"PRIu32"",
                   audio->config.out.track, audio->config.out.bitrate, tmp / 1000);
        AudioConverterSetProperty(pv->converter, kAudioConverterEncodeBitRate,
                                  sizeof(tmp), &tmp);
    }
    else if (audio->config.out.quality >= 0)
    {
        if (mode != AAC_MODE_LC)
        {
            hb_log("encCoreAudioInit: internal error, VBR set but not applicable");
            return 1;
        }
        // set encoder bitrate control mode to variable
        tmp = kAudioCodecBitRateControlMode_Variable;
        AudioConverterSetProperty(pv->converter,
                                  kAudioCodecPropertyBitRateControlMode,
                                  sizeof(tmp), &tmp);
        // set quality
        tmp = audio->config.out.quality;
        AudioConverterSetProperty(pv->converter,
                                  kAudioCodecPropertySoundQualityForVBR,
                                  sizeof(tmp), &tmp);
    }
    else
    {
        hb_log("encCoreAudioInit: internal error, bitrate/quality not set");
        return 1;
    }

    // get real input
    tmpsiz = sizeof(input);
    AudioConverterGetProperty(pv->converter,
                              kAudioConverterCurrentInputStreamDescription,
                              &tmpsiz, &input);
    // get real output
    tmpsiz = sizeof(output);
    AudioConverterGetProperty(pv->converter,
                              kAudioConverterCurrentOutputStreamDescription,
                              &tmpsiz, &output);

    // set sizes
    pv->isamplesiz  = input.mBytesPerPacket;
    pv->isamples    = output.mFramesPerPacket;
    pv->osamplerate = output.mSampleRate;
    audio->config.out.samples_per_frame = pv->isamples;

    // set channel map and layout (for remapping)
    pv->ichanmap = audio->config.in.channel_map;
    switch (audio->config.out.mixdown)
    {
        case HB_AMIXDOWN_MONO:
            pv->layout = AV_CH_LAYOUT_MONO;
            break;
        case HB_AMIXDOWN_STEREO:
        case HB_AMIXDOWN_DOLBY:
        case HB_AMIXDOWN_DOLBYPLII:
            pv->layout = AV_CH_LAYOUT_STEREO;
            break;
        case HB_AMIXDOWN_6CH:
        default:
            pv->layout = AV_CH_LAYOUT_5POINT1;
            break;
    }

    // get maximum output size
    // (tmpsiz must describe tmp here, not the stream description read above)
    tmpsiz = sizeof(tmp);
    AudioConverterGetProperty(pv->converter,
                              kAudioConverterPropertyMaximumOutputPacketSize,
                              &tmpsiz, &tmp);
    pv->omaxpacket = tmp;

    // get magic cookie (elementary stream descriptor)
    tmp = HB_CONFIG_MAX_SIZE;
    AudioConverterGetProperty(pv->converter,
                              kAudioConverterCompressionMagicCookie,
                              &tmp, w->config->extradata.bytes);
    // CoreAudio returns a complete ESDS, but we only need
    // the DecoderSpecific info.
    UInt8* buffer = NULL;
    ReadESDSDescExt(w->config->extradata.bytes, &buffer, &tmpsiz, 0);
    if (buffer != NULL)
    {
        w->config->extradata.length = tmpsiz;
        memmove(w->config->extradata.bytes, buffer,
                w->config->extradata.length);
        // ReadESDSDescExt hands over ownership of its allocation
        free(buffer);
    }
    else
    {
        // no DecoderSpecific info found (or allocation failed)
        w->config->extradata.length = 0;
    }

    pv->list = hb_list_init();
    pv->buf = NULL;

    return 0;
}

/***********************************************************************
 * Close
 ***********************************************************************
 * Disposes the converter and releases the private data.
 **********************************************************************/
void encCoreAudioClose(hb_work_object_t *w)
{
    hb_work_private_t *pv = w->private_data;

    if (pv != NULL)
    {
        if (pv->converter)
        {
            AudioConverterDispose(pv->converter);
        }
        free(pv->buf);
        hb_list_empty(&pv->list);
        free(pv);
        w->private_data = NULL;
    }
}

/* Called whenever necessary by AudioConverterFillComplexBuffer.
 *
 * Supplies up to *npackets input packets taken from pv->list, limited to
 * the pv->ibytes budget set by Encode. Returns 1 with *npackets = 0 when
 * no input is available; Encode treats that status as "out of data"
 * rather than as an error. */
static OSStatus inInputDataProc(AudioConverterRef converter, UInt32 *npackets,
                                AudioBufferList *buffers,
                                AudioStreamPacketDescription **ignored,
                                void *userdata)
{
    hb_work_private_t *pv = userdata;

    if (!pv->ibytes)
    {
        *npackets = 0;
        return 1;
    }

    // the buffer from the previous call is no longer needed by the converter
    if (pv->buf != NULL)
    {
        free(pv->buf);
    }

    buffers->mBuffers[0].mDataByteSize = MIN(pv->ibytes,
                                             pv->isamplesiz * *npackets);
    pv->buf = calloc(1, buffers->mBuffers[0].mDataByteSize);
    buffers->mBuffers[0].mData = pv->buf;

    if (hb_list_bytes(pv->list) >= buffers->mBuffers[0].mDataByteSize)
    {
        hb_list_getbytes(pv->list, buffers->mBuffers[0].mData,
                         buffers->mBuffers[0].mDataByteSize, NULL, NULL);
    }
    else
    {
        *npackets = 0;
        return 1;
    }

    *npackets = buffers->mBuffers[0].mDataByteSize / pv->isamplesiz;
    pv->ibytes -= buffers->mBuffers[0].mDataByteSize;

    // reorder channels in place when the source map differs from the QT map
    if (pv->ichanmap != &hb_qt_chan_map)
    {
        hb_layout_remap(pv->ichanmap, &hb_qt_chan_map, pv->layout,
                        (float*)buffers->mBuffers[0].mData, *npackets);
    }

    return noErr;
}
/***********************************************************************
 * Encode
 ***********************************************************************
 * Encodes one output packet from the queued input.
 *
 * Returns a new buffer holding the packet, or NULL when less than one
 * packet's worth of input is queued or the converter produced no data.
 **********************************************************************/
static hb_buffer_t* Encode(hb_work_object_t *w)
{
    hb_work_private_t *pv = w->private_data;
    UInt32 npackets = 1;

    /* check if we need more data */
    if ((pv->ibytes = hb_list_bytes(pv->list)) < pv->isamples * pv->isamplesiz)
    {
        return NULL;
    }

    hb_buffer_t *obuf;
    AudioStreamPacketDescription odesc = { 0 };
    AudioBufferList obuflist =
    {
        .mNumberBuffers = 1,
        .mBuffers = { { .mNumberChannels = pv->nchannels } },
    };

    obuf = hb_buffer_init(pv->omaxpacket);
    obuflist.mBuffers[0].mDataByteSize = obuf->size;
    obuflist.mBuffers[0].mData = obuf->data;

    OSStatus err = AudioConverterFillComplexBuffer(pv->converter,
                                                   inInputDataProc, pv,
                                                   &npackets, &obuflist,
                                                   &odesc);
    // status 1 is what inInputDataProc returns when it runs out of input
    if (err != noErr && err != 1)
    {
        hb_log("encCoreAudio: unexpected error in AudioConverterFillComplexBuffer()");
    }
    // only drop the output buffer if it's actually empty
    if (!odesc.mDataByteSize || !npackets)
    {
        hb_log("encCoreAudio: 0 packets returned");
        // the buffer is not handed to anyone, so release it here
        hb_buffer_close(&obuf);
        return NULL;
    }

    obuf->size        = odesc.mDataByteSize;
    obuf->s.start     = pv->pts;
    pv->pts          += 90000LL * pv->isamples / pv->osamplerate;
    obuf->s.stop      = pv->pts;
    obuf->s.type      = AUDIO_BUF;
    obuf->s.frametype = HB_FRAME_AUDIO;

    return obuf;
}

/* Drains the encoder at end of input.
 *
 * Pads the queued input with silence up to one full packet, encodes
 * everything that is left and appends bufin (the EOF marker) to the end
 * of the resulting buffer chain. Returns the head of that chain. */
static hb_buffer_t* Flush(hb_work_object_t *w, hb_buffer_t *bufin)
{
    hb_work_private_t *pv = w->private_data;

    // pad whatever data we have out to one full input packet.
    int nbytes = hb_list_bytes(pv->list);
    int pad = pv->isamples * pv->isamplesiz - nbytes;
    if (pad > 0)
    {
        hb_buffer_t *tmp = hb_buffer_init(pad);
        memset(tmp->data, 0, pad);
        hb_list_add(pv->list, tmp);
    }

    hb_buffer_t *bufout = NULL, *buf = NULL;
    while (hb_list_bytes(pv->list) >= pv->isamples * pv->isamplesiz)
    {
        int queued = hb_list_bytes(pv->list);
        hb_buffer_t *b = Encode(w);
        if (b != NULL)
        {
            if (bufout == NULL)
            {
                bufout = b;
            }
            else
            {
                buf->next = b;
            }
            buf = b;
        }
        else if (hb_list_bytes(pv->list) >= queued)
        {
            // nothing produced and nothing consumed: retrying would
            // loop forever, so give up on the remaining input
            break;
        }
    }

    // add the eof marker to the end of our buf chain
    if (buf != NULL)
    {
        buf->next = bufin;
    }
    else
    {
        bufout = bufin;
    }
    return bufout;
}

/***********************************************************************
 * Work
 ***********************************************************************
 * Queues *buf_in and emits, through *buf_out, a chain of every packet
 * that can be encoded from the queued input (NULL if none). Takes
 * ownership of *buf_in and sets it to NULL.
 *
 * Returns HB_WORK_DONE once the EOF buffer (size <= 0) has been
 * processed, HB_WORK_OK otherwise.
 **********************************************************************/
int encCoreAudioWork(hb_work_object_t *w, hb_buffer_t **buf_in,
                     hb_buffer_t **buf_out)
{
    hb_work_private_t *pv = w->private_data;
    hb_buffer_t *buf;

    if ((*buf_in)->size <= 0)
    {
        // EOF on input. Finish encoding what we have buffered then send
        // it & the eof downstream.
        *buf_out = Flush(w, *buf_in);
        *buf_in = NULL;
        return HB_WORK_DONE;
    }

    hb_list_add(pv->list, *buf_in);
    *buf_in = NULL;

    // encode until Encode reports that no further packet is available
    *buf_out = buf = Encode(w);
    while (buf != NULL)
    {
        buf->next = Encode(w);
        buf = buf->next;
    }

    return HB_WORK_OK;
}