Diffstat (limited to 'dom/media/platforms/ffmpeg/FFmpegAudioDecoder.cpp')
-rw-r--r--  dom/media/platforms/ffmpeg/FFmpegAudioDecoder.cpp  233
1 file changed, 233 insertions, 0 deletions
diff --git a/dom/media/platforms/ffmpeg/FFmpegAudioDecoder.cpp b/dom/media/platforms/ffmpeg/FFmpegAudioDecoder.cpp
new file mode 100644
index 000000000..f867ec494
--- /dev/null
+++ b/dom/media/platforms/ffmpeg/FFmpegAudioDecoder.cpp
@@ -0,0 +1,233 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/TaskQueue.h"
+
+#include "FFmpegAudioDecoder.h"
+#include "TimeUnits.h"
+
+#define MAX_CHANNELS 16
+
+namespace mozilla
+{
+
+FFmpegAudioDecoder<LIBAV_VER>::FFmpegAudioDecoder(FFmpegLibWrapper* aLib,
+ TaskQueue* aTaskQueue, MediaDataDecoderCallback* aCallback,
+ const AudioInfo& aConfig)
+ : FFmpegDataDecoder(aLib, aTaskQueue, aCallback, GetCodecId(aConfig.mMimeType))
+{
+ MOZ_COUNT_CTOR(FFmpegAudioDecoder);
+ // Use a new MediaByteBuffer as the object will be modified during initialization.
+ if (aConfig.mCodecSpecificConfig && aConfig.mCodecSpecificConfig->Length()) {
+ mExtraData = new MediaByteBuffer;
+ mExtraData->AppendElements(*aConfig.mCodecSpecificConfig);
+ }
+}
+
+RefPtr<MediaDataDecoder::InitPromise>
+FFmpegAudioDecoder<LIBAV_VER>::Init()
+{
+ nsresult rv = InitDecoder();
+
+ return rv == NS_OK ? InitPromise::CreateAndResolve(TrackInfo::kAudioTrack, __func__)
+ : InitPromise::CreateAndReject(NS_ERROR_DOM_MEDIA_FATAL_ERR, __func__);
+}
+
+void
+FFmpegAudioDecoder<LIBAV_VER>::InitCodecContext()
+{
+ MOZ_ASSERT(mCodecContext);
+  // We do not want to set this value to 0: FFmpeg would then default to
+  // the number of cores, which relies on get_cpu_count, and that is not
+  // implemented in our mozlibavutil.
+ mCodecContext->thread_count = 1;
+ // FFmpeg takes this as a suggestion for what format to use for audio samples.
+  // LibAV 0.8 produces rubbish float interleaved samples, so request 16-bit audio instead.
+ mCodecContext->request_sample_fmt =
+ (mLib->mVersion == 53) ? AV_SAMPLE_FMT_S16 : AV_SAMPLE_FMT_FLT;
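+  // Note: request_sample_fmt is only a hint; the decoder may still emit a
+  // different sample format. DoDecode() therefore checks mFrame->format and
+  // CopyAndPackAudio() handles each of the supported layouts.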
+}
+
+static AlignedAudioBuffer
+CopyAndPackAudio(AVFrame* aFrame, uint32_t aNumChannels, uint32_t aNumAFrames)
+{
+ MOZ_ASSERT(aNumChannels <= MAX_CHANNELS);
+
+ AlignedAudioBuffer audio(aNumChannels * aNumAFrames);
+ if (!audio) {
+ return audio;
+ }
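+  // Layout reminder: packed (interleaved) formats keep all channels in
+  // data[0], e.g. stereo as [L0 R0 L1 R1 ...]; planar formats keep one
+  // channel per plane, data[0] = [L0 L1 ...], data[1] = [R0 R1 ...].
+  // The branches below copy, interleave and/or convert accordingly.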
+
+ if (aFrame->format == AV_SAMPLE_FMT_FLT) {
+ // Audio data already packed. No need to do anything other than copy it
+ // into a buffer we own.
+ memcpy(audio.get(), aFrame->data[0],
+ aNumChannels * aNumAFrames * sizeof(AudioDataValue));
+ } else if (aFrame->format == AV_SAMPLE_FMT_FLTP) {
+ // Planar audio data. Pack it into something we can understand.
+ AudioDataValue* tmp = audio.get();
+ AudioDataValue** data = reinterpret_cast<AudioDataValue**>(aFrame->data);
+ for (uint32_t frame = 0; frame < aNumAFrames; frame++) {
+ for (uint32_t channel = 0; channel < aNumChannels; channel++) {
+ *tmp++ = data[channel][frame];
+ }
+ }
+ } else if (aFrame->format == AV_SAMPLE_FMT_S16) {
+    // Audio data already packed. Need to convert from S16 to 32-bit float.
+ AudioDataValue* tmp = audio.get();
+ int16_t* data = reinterpret_cast<int16_t**>(aFrame->data)[0];
+ for (uint32_t frame = 0; frame < aNumAFrames; frame++) {
+ for (uint32_t channel = 0; channel < aNumChannels; channel++) {
+ *tmp++ = AudioSampleToFloat(*data++);
+ }
+ }
+ } else if (aFrame->format == AV_SAMPLE_FMT_S16P) {
+    // Planar audio data. Convert it from S16 to 32-bit float
+ // and pack it into something we can understand.
+ AudioDataValue* tmp = audio.get();
+ int16_t** data = reinterpret_cast<int16_t**>(aFrame->data);
+ for (uint32_t frame = 0; frame < aNumAFrames; frame++) {
+ for (uint32_t channel = 0; channel < aNumChannels; channel++) {
+ *tmp++ = AudioSampleToFloat(data[channel][frame]);
+ }
+ }
+ } else if (aFrame->format == AV_SAMPLE_FMT_S32) {
+    // Audio data already packed. Need to convert from S32 to 32-bit float.
+ AudioDataValue* tmp = audio.get();
+ int32_t* data = reinterpret_cast<int32_t**>(aFrame->data)[0];
+ for (uint32_t frame = 0; frame < aNumAFrames; frame++) {
+ for (uint32_t channel = 0; channel < aNumChannels; channel++) {
+ *tmp++ = AudioSampleToFloat(*data++);
+ }
+ }
+ } else if (aFrame->format == AV_SAMPLE_FMT_S32P) {
+    // Planar audio data. Convert it from S32 to 32-bit float
+ // and pack it into something we can understand.
+ AudioDataValue* tmp = audio.get();
+ int32_t** data = reinterpret_cast<int32_t**>(aFrame->data);
+ for (uint32_t frame = 0; frame < aNumAFrames; frame++) {
+ for (uint32_t channel = 0; channel < aNumChannels; channel++) {
+ *tmp++ = AudioSampleToFloat(data[channel][frame]);
+ }
+ }
+ }
+
+ return audio;
+}
+
+MediaResult
+FFmpegAudioDecoder<LIBAV_VER>::DoDecode(MediaRawData* aSample)
+{
+ AVPacket packet;
+ mLib->av_init_packet(&packet);
+
+ packet.data = const_cast<uint8_t*>(aSample->Data());
+ packet.size = aSample->Size();
+
+ if (!PrepareFrame()) {
+ return MediaResult(
+ NS_ERROR_OUT_OF_MEMORY,
+ RESULT_DETAIL("FFmpeg audio decoder failed to allocate frame"));
+ }
+
+ int64_t samplePosition = aSample->mOffset;
+ media::TimeUnit pts = media::TimeUnit::FromMicroseconds(aSample->mTime);
+
+ while (packet.size > 0) {
+ int decoded;
+ int bytesConsumed =
+ mLib->avcodec_decode_audio4(mCodecContext, mFrame, &decoded, &packet);
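+    // avcodec_decode_audio4 returns the number of bytes consumed from the
+    // packet (possibly less than packet.size) or a negative error code, and
+    // sets `decoded` to non-zero only when a complete frame is available,
+    // so the surrounding loop keeps feeding the remainder of the packet.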
+
+ if (bytesConsumed < 0) {
+ NS_WARNING("FFmpeg audio decoder error.");
+ return MediaResult(NS_ERROR_DOM_MEDIA_DECODE_ERR,
+ RESULT_DETAIL("FFmpeg audio error:%d", bytesConsumed));
+ }
+
+ if (decoded) {
+ if (mFrame->format != AV_SAMPLE_FMT_FLT &&
+ mFrame->format != AV_SAMPLE_FMT_FLTP &&
+ mFrame->format != AV_SAMPLE_FMT_S16 &&
+ mFrame->format != AV_SAMPLE_FMT_S16P &&
+ mFrame->format != AV_SAMPLE_FMT_S32 &&
+ mFrame->format != AV_SAMPLE_FMT_S32P) {
+ return MediaResult(
+ NS_ERROR_DOM_MEDIA_DECODE_ERR,
+ RESULT_DETAIL(
+ "FFmpeg audio decoder outputs unsupported audio format"));
+ }
+ uint32_t numChannels = mCodecContext->channels;
+ AudioConfig::ChannelLayout layout(numChannels);
+ if (!layout.IsValid()) {
+ return MediaResult(
+ NS_ERROR_DOM_MEDIA_FATAL_ERR,
+ RESULT_DETAIL("Unsupported channel layout:%u", numChannels));
+ }
+
+ uint32_t samplingRate = mCodecContext->sample_rate;
+
+ AlignedAudioBuffer audio =
+ CopyAndPackAudio(mFrame, numChannels, mFrame->nb_samples);
+ if (!audio) {
+ return MediaResult(NS_ERROR_OUT_OF_MEMORY, __func__);
+ }
+
+ media::TimeUnit duration =
+ FramesToTimeUnit(mFrame->nb_samples, samplingRate);
+ if (!duration.IsValid()) {
+ return MediaResult(
+ NS_ERROR_DOM_MEDIA_OVERFLOW_ERR,
+ RESULT_DETAIL("Invalid sample duration"));
+ }
+
+ RefPtr<AudioData> data = new AudioData(samplePosition,
+ pts.ToMicroseconds(),
+ duration.ToMicroseconds(),
+ mFrame->nb_samples,
+ Move(audio),
+ numChannels,
+ samplingRate);
+ mCallback->Output(data);
+ pts += duration;
+ if (!pts.IsValid()) {
+ return MediaResult(
+ NS_ERROR_DOM_MEDIA_OVERFLOW_ERR,
+ RESULT_DETAIL("Invalid count of accumulated audio samples"));
+ }
+ }
+ packet.data += bytesConsumed;
+ packet.size -= bytesConsumed;
+ samplePosition += bytesConsumed;
+ }
+ return NS_OK;
+}
+
+void
+FFmpegAudioDecoder<LIBAV_VER>::ProcessDrain()
+{
+ ProcessFlush();
+ mCallback->DrainComplete();
+}
+
+AVCodecID
+FFmpegAudioDecoder<LIBAV_VER>::GetCodecId(const nsACString& aMimeType)
+{
+ if (aMimeType.EqualsLiteral("audio/mpeg")) {
+ return AV_CODEC_ID_MP3;
+ } else if (aMimeType.EqualsLiteral("audio/flac")) {
+ return AV_CODEC_ID_FLAC;
+ } else if (aMimeType.EqualsLiteral("audio/mp4a-latm")) {
+ return AV_CODEC_ID_AAC;
+ }
+
+ return AV_CODEC_ID_NONE;
+}
+
+FFmpegAudioDecoder<LIBAV_VER>::~FFmpegAudioDecoder()
+{
+ MOZ_COUNT_DTOR(FFmpegAudioDecoder);
+}
+
+} // namespace mozilla