diff options
Diffstat (limited to 'dom/media/encoder/fmp4_muxer')
-rw-r--r-- | dom/media/encoder/fmp4_muxer/AMRBox.cpp | 84 | ||||
-rw-r--r-- | dom/media/encoder/fmp4_muxer/AMRBox.h | 50 | ||||
-rw-r--r-- | dom/media/encoder/fmp4_muxer/AVCBox.cpp | 87 | ||||
-rw-r--r-- | dom/media/encoder/fmp4_muxer/AVCBox.h | 59 | ||||
-rw-r--r-- | dom/media/encoder/fmp4_muxer/EVRCBox.cpp | 84 | ||||
-rw-r--r-- | dom/media/encoder/fmp4_muxer/EVRCBox.h | 50 | ||||
-rw-r--r-- | dom/media/encoder/fmp4_muxer/ISOControl.cpp | 415 | ||||
-rw-r--r-- | dom/media/encoder/fmp4_muxer/ISOControl.h | 250 | ||||
-rw-r--r-- | dom/media/encoder/fmp4_muxer/ISOMediaBoxes.cpp | 1550 | ||||
-rw-r--r-- | dom/media/encoder/fmp4_muxer/ISOMediaBoxes.h | 781 | ||||
-rw-r--r-- | dom/media/encoder/fmp4_muxer/ISOMediaWriter.cpp | 234 | ||||
-rw-r--r-- | dom/media/encoder/fmp4_muxer/ISOMediaWriter.h | 108 | ||||
-rw-r--r-- | dom/media/encoder/fmp4_muxer/ISOTrackMetadata.h | 131 | ||||
-rw-r--r-- | dom/media/encoder/fmp4_muxer/MP4ESDS.cpp | 138 | ||||
-rw-r--r-- | dom/media/encoder/fmp4_muxer/MP4ESDS.h | 87 | ||||
-rw-r--r-- | dom/media/encoder/fmp4_muxer/MuxerOperation.h | 57 | ||||
-rw-r--r-- | dom/media/encoder/fmp4_muxer/moz.build | 22 |
17 files changed, 4187 insertions, 0 deletions
diff --git a/dom/media/encoder/fmp4_muxer/AMRBox.cpp b/dom/media/encoder/fmp4_muxer/AMRBox.cpp new file mode 100644 index 000000000..cd1a34fae --- /dev/null +++ b/dom/media/encoder/fmp4_muxer/AMRBox.cpp @@ -0,0 +1,84 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "ISOControl.h" +#include "ISOMediaBoxes.h" +#include "AMRBox.h" +#include "ISOTrackMetadata.h" + +namespace mozilla { + +nsresult +AMRSampleEntry::Generate(uint32_t* aBoxSize) +{ /* Generates the nested 'damr' box, adds its size to this entry's size and reports the total through aBoxSize. NOTE(review): size grows with += on every call, so Generate() looks intended to run once per box -- confirm. */ + uint32_t box_size; + nsresult rv = amr_special_box->Generate(&box_size); + NS_ENSURE_SUCCESS(rv, rv); + size += box_size; + + *aBoxSize = size; + return NS_OK; +} + +nsresult +AMRSampleEntry::Write() +{ /* Writes the AudioSampleEntry part first, then the nested 'damr' box; the first failure is returned. */ + BoxSizeChecker checker(mControl, size); /* asserts on scope exit that the output grew by exactly `size` bytes */ + nsresult rv; + rv = AudioSampleEntry::Write(); + NS_ENSURE_SUCCESS(rv, rv); + rv = amr_special_box->Write(); + NS_ENSURE_SUCCESS(rv, rv); + + return NS_OK; +} + +AMRSampleEntry::AMRSampleEntry(ISOControl* aControl) + : AudioSampleEntry(NS_LITERAL_CSTRING("samr"), aControl) +{ /* Creates the entry with box type 'samr' and its nested AMRSpecificBox. */ + amr_special_box = new AMRSpecificBox(aControl); + MOZ_COUNT_CTOR(AMRSampleEntry); +} + +AMRSampleEntry::~AMRSampleEntry() +{ + MOZ_COUNT_DTOR(AMRSampleEntry); +} + +nsresult +AMRSpecificBox::Generate(uint32_t* aBoxSize) +{ /* Appends the audio fragment's codec specific data (CSD) to amrDecSpecInfo, adds its length to size and reports size through aBoxSize; fails when GetCSD() fails. */ + nsresult rv; + FragmentBuffer* frag = mControl->GetFragment(Audio_Track); /* NOTE(review): GetFragment() returns nullptr until an audio fragment buffer has been set; frag is dereferenced without a check. */ + rv = frag->GetCSD(amrDecSpecInfo); + NS_ENSURE_SUCCESS(rv, rv); + + size += amrDecSpecInfo.Length(); + *aBoxSize = size; + + return NS_OK; +} + +nsresult +AMRSpecificBox::Write() +{ /* Calls Box::Write(), then writes the amrDecSpecInfo bytes collected by Generate(). */ + BoxSizeChecker checker(mControl, size); + Box::Write(); + mControl->Write(amrDecSpecInfo.Elements(), amrDecSpecInfo.Length()); + return NS_OK; +} + +AMRSpecificBox::AMRSpecificBox(ISOControl* aControl) + : Box(NS_LITERAL_CSTRING("damr"), aControl) +{ /* Creates the box with type 'damr'. */ + MOZ_COUNT_CTOR(AMRSpecificBox); +}
+ +AMRSpecificBox::~AMRSpecificBox() +{ + MOZ_COUNT_DTOR(AMRSpecificBox); +} + +} diff --git a/dom/media/encoder/fmp4_muxer/AMRBox.h b/dom/media/encoder/fmp4_muxer/AMRBox.h new file mode 100644 index 000000000..645d7f89c --- /dev/null +++ b/dom/media/encoder/fmp4_muxer/AMRBox.h @@ -0,0 +1,50 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef AMRBOX_h_ +#define AMRBOX_h_ + +#include "nsTArray.h" +#include "MuxerOperation.h" + +namespace mozilla { + +class ISOControl; + +// 3GPP TS 26.244 6.7 'AMRSpecificBox field for AMRSampleEntry box' +// Box type: 'damr' +class AMRSpecificBox : public Box { +public: + // 3GPP members. amrDecSpecInfo holds the codec specific data that Generate() copies from the audio fragment and Write() emits after the box header. + nsTArray<uint8_t> amrDecSpecInfo; + + // MuxerOperation methods + nsresult Generate(uint32_t* aBoxSize) override; + nsresult Write() override; + + // AMRSpecificBox methods + AMRSpecificBox(ISOControl* aControl); + ~AMRSpecificBox(); +}; + +// 3GPP TS 26.244 6.5 'AMRSampleEntry box' +// Box type: 'samr' (the type the constructor passes to AudioSampleEntry) +class AMRSampleEntry : public AudioSampleEntry { +public: + // 3GPP members. amr_special_box is the nested AMRSpecificBox ('damr') created by the constructor. + RefPtr<AMRSpecificBox> amr_special_box; + + // MuxerOperation methods + nsresult Generate(uint32_t* aBoxSize) override; + nsresult Write() override; + + // AMRSampleEntry methods + AMRSampleEntry(ISOControl* aControl); + ~AMRSampleEntry(); +}; + +} + +#endif // AMRBOX_h_ diff --git a/dom/media/encoder/fmp4_muxer/AVCBox.cpp b/dom/media/encoder/fmp4_muxer/AVCBox.cpp new file mode 100644 index 000000000..a45cda8b7 --- /dev/null +++ b/dom/media/encoder/fmp4_muxer/AVCBox.cpp @@ -0,0 +1,87 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0.
If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include <climits> +#include "ISOControl.h" +#include "ISOMediaBoxes.h" +#include "AVCBox.h" + +namespace mozilla { + +nsresult +AVCSampleEntry::Generate(uint32_t* aBoxSize) +{ /* Generates the nested 'avcC' box, adds its size to this entry's size and reports the total through aBoxSize. NOTE(review): size grows with += on every call, so Generate() looks intended to run once per box -- confirm. */ + uint32_t avc_box_size = 0; + nsresult rv; + rv = avcConfigBox->Generate(&avc_box_size); + NS_ENSURE_SUCCESS(rv, rv); + + size += avc_box_size; + + *aBoxSize = size; + + return NS_OK; +} + +nsresult +AVCSampleEntry::Write() +{ /* Writes the VisualSampleEntry part first, then the nested 'avcC' box; the first failure is returned. */ + BoxSizeChecker checker(mControl, size); /* asserts on scope exit that the output grew by exactly `size` bytes */ + nsresult rv; + rv = VisualSampleEntry::Write(); + NS_ENSURE_SUCCESS(rv, rv); + rv = avcConfigBox->Write(); + NS_ENSURE_SUCCESS(rv, rv); + + return NS_OK; +} + +AVCSampleEntry::AVCSampleEntry(ISOControl* aControl) + : VisualSampleEntry(NS_LITERAL_CSTRING("avc1"), aControl) +{ /* Creates the entry with box type 'avc1' and its nested AVCConfigurationBox. */ + avcConfigBox = new AVCConfigurationBox(aControl); + MOZ_COUNT_CTOR(AVCSampleEntry); +} + +AVCSampleEntry::~AVCSampleEntry() +{ + MOZ_COUNT_DTOR(AVCSampleEntry); +} + +AVCConfigurationBox::AVCConfigurationBox(ISOControl* aControl) + : Box(NS_LITERAL_CSTRING("avcC"), aControl) +{ /* Creates the box with type 'avcC'. */ + MOZ_COUNT_CTOR(AVCConfigurationBox); +} + +AVCConfigurationBox::~AVCConfigurationBox() +{ + MOZ_COUNT_DTOR(AVCConfigurationBox); +} + +nsresult +AVCConfigurationBox::Generate(uint32_t* aBoxSize) +{ /* Appends the video fragment's codec specific data (CSD) to avcConfig, adds its length to size and reports size through aBoxSize; fails when GetCSD() fails. */ + nsresult rv; + FragmentBuffer* frag = mControl->GetFragment(Video_Track); /* NOTE(review): GetFragment() returns nullptr until a video fragment buffer has been set; frag is dereferenced without a check. */ + rv = frag->GetCSD(avcConfig); + NS_ENSURE_SUCCESS(rv, rv); + size += avcConfig.Length(); + *aBoxSize = size; + return NS_OK; +} + +nsresult +AVCConfigurationBox::Write() +{ /* Calls Box::Write(), then writes the avcConfig bytes collected by Generate(). */ + BoxSizeChecker checker(mControl, size); + Box::Write(); + + mControl->Write(avcConfig.Elements(), avcConfig.Length()); + + return NS_OK; +} + +} diff --git a/dom/media/encoder/fmp4_muxer/AVCBox.h b/dom/media/encoder/fmp4_muxer/AVCBox.h new file mode 100644 index 000000000..9640d9e8f --- /dev/null +++ b/dom/media/encoder/fmp4_muxer/AVCBox.h @@ -0,0 +1,59 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil;
c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef AVCBox_h_ +#define AVCBox_h_ + +#include "nsTArray.h" +#include "ISOMediaBoxes.h" + +namespace mozilla { + +class ISOControl; + +// 14496-12 8.5.2.2 (0x00480000 carries 72 in its upper 16 bits; 0x0018 is 24) +#define resolution_72_dpi 0x00480000 +#define video_depth 0x0018 + +// 14496-15 5.3.4.1 'Sample description name and format' +// Box type: 'avcC' +class AVCConfigurationBox : public Box { +public: + // ISO BMFF members + + // avcConfig is the CodecSpecificData from 14496-15 '5.3.4.1 Sample description + // name and format'. + // This data is generated by the encoder, and the generated bitstream is + // encapsulated into the box directly. + nsTArray<uint8_t> avcConfig; + + // MuxerOperation methods + nsresult Generate(uint32_t* aBoxSize) override; + nsresult Write() override; + + // AVCConfigurationBox methods + AVCConfigurationBox(ISOControl* aControl); + ~AVCConfigurationBox(); +}; + +// 14496-15 5.3.4.1 'Sample description name and format' +// Box type: 'avc1' +class AVCSampleEntry : public VisualSampleEntry { +public: + // ISO BMFF members. avcConfigBox is the nested AVCConfigurationBox ('avcC') created by the constructor. + RefPtr<AVCConfigurationBox> avcConfigBox; + + // MuxerOperation methods + nsresult Generate(uint32_t* aBoxSize) override; + nsresult Write() override; + + // AVCSampleEntry methods + AVCSampleEntry(ISOControl* aControl); + ~AVCSampleEntry(); +}; + +} + +#endif // AVCBox_h_ diff --git a/dom/media/encoder/fmp4_muxer/EVRCBox.cpp b/dom/media/encoder/fmp4_muxer/EVRCBox.cpp new file mode 100644 index 000000000..096e4013d --- /dev/null +++ b/dom/media/encoder/fmp4_muxer/EVRCBox.cpp @@ -0,0 +1,84 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0.
If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "ISOControl.h" +#include "ISOMediaBoxes.h" +#include "EVRCBox.h" +#include "ISOTrackMetadata.h" + +namespace mozilla { + +nsresult +EVRCSampleEntry::Generate(uint32_t* aBoxSize) +{ /* Generates the nested 'devc' box, adds its size to this entry's size and reports the total through aBoxSize. NOTE(review): size grows with += on every call, so Generate() looks intended to run once per box -- confirm. */ + uint32_t box_size; + nsresult rv = evrc_special_box->Generate(&box_size); + NS_ENSURE_SUCCESS(rv, rv); + size += box_size; + + *aBoxSize = size; + return NS_OK; +} + +nsresult +EVRCSampleEntry::Write() +{ /* Writes the AudioSampleEntry part first, then the nested 'devc' box; the first failure is returned. */ + BoxSizeChecker checker(mControl, size); /* asserts on scope exit that the output grew by exactly `size` bytes */ + nsresult rv; + rv = AudioSampleEntry::Write(); + NS_ENSURE_SUCCESS(rv, rv); + rv = evrc_special_box->Write(); + NS_ENSURE_SUCCESS(rv, rv); + + return NS_OK; +} + +EVRCSampleEntry::EVRCSampleEntry(ISOControl* aControl) + : AudioSampleEntry(NS_LITERAL_CSTRING("sevc"), aControl) +{ /* Creates the entry with box type 'sevc' and its nested EVRCSpecificBox. */ + evrc_special_box = new EVRCSpecificBox(aControl); + MOZ_COUNT_CTOR(EVRCSampleEntry); +} + +EVRCSampleEntry::~EVRCSampleEntry() +{ + MOZ_COUNT_DTOR(EVRCSampleEntry); +} + +nsresult +EVRCSpecificBox::Generate(uint32_t* aBoxSize) +{ /* Appends the audio fragment's codec specific data (CSD) to evrcDecSpecInfo, adds its length to size and reports size through aBoxSize; fails when GetCSD() fails. */ + nsresult rv; + FragmentBuffer* frag = mControl->GetFragment(Audio_Track); /* NOTE(review): GetFragment() returns nullptr until an audio fragment buffer has been set; frag is dereferenced without a check. */ + rv = frag->GetCSD(evrcDecSpecInfo); + NS_ENSURE_SUCCESS(rv, rv); + + size += evrcDecSpecInfo.Length(); + *aBoxSize = size; + + return NS_OK; +} + +nsresult +EVRCSpecificBox::Write() +{ /* Calls Box::Write(), then writes the evrcDecSpecInfo bytes collected by Generate(). */ + BoxSizeChecker checker(mControl, size); + Box::Write(); + mControl->Write(evrcDecSpecInfo.Elements(), evrcDecSpecInfo.Length()); + return NS_OK; +} + +EVRCSpecificBox::EVRCSpecificBox(ISOControl* aControl) + : Box(NS_LITERAL_CSTRING("devc"), aControl) +{ /* Creates the box with type 'devc'. */ + MOZ_COUNT_CTOR(EVRCSpecificBox); +} + +EVRCSpecificBox::~EVRCSpecificBox() +{ + MOZ_COUNT_DTOR(EVRCSpecificBox); +} + +} diff --git a/dom/media/encoder/fmp4_muxer/EVRCBox.h b/dom/media/encoder/fmp4_muxer/EVRCBox.h new file mode 100644 index 000000000..31355849a --- /dev/null +++ b/dom/media/encoder/fmp4_muxer/EVRCBox.h @@ -0,0 +1,50 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil;
c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef EVRCBOX_h_ +#define EVRCBOX_h_ + +#include "nsTArray.h" +#include "MuxerOperation.h" + +namespace mozilla { + +class ISOControl; + +// 3GPP TS 26.244 6.7 'EVRCSpecificBox field for EVRCSampleEntry box' (NOTE(review): this citation mirrors AMRBox.h; the EVRC boxes are presumably specified by 3GPP2 C.S0050 instead -- confirm) +// Box type: 'devc' +class EVRCSpecificBox : public Box { +public: + // 3GPP members. evrcDecSpecInfo holds the codec specific data that Generate() copies from the audio fragment and Write() emits after the box header. + nsTArray<uint8_t> evrcDecSpecInfo; + + // MuxerOperation methods + nsresult Generate(uint32_t* aBoxSize) override; + nsresult Write() override; + + // EVRCSpecificBox methods + EVRCSpecificBox(ISOControl* aControl); + ~EVRCSpecificBox(); +}; + +// 3GPP TS 26.244 6.5 'EVRCSampleEntry box' +// Box type: 'sevc' +class EVRCSampleEntry : public AudioSampleEntry { +public: + // 3GPP members. evrc_special_box is the nested EVRCSpecificBox ('devc') created by the constructor. + RefPtr<EVRCSpecificBox> evrc_special_box; + + // MuxerOperation methods + nsresult Generate(uint32_t* aBoxSize) override; + nsresult Write() override; + + // EVRCSampleEntry methods + EVRCSampleEntry(ISOControl* aControl); + ~EVRCSampleEntry(); +}; + +} + +#endif // EVRCBOX_h_ diff --git a/dom/media/encoder/fmp4_muxer/ISOControl.cpp b/dom/media/encoder/fmp4_muxer/ISOControl.cpp new file mode 100644 index 000000000..6addaeb30 --- /dev/null +++ b/dom/media/encoder/fmp4_muxer/ISOControl.cpp @@ -0,0 +1,415 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include <time.h> +#include "nsAutoPtr.h" +#include "ISOControl.h" +#include "ISOMediaBoxes.h" +#include "EncodedFrameContainer.h" + +namespace mozilla { + +// Offset in seconds between the MP4 epoch used by creation_time and modification_time (January 1, 1904) and +// the Unix epoch (January 1, 1970).
+#define iso_time_offset 2082844800 + +FragmentBuffer::FragmentBuffer(uint32_t aTrackType, uint32_t aFragDuration) + : mTrackType(aTrackType) + , mFragDuration(aFragDuration) + , mMediaStartTime(0) + , mFragmentNumber(0) + , mLastFrameTimeOfLastFragment(0) + , mEOS(false) +{ /* Starts with one empty fragment so that AddFrame() always has a last element to append to. */ + mFragArray.AppendElement(); + MOZ_COUNT_CTOR(FragmentBuffer); +} + +FragmentBuffer::~FragmentBuffer() +{ + MOZ_COUNT_DTOR(FragmentBuffer); +} + +bool +FragmentBuffer::HasEnoughData() +{ + // More than one element in mFragArray means the first one is complete, which is enough to form a moof. + return (mFragArray.Length() > 1); +} + +nsresult +FragmentBuffer::GetCSD(nsTArray<uint8_t>& aCSD) +{ /* Appends the stored CSD frame's data to aCSD; returns NS_ERROR_FAILURE while no CSD frame has been received. */ + if (!mCSDFrame) { + return NS_ERROR_FAILURE; + } + aCSD.AppendElements(mCSDFrame->GetFrameData().Elements(), + mCSDFrame->GetFrameData().Length()); + + return NS_OK; +} + +nsresult +FragmentBuffer::AddFrame(EncodedFrame* aFrame) +{ + // Already at end of stream: all new data is rejected (asserts, but still returns NS_OK). + if (mEOS) { + MOZ_ASSERT(0); + return NS_OK; + } + + EncodedFrame::FrameType type = aFrame->GetFrameType(); + if (type == EncodedFrame::AAC_CSD || type == EncodedFrame::AVC_CSD || + type == EncodedFrame::AMR_AUDIO_CSD || type == EncodedFrame::EVRC_AUDIO_CSD) { + mCSDFrame = aFrame; + // Use CSD's timestamp as the start time. Encoder should send CSD frame first + // and then data frames. + mMediaStartTime = aFrame->GetTimeStamp(); + mFragmentNumber = 1; + return NS_OK; + } + + // A frame older than the media start time has an invalid timestamp; reject it. + if (aFrame->GetTimeStamp() < mMediaStartTime) { + MOZ_ASSERT(false); + return NS_ERROR_FAILURE; + } + + mFragArray.LastElement().AppendElement(aFrame); + + // Start a new fragment once the current one reaches the fragment duration. The threshold is computed in 64 bits: both operands are uint32_t, and a 32-bit product of microseconds would wrap after about 71.6 minutes. + if ((aFrame->GetTimeStamp() - mMediaStartTime) >= (static_cast<uint64_t>(mFragDuration) * mFragmentNumber)) { + mFragArray.AppendElement(); + mFragmentNumber++; + } + + return NS_OK; +} + +nsresult +FragmentBuffer::GetFirstFragment(nsTArray<RefPtr<EncodedFrame>>& aFragment, + bool aFlush) +{ + // It should be called only if there is a complete fragment in mFragArray, or after end of stream.
+ if (mFragArray.Length() <= 1 && !mEOS) { + MOZ_ASSERT(false); + return NS_ERROR_FAILURE; + } + + if (aFlush) { /* hand the frames over and drop the fragment */ + aFragment.SwapElements(mFragArray.ElementAt(0)); + mFragArray.RemoveElementAt(0); + } else { /* copy the frame references and keep the fragment */ + aFragment.AppendElements(mFragArray.ElementAt(0)); + } + return NS_OK; +} + +uint32_t +FragmentBuffer::GetFirstFragmentSampleNumber() +{ /* Number of frames in the first fragment. */ + return mFragArray.ElementAt(0).Length(); +} + +uint32_t +FragmentBuffer::GetFirstFragmentSampleSize() +{ /* Sum of the frame data lengths of the first fragment. */ + uint32_t size = 0; + uint32_t len = mFragArray.ElementAt(0).Length(); + for (uint32_t i = 0; i < len; i++) { + size += mFragArray.ElementAt(0).ElementAt(i)->GetFrameData().Length(); + } + return size; +} + +ISOControl::ISOControl(uint32_t aMuxingType) + : mMuxingType(aMuxingType) + , mAudioFragmentBuffer(nullptr) + , mVideoFragmentBuffer(nullptr) + , mFragNum(0) + , mOutputSize(0) + , mBitCount(0) + , mBit(0) +{ + // Create a data array for first mp4 Box, ftyp. + mOutBuffers.SetLength(1); + MOZ_COUNT_CTOR(ISOControl); +} + +ISOControl::~ISOControl() +{ + MOZ_COUNT_DTOR(ISOControl); +} + +uint32_t +ISOControl::GetNextTrackID() +{ /* Track IDs are 1-based indices into mMetaArray, so the next ID is the current length plus one. */ + return (mMetaArray.Length() + 1); +} + +uint32_t +ISOControl::GetTrackID(TrackMetadataBase::MetadataKind aKind) +{ /* Returns the 1-based index of the first metadata entry of kind aKind. */ + for (uint32_t i = 0; i < mMetaArray.Length(); i++) { + if (mMetaArray[i]->GetKind() == aKind) { + return (i + 1); + } + } + + // Track ID shouldn't be 0. Reaching this point means no metadata of the requested kind was set.
+ MOZ_ASSERT(0); + return 0; +} + +nsresult +ISOControl::SetMetadata(TrackMetadataBase* aTrackMeta) +{ /* Stores aTrackMeta when it is one of the supported kinds (AAC, AMR, AVC, EVRC); fails otherwise. */ + if (aTrackMeta->GetKind() == TrackMetadataBase::METADATA_AAC || + aTrackMeta->GetKind() == TrackMetadataBase::METADATA_AMR || + aTrackMeta->GetKind() == TrackMetadataBase::METADATA_AVC || + aTrackMeta->GetKind() == TrackMetadataBase::METADATA_EVRC) { + mMetaArray.AppendElement(aTrackMeta); + return NS_OK; + } + return NS_ERROR_FAILURE; +} + +nsresult +ISOControl::GetAudioMetadata(RefPtr<AudioTrackMetadata>& aAudMeta) +{ /* Returns the first stored AAC, AMR or EVRC metadata through aAudMeta; fails and leaves aAudMeta untouched when there is none. */ + for (uint32_t i = 0; i < mMetaArray.Length() ; i++) { + if (mMetaArray[i]->GetKind() == TrackMetadataBase::METADATA_AAC || + mMetaArray[i]->GetKind() == TrackMetadataBase::METADATA_AMR || + mMetaArray[i]->GetKind() == TrackMetadataBase::METADATA_EVRC) { + aAudMeta = static_cast<AudioTrackMetadata*>(mMetaArray[i].get()); + return NS_OK; + } + } + return NS_ERROR_FAILURE; +} + +nsresult +ISOControl::GetVideoMetadata(RefPtr<VideoTrackMetadata>& aVidMeta) +{ /* Returns the first stored AVC metadata through aVidMeta; fails and leaves aVidMeta untouched when there is none. */ + for (uint32_t i = 0; i < mMetaArray.Length() ; i++) { + if (mMetaArray[i]->GetKind() == TrackMetadataBase::METADATA_AVC) { + aVidMeta = static_cast<VideoTrackMetadata*>(mMetaArray[i].get()); + return NS_OK; + } + } + return NS_ERROR_FAILURE; +} + +bool +ISOControl::HasAudioTrack() +{ /* The result is deliberately ignored: audMeta stays null when no audio metadata exists. */ + RefPtr<AudioTrackMetadata> audMeta; + GetAudioMetadata(audMeta); + return audMeta; +} + +bool +ISOControl::HasVideoTrack() +{ /* The result is deliberately ignored: vidMeta stays null when no video metadata exists. */ + RefPtr<VideoTrackMetadata> vidMeta; + GetVideoMetadata(vidMeta); + return vidMeta; +} + +nsresult +ISOControl::SetFragment(FragmentBuffer* aFragment) +{ /* Registers aFragment as the audio buffer when its type is Audio_Track; any other type is stored as the video buffer. */ + if (aFragment->GetType() == Audio_Track) { + mAudioFragmentBuffer = aFragment; + } else { + mVideoFragmentBuffer = aFragment; + } + return NS_OK; +} + +FragmentBuffer* +ISOControl::GetFragment(uint32_t aType) +{ /* Returns the registered buffer for aType, which may still be nullptr; asserts and returns nullptr for any other type. */ + if (aType == Audio_Track) { + return mAudioFragmentBuffer; + } else if (aType == Video_Track){ + return mVideoFragmentBuffer; + } + MOZ_ASSERT(0); + return nullptr; +} + +nsresult
+ISOControl::GetBufs(nsTArray<nsTArray<uint8_t>>* aOutputBufs) +{ /* Swaps every output buffer into a newly appended element of aOutputBufs, then resets mOutBuffers to a single element. */ + uint32_t len = mOutBuffers.Length(); + for (uint32_t i = 0; i < len; i++) { + mOutBuffers[i].SwapElements(*aOutputBufs->AppendElement()); + } + return FlushBuf(); +} + +nsresult +ISOControl::FlushBuf() +{ + mOutBuffers.SetLength(1); + return NS_OK; +} + +uint32_t +ISOControl::WriteAVData(nsTArray<uint8_t>& aArray) +{ /* Moves aArray into the output buffers without copying and returns its length; aArray is left holding the swapped-out element. */ + MOZ_ASSERT(!mBitCount); + + uint32_t len = aArray.Length(); + if (!len) { + return 0; + } + + mOutputSize += len; + + // The last element already has data, so allocate a new element for pointer + // swapping. + if (mOutBuffers.LastElement().Length()) { + mOutBuffers.AppendElement(); + } + // Swap the video/audio data pointer. + mOutBuffers.LastElement().SwapElements(aArray); + // Following data could be boxes, so append a new uint8_t array here. + mOutBuffers.AppendElement(); + + return len; +} + +uint32_t +ISOControl::WriteBits(uint64_t aBits, size_t aNumBits) +{ /* Feeds the low aNumBits bits of aBits, most significant first, into mBit and writes each completed byte; returns the number of bytes written. */ + uint8_t output_byte = 0; + + MOZ_ASSERT(aNumBits <= 64); + // TODO: rewrite the following with bitset? + for (size_t i = aNumBits; i > 0; i--) { + mBit |= (((aBits >> (i - 1)) & 1) << (8 - ++mBitCount)); + if (mBitCount == 8) { + Write(&mBit, sizeof(uint8_t)); + mBit = 0; + mBitCount = 0; + output_byte++; + } + } + return output_byte; +} + +uint32_t +ISOControl::Write(uint8_t* aBuf, uint32_t aSize) +{ /* Appends aSize bytes to the last output buffer and accounts for them in mOutputSize. */ + mOutBuffers.LastElement().AppendElements(aBuf, aSize); + mOutputSize += aSize; + return aSize; +} + +uint32_t +ISOControl::Write(uint8_t aData) +{ + MOZ_ASSERT(!mBitCount); + Write((uint8_t*)&aData, sizeof(uint8_t)); + return sizeof(uint8_t); +} + +uint32_t +ISOControl::GetBufPos() +{ /* Total number of bytes currently held in mOutBuffers. */ + uint32_t len = mOutBuffers.Length(); + uint32_t pos = 0; + for (uint32_t i = 0; i < len; i++) { + pos += mOutBuffers.ElementAt(i).Length(); + } + return pos; +} + +uint32_t +ISOControl::WriteFourCC(const char* aType) +{ + // Bit operation should be aligned to byte before writing any byte data. A string that is not exactly 4 characters long is not written and 0 is returned.
+ MOZ_ASSERT(!mBitCount); + + uint32_t size = strlen(aType); + if (size == 4) { + return Write((uint8_t*)aType, size); + } + + return 0; +} + +nsresult +ISOControl::GenerateFtyp() +{ /* Generates and writes a FileTypeBox. */ + nsresult rv; + uint32_t size; + nsAutoPtr<FileTypeBox> type_box(new FileTypeBox(this)); + rv = type_box->Generate(&size); + NS_ENSURE_SUCCESS(rv, rv); + rv = type_box->Write(); + NS_ENSURE_SUCCESS(rv, rv); + return NS_OK; +} + +nsresult +ISOControl::GenerateMoov() +{ /* Generates and writes a MovieBox. */ + nsresult rv; + uint32_t size; + nsAutoPtr<MovieBox> moov_box(new MovieBox(this)); + rv = moov_box->Generate(&size); + NS_ENSURE_SUCCESS(rv, rv); + rv = moov_box->Write(); + NS_ENSURE_SUCCESS(rv, rv); + return NS_OK; +} + +nsresult +ISOControl::GenerateMoof(uint32_t aTrackType) +{ /* Generates a MovieFragmentBox and a MediaDataBox for aTrackType, points every 'tfhd' at the first sample, then writes both boxes. */ + mFragNum++; + + nsresult rv; + uint32_t size; + uint64_t first_sample_offset = mOutputSize; /* bytes written so far */ + nsAutoPtr<MovieFragmentBox> moof_box(new MovieFragmentBox(aTrackType, this)); + nsAutoPtr<MediaDataBox> mdat_box(new MediaDataBox(aTrackType, this)); + + rv = moof_box->Generate(&size); + NS_ENSURE_SUCCESS(rv, rv); + first_sample_offset += size; + rv = mdat_box->Generate(&size); + NS_ENSURE_SUCCESS(rv, rv); + first_sample_offset += mdat_box->FirstSampleOffsetInMediaDataBox(); + + // Update the base data offset of every 'tfhd' box now that the offset of the first sample is known. + nsTArray<RefPtr<MuxerOperation>> tfhds; + rv = moof_box->Find(NS_LITERAL_CSTRING("tfhd"), tfhds); + NS_ENSURE_SUCCESS(rv, rv); + uint32_t len = tfhds.Length(); + for (uint32_t i = 0; i < len; i++) { + TrackFragmentHeaderBox* tfhd = (TrackFragmentHeaderBox*) tfhds.ElementAt(i).get(); + rv = tfhd->UpdateBaseDataOffset(first_sample_offset); + NS_ENSURE_SUCCESS(rv, rv); + } + + rv = moof_box->Write(); + NS_ENSURE_SUCCESS(rv, rv); + rv = mdat_box->Write(); + NS_ENSURE_SUCCESS(rv, rv); + + return NS_OK; +} + +uint32_t +ISOControl::GetTime() +{ /* Current time in seconds since January 1, 1904. NOTE(review): the 64-bit sum is truncated to uint32_t on return. */ + return (uint64_t)time(nullptr) + iso_time_offset; +} + +} diff --git a/dom/media/encoder/fmp4_muxer/ISOControl.h b/dom/media/encoder/fmp4_muxer/ISOControl.h new file mode 100644 index 000000000..3c445caee ---
/dev/null +++ b/dom/media/encoder/fmp4_muxer/ISOControl.h @@ -0,0 +1,250 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef ISOCOMPOSITOR_H_ +#define ISOCOMPOSITOR_H_ + +#include "mozilla/EndianUtils.h" +#include "nsTArray.h" +#include "ISOTrackMetadata.h" +#include "EncodedFrameContainer.h" + +namespace mozilla { + +class Box; +class ISOControl; + +/** + * This class collects elementary stream data to form a fragment. + * ISOMediaWriter will check if the data is enough; if yes, the corresponding + * moof will be created and written to ISOControl. + * Audio and video each have their own FragmentBuffer, and only one during the + * whole life cycle; once a fragment has been consumed, GetFirstFragment() + * has to be called with aFlush = true to drop it. + */ +class FragmentBuffer { +public: + // aTrackType: it could be Audio_Track or Video_Track. + // aFragDuration: it is the fragment duration. (microsecond per unit) + // Audio and video have the same fragment duration. + FragmentBuffer(uint32_t aTrackType, uint32_t aFragDuration); + ~FragmentBuffer(); + + // Get the samples of the first fragment. When aFlush = true the elements of + // mFragArray[0] are swapped into aFragment and the fragment is removed; the caller + // is then responsible for dropping the EncodedFrame references. + nsresult GetFirstFragment(nsTArray<RefPtr<EncodedFrame>>& aFragment, + bool aFlush = false); + + // Add a sample frame to the last fragment element of mFragArray. Once the + // fragment duration is reached, a new fragment element is appended, and + // following samples are added to that new element of mFragArray. + nsresult AddFrame(EncodedFrame* aFrame); + + // Get the total sample size of the first complete fragment. + uint32_t GetFirstFragmentSampleSize(); + + // Get the number of samples in the first complete fragment.
+ uint32_t GetFirstFragmentSampleNumber(); + + // Check if it accumulates enough frame data. + // It returns true when data is enough to form a fragment. + bool HasEnoughData(); + + // Called by ISOMediaWriter when TrackEncoder has sent the last frame. The + // remaining frame data will form the last moof and move the state machine + // in ISOMediaWriter to its last phase. + nsresult SetEndOfStream() { + mEOS = true; + return NS_OK; + } + bool EOS() { return mEOS; } + + // CSD (codec specific data), it is generated by encoder and the data depends + // on codec type. This data will be sent as a special frame from encoder to + // ISOMediaWriter and passed to this class via AddFrame(). + nsresult GetCSD(nsTArray<uint8_t>& aCSD); + + bool HasCSD() { return mCSDFrame; } + + uint32_t GetType() { return mTrackType; } + + void SetLastFragmentLastFrameTime(uint64_t aTime) { + mLastFrameTimeOfLastFragment = aTime; + } + + uint64_t GetLastFragmentLastFrameTime() { + return mLastFrameTimeOfLastFragment; + } + +private: + uint32_t mTrackType; + + // Fragment duration, microsecond per unit. + uint32_t mFragDuration; + + // Media start time, microsecond per unit. + // Together with mFragDuration, mFragmentNumber and EncodedFrame->GetTimeStamp(): + // when the difference between the current frame time and mMediaStartTime + // reaches the current fragment's ceiling, the current fragment has + // enough data and a new element is added to mFragArray. + uint64_t mMediaStartTime; + + // Current fragment number. It is increased when a new element of + // mFragArray is created. + // Note: + // It only means the fragment number of current accumulated frames, not + // the current 'creating' fragment mFragNum in ISOControl. + uint32_t mFragmentNumber; + + // The last frame time stamp of last fragment. It is for calculating the + // play duration of first frame in current fragment. The frame duration is + // defined as "current frame timestamp - last frame timestamp" here.
So it + // needs to keep the last timestamp of last fragment. Stored in 64 bits like mMediaStartTime, because it holds a frame timestamp. + uint64_t mLastFrameTimeOfLastFragment; + + // Array of fragments, each element has enough samples to form a + // complete fragment. + nsTArray<nsTArray<RefPtr<EncodedFrame>>> mFragArray; + + // Codec specific data frame, it will be generated by encoder and sent to + // ISOMediaWriter through WriteEncodedTrack(). The data varies depending + // on codec type. + RefPtr<EncodedFrame> mCSDFrame; + + // END_OF_STREAM from ContainerWriter + bool mEOS; +}; + +/** + * ISOControl will be carried to each box when box is created. It is the main + * bridge for box to output stream to ContainerWriter and retrieve information. + * ISOControl acts 3 different roles: + * 1. Holds the pointer of audio metadata, video metadata, fragment and + * pass them to boxes. + * 2. Provide the functions to generate the base structure of MP4; they are + * GenerateFtyp, GenerateMoov and GenerateMoof. + * 3. The actual writer used by MuxerOperation::Write() in each box. It provides + * writing methods for different kinds of data; they are Write, WriteArray, + * WriteBits...etc. + */ +class ISOControl { + +friend class Box; + +public: + ISOControl(uint32_t aMuxingType); + ~ISOControl(); + + nsresult GenerateFtyp(); + nsresult GenerateMoov(); + nsresult GenerateMoof(uint32_t aTrackType); + + // Swap elementary stream pointer to output buffers.
+ uint32_t WriteAVData(nsTArray<uint8_t>& aArray); + + uint32_t Write(uint8_t* aBuf, uint32_t aSize); + + uint32_t Write(uint8_t aData); + + template <typename T> + uint32_t Write(T aData) { /* writes aData in network byte order and returns sizeof(T) */ + MOZ_ASSERT(!mBitCount); + + aData = NativeEndian::swapToNetworkOrder(aData); + Write((uint8_t*)&aData, sizeof(T)); + return sizeof(T); + } + + template <typename T> + uint32_t WriteArray(const T &aArray, uint32_t aSize) { /* writes the first aSize elements one by one and returns the byte count */ + MOZ_ASSERT(!mBitCount); + + uint32_t size = 0; + for (uint32_t i = 0; i < aSize; i++) { + size += Write(aArray[i]); + } + return size; + } + + uint32_t WriteFourCC(const char* aType); + + // Bit writing. Note: the output has to be on a byte boundary before any + // other, non-bit writing function is used. + uint32_t WriteBits(uint64_t aBits, size_t aNumBits); + + // This is called by GetContainerData and swaps all the buffers to aOutputBufs. + nsresult GetBufs(nsTArray<nsTArray<uint8_t>>* aOutputBufs); + + // Presentation time in seconds since midnight, Jan. 1, 1904, in UTC time. + uint32_t GetTime(); + + // current fragment number + uint32_t GetCurFragmentNumber() { return mFragNum; } + + nsresult SetFragment(FragmentBuffer* aFragment); + FragmentBuffer* GetFragment(uint32_t aType); + + uint32_t GetMuxingType() { return mMuxingType; } + + nsresult SetMetadata(TrackMetadataBase* aTrackMeta); + nsresult GetAudioMetadata(RefPtr<AudioTrackMetadata>& aAudMeta); + nsresult GetVideoMetadata(RefPtr<VideoTrackMetadata>& aVidMeta); + + // Track ID is the Metadata index in mMetaArray plus one. It allows only 1 audio + // track and 1 video track in this muxer. In this muxer, it is prohibited to have + // multiple audio tracks or video tracks in the same file. + uint32_t GetTrackID(TrackMetadataBase::MetadataKind aKind); + uint32_t GetNextTrackID(); + + bool HasAudioTrack(); + bool HasVideoTrack(); + +private: + uint32_t GetBufPos(); + nsresult FlushBuf(); + + // One of value in TYPE_XXX, defined in ISOMediaWriter.
+ uint32_t mMuxingType; + + // Audio and video fragments are owned by ISOMediaWriter. + // They don't need to worry about pointer going stale because ISOMediaWriter's + // lifetime is longer than ISOControl. + FragmentBuffer* mAudioFragmentBuffer; + FragmentBuffer* mVideoFragmentBuffer; + + // Generated fragment number + uint32_t mFragNum; + + // The (index + 1) will be the track ID. + nsTArray<RefPtr<TrackMetadataBase>> mMetaArray; + + // Array of output buffers. + // To save memory usage, audio/video sample will be swapped into a new element + // of this array. + // + // For example, + // mOutBuffers[0] --> boxes (allocated by muxer) + // mOutBuffers[1] --> video raw data (allocated by encoder) + // mOutBuffers[2] --> video raw data (allocated by encoder) + // mOutBuffers[3] --> video raw data (allocated by encoder) + // mOutBuffers[4] --> boxes (allocated by muxer) + // mOutBuffers[5] --> audio raw data (allocated by encoder) + // ...etc. + // + nsTArray<nsTArray<uint8_t>> mOutBuffers; + + // Accumulate output size from Write() and WriteAVData(). + uint64_t mOutputSize; + + // Bit writing operation. Note: the mBitCount should be 0 before any + // byte-boundary writing method is called (Write(uint32_t), Write(uint16_t)...etc); + // otherwise, those functions assert. + uint8_t mBitCount; + uint8_t mBit; +}; + +} +#endif diff --git a/dom/media/encoder/fmp4_muxer/ISOMediaBoxes.cpp b/dom/media/encoder/fmp4_muxer/ISOMediaBoxes.cpp new file mode 100644 index 000000000..32a0c577b --- /dev/null +++ b/dom/media/encoder/fmp4_muxer/ISOMediaBoxes.cpp @@ -0,0 +1,1550 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/.
*/ + +#include <climits> +#include "TrackMetadataBase.h" +#include "ISOMediaBoxes.h" +#include "ISOControl.h" +#include "ISOMediaWriter.h" +#include "EncodedFrameContainer.h" +#include "ISOTrackMetadata.h" +#include "MP4ESDS.h" +#include "AMRBox.h" +#include "AVCBox.h" +#include "EVRCBox.h" +#include "VideoUtils.h" + +namespace mozilla { + +// 14496-12 6.2.2 'Data Types and fields' +const uint32_t iso_matrix[] = { 0x00010000, 0, 0, + 0, 0x00010000, 0, + 0, 0, 0x40000000 }; + +uint32_t +set_sample_flags(bool aSync) +{ /* Returns sample flags with bit 16 set for non-sync samples and all other bits clear (presumably sample_is_non_sync_sample of 14496-12 -- confirm). */ + std::bitset<32> flags; + flags.set(16, !aSync); + return flags.to_ulong(); +} + +Box::BoxSizeChecker::BoxSizeChecker(ISOControl* aControl, uint32_t aSize) +{ /* Remembers the current output position and the size the box is expected to write. */ + mControl = aControl; + ori_size = mControl->GetBufPos(); + box_size = aSize; + MOZ_COUNT_CTOR(BoxSizeChecker); +} + +Box::BoxSizeChecker::~BoxSizeChecker() +{ /* Asserts that the output grew by exactly box_size bytes since construction. */ + uint32_t cur_size = mControl->GetBufPos(); + if ((cur_size - ori_size) != box_size) { + MOZ_ASSERT(false); + } + + MOZ_COUNT_DTOR(BoxSizeChecker); +} + +nsresult +MediaDataBox::Generate(uint32_t* aBoxSize) +{ /* Records the offset of the first sample inside the box and adds the sample bytes of the first fragment of each selected track to size. */ + mFirstSampleOffset = size; + mAllSampleSize = 0; + + if (mTrackType & Audio_Track) { + FragmentBuffer* frag = mControl->GetFragment(Audio_Track); + mAllSampleSize += frag->GetFirstFragmentSampleSize(); + } + if (mTrackType & Video_Track) { + FragmentBuffer* frag = mControl->GetFragment(Video_Track); + mAllSampleSize += frag->GetFirstFragmentSampleSize(); + } + + size += mAllSampleSize; + *aBoxSize = size; + return NS_OK; +} + +nsresult +MediaDataBox::Write() +{ /* Calls Box::Write(), then moves the frame data of the first fragment of each selected track (audio first) to the output. */ + nsresult rv; + BoxSizeChecker checker(mControl, size); + Box::Write(); + nsTArray<uint32_t> types; + types.AppendElement(Audio_Track); + types.AppendElement(Video_Track); + + for (uint32_t l = 0; l < types.Length(); l++) { + if (mTrackType & types[l]) { + FragmentBuffer* frag = mControl->GetFragment(types[l]); + nsTArray<RefPtr<EncodedFrame>> frames; + + // Here is the last time we get fragment frames, flush it!
+ rv = frag->GetFirstFragment(frames, true); + NS_ENSURE_SUCCESS(rv, rv); + + uint32_t len = frames.Length(); + for (uint32_t i = 0; i < len; i++) { + nsTArray<uint8_t> frame_buffer; + frames.ElementAt(i)->SwapOutFrameData(frame_buffer); + mControl->WriteAVData(frame_buffer); + } + } + } + + return NS_OK; +} + +MediaDataBox::MediaDataBox(uint32_t aTrackType, ISOControl* aControl) + : Box(NS_LITERAL_CSTRING("mdat"), aControl) + , mAllSampleSize(0) + , mFirstSampleOffset(0) + , mTrackType(aTrackType) +{ + MOZ_COUNT_CTOR(MediaDataBox); +} + +MediaDataBox::~MediaDataBox() +{ + MOZ_COUNT_DTOR(MediaDataBox); +} + +uint32_t +TrackRunBox::fillSampleTable() +{ /* Builds sample_info_table from the first fragment (without flushing it) and returns the table size in bytes; returns 0 when the fragment cannot be fetched. */ + uint32_t table_size = 0; + nsresult rv; + nsTArray<RefPtr<EncodedFrame>> frames; + FragmentBuffer* frag = mControl->GetFragment(mTrackType); + + rv = frag->GetFirstFragment(frames); + if (NS_FAILED(rv)) { + return 0; + } + uint32_t len = frames.Length(); + sample_info_table = MakeUnique<tbl[]>(len); + // Create sample table according to 14496-12 8.8.8.2. Every field counts 4 bytes per sample, and only when its flag is present. + for (uint32_t i = 0; i < len; i++) { + // Sample size. + sample_info_table[i].sample_size = 0; + if (flags.to_ulong() & flags_sample_size_present) { + sample_info_table[i].sample_size = frames.ElementAt(i)->GetFrameData().Length(); + mAllSampleSize += sample_info_table[i].sample_size; + table_size += sizeof(uint32_t); + } + + // Sample flags. Only AVC I-frames are marked as sync samples. + sample_info_table[i].sample_flags = 0; + if (flags.to_ulong() & flags_sample_flags_present) { + sample_info_table[i].sample_flags = + set_sample_flags( + (frames.ElementAt(i)->GetFrameType() == EncodedFrame::AVC_I_FRAME)); + table_size += sizeof(uint32_t); + } + + // Sample duration. + sample_info_table[i].sample_duration = 0; + if (flags.to_ulong() & flags_sample_duration_present) { + // Calculate each frame's duration, it is decided by "current frame + // timestamp - last frame timestamp".
+ uint64_t frame_time = 0; + if (i == 0) { + frame_time = frames.ElementAt(i)->GetTimeStamp() - + frag->GetLastFragmentLastFrameTime(); + } else { + frame_time = frames.ElementAt(i)->GetTimeStamp() - + frames.ElementAt(i - 1)->GetTimeStamp(); + } + // Keep the last frame time of the current fragment; it is used to calculate + // the first frame duration of the next fragment. This is done after frame_time is computed and for every fragment length, including a single-frame fragment (i == 0). + if ((len - 1) == i) { + frag->SetLastFragmentLastFrameTime(frames.ElementAt(i)->GetTimeStamp()); + } + + // In TrackRunBox, there should be exactly one type, either audio or video. + MOZ_ASSERT((mTrackType & Video_Track) ^ (mTrackType & Audio_Track)); + sample_info_table[i].sample_duration = (mTrackType & Video_Track ? + frame_time * mVideoMeta->GetVideoClockRate() / USECS_PER_S : + frame_time * mAudioMeta->GetAudioSampleRate() / USECS_PER_S); + + table_size += sizeof(uint32_t); + } + + sample_info_table[i].sample_composition_time_offset = 0; + } + return table_size; +} + +nsresult +TrackRunBox::Generate(uint32_t* aBoxSize) +{ + FragmentBuffer* frag = mControl->GetFragment(mTrackType); + sample_count = frag->GetFirstFragmentSampleNumber(); + size += sizeof(sample_count); + + // data_offset needs to be updated if there is other + // TrackRunBox before this one.
+ if (flags.to_ulong() & flags_data_offset_present) { + data_offset = 0; + size += sizeof(data_offset); + } + size += fillSampleTable(); + + *aBoxSize = size; + + return NS_OK; +} + +nsresult +TrackRunBox::SetDataOffset(uint32_t aOffset) +{ + data_offset = aOffset; + return NS_OK; +} + +nsresult +TrackRunBox::Write() +{ + WRITE_FULLBOX(mControl, size) + mControl->Write(sample_count); + if (flags.to_ulong() & flags_data_offset_present) { + mControl->Write(data_offset); + } + for (uint32_t i = 0; i < sample_count; i++) { + if (flags.to_ulong() & flags_sample_duration_present) { + mControl->Write(sample_info_table[i].sample_duration); + } + if (flags.to_ulong() & flags_sample_size_present) { + mControl->Write(sample_info_table[i].sample_size); + } + if (flags.to_ulong() & flags_sample_flags_present) { + mControl->Write(sample_info_table[i].sample_flags); + } + } + + return NS_OK; +} + +TrackRunBox::TrackRunBox(uint32_t aType, uint32_t aFlags, ISOControl* aControl) + : FullBox(NS_LITERAL_CSTRING("trun"), 0, aFlags, aControl) + , sample_count(0) + , data_offset(0) + , first_sample_flags(0) + , mAllSampleSize(0) + , mTrackType(aType) +{ + MOZ_COUNT_CTOR(TrackRunBox); +} + +TrackRunBox::~TrackRunBox() +{ + MOZ_COUNT_DTOR(TrackRunBox); +} + +nsresult +TrackFragmentHeaderBox::UpdateBaseDataOffset(uint64_t aOffset) +{ + base_data_offset = aOffset; + return NS_OK; +} + +nsresult +TrackFragmentHeaderBox::Generate(uint32_t* aBoxSize) +{ + track_ID = (mTrackType == Audio_Track ? + mControl->GetTrackID(mAudioMeta->GetKind()) : + mControl->GetTrackID(mVideoMeta->GetKind())); + size += sizeof(track_ID); + + if (flags.to_ulong() & base_data_offset_present) { + // base_data_offset needs to add size of 'trun', 'tfhd' and + // header of 'mdat' later. 
+ base_data_offset = 0; + size += sizeof(base_data_offset); + } + if (flags.to_ulong() & default_sample_duration_present) { + if (mTrackType == Video_Track) { + if (!mVideoMeta->GetVideoFrameRate()) { + // 0 means frame rate is variant, so it is wrong to write + // default_sample_duration. + MOZ_ASSERT(0); + default_sample_duration = 0; + } else { + default_sample_duration = mVideoMeta->GetVideoClockRate() / mVideoMeta->GetVideoFrameRate(); + } + } else if (mTrackType == Audio_Track) { + default_sample_duration = mAudioMeta->GetAudioFrameDuration(); + } else { + MOZ_ASSERT(0); + return NS_ERROR_FAILURE; + } + size += sizeof(default_sample_duration); + } + *aBoxSize = size; + return NS_OK; +} + +nsresult +TrackFragmentHeaderBox::Write() +{ + WRITE_FULLBOX(mControl, size) + mControl->Write(track_ID); + if (flags.to_ulong() & base_data_offset_present) { + mControl->Write(base_data_offset); + } + if (flags.to_ulong() & default_sample_duration_present) { + mControl->Write(default_sample_duration); + } + return NS_OK; +} + +TrackFragmentHeaderBox::TrackFragmentHeaderBox(uint32_t aType, + uint32_t aFlags, + ISOControl* aControl) + : FullBox(NS_LITERAL_CSTRING("tfhd"), 0, aFlags, aControl) + , track_ID(0) + , base_data_offset(0) + , default_sample_duration(0) +{ + mTrackType = aType; + MOZ_COUNT_CTOR(TrackFragmentHeaderBox); +} + +TrackFragmentHeaderBox::~TrackFragmentHeaderBox() +{ + MOZ_COUNT_DTOR(TrackFragmentHeaderBox); +} + +TrackFragmentBox::TrackFragmentBox(uint32_t aType, ISOControl* aControl) + : DefaultContainerImpl(NS_LITERAL_CSTRING("traf"), aControl) + , mTrackType(aType) +{ + // Flags in TrackFragmentHeaderBox. + uint32_t tf_flags = base_data_offset_present; + + // Ideally, audio encoder generates audio frame in const rate. However, some + // audio encoders don't do it so the audio frame duration needs to be checked + // here. 
+ if ((mTrackType & Audio_Track) && mAudioMeta->GetAudioFrameDuration()) { + tf_flags |= default_sample_duration_present; + } + + boxes.AppendElement(new TrackFragmentHeaderBox(aType, tf_flags, aControl)); + + // Always adds flags_data_offset_present in each TrackRunBox, Android + // parser requires this flag to calculate the correct bitstream offset. + uint32_t tr_flags = flags_sample_size_present | flags_data_offset_present; + + // Flags in TrackRunBox. + // If there is no default sample duration exists, each frame duration needs to + // be recored in the TrackRunBox. + tr_flags |= (tf_flags & default_sample_duration_present ? 0 : flags_sample_duration_present); + + // For video, add sample_flags to record I frame. + tr_flags |= (mTrackType & Video_Track ? flags_sample_flags_present : 0); + + boxes.AppendElement(new TrackRunBox(mTrackType, tr_flags, aControl)); + MOZ_COUNT_CTOR(TrackFragmentBox); +} + +TrackFragmentBox::~TrackFragmentBox() +{ + MOZ_COUNT_DTOR(TrackFragmentBox); +} + +nsresult +MovieFragmentHeaderBox::Generate(uint32_t* aBoxSize) +{ + sequence_number = mControl->GetCurFragmentNumber(); + size += sizeof(sequence_number); + *aBoxSize = size; + return NS_OK; +} + +nsresult +MovieFragmentHeaderBox::Write() +{ + WRITE_FULLBOX(mControl, size) + mControl->Write(sequence_number); + return NS_OK; +} + +MovieFragmentHeaderBox::MovieFragmentHeaderBox(uint32_t aTrackType, + ISOControl* aControl) + : FullBox(NS_LITERAL_CSTRING("mfhd"), 0, 0, aControl) + , sequence_number(0) + , mTrackType(aTrackType) +{ + MOZ_COUNT_CTOR(MovieFragmentHeaderBox); +} + +MovieFragmentHeaderBox::~MovieFragmentHeaderBox() +{ + MOZ_COUNT_DTOR(MovieFragmentHeaderBox); +} + +MovieFragmentBox::MovieFragmentBox(uint32_t aType, ISOControl* aControl) + : DefaultContainerImpl(NS_LITERAL_CSTRING("moof"), aControl) + , mTrackType(aType) +{ + boxes.AppendElement(new MovieFragmentHeaderBox(mTrackType, aControl)); + + if (mTrackType & Audio_Track) { + boxes.AppendElement( + new 
TrackFragmentBox(Audio_Track, aControl)); + } + if (mTrackType & Video_Track) { + boxes.AppendElement( + new TrackFragmentBox(Video_Track, aControl)); + } + MOZ_COUNT_CTOR(MovieFragmentBox); +} + +MovieFragmentBox::~MovieFragmentBox() +{ + MOZ_COUNT_DTOR(MovieFragmentBox); +} + +nsresult +MovieFragmentBox::Generate(uint32_t* aBoxSize) +{ + nsresult rv = DefaultContainerImpl::Generate(aBoxSize); + NS_ENSURE_SUCCESS(rv, rv); + + // Correct data_offset if there are both audio and video track in + // this fragment. This offset means the offset in the MediaDataBox. + if (mTrackType & (Audio_Track | Video_Track)) { + nsTArray<RefPtr<MuxerOperation>> truns; + rv = Find(NS_LITERAL_CSTRING("trun"), truns); + NS_ENSURE_SUCCESS(rv, rv); + uint32_t len = truns.Length(); + uint32_t data_offset = 0; + for (uint32_t i = 0; i < len; i++) { + TrackRunBox* trun = (TrackRunBox*) truns.ElementAt(i).get(); + rv = trun->SetDataOffset(data_offset); + NS_ENSURE_SUCCESS(rv, rv); + data_offset += trun->GetAllSampleSize(); + } + } + + return NS_OK; +} + +nsresult +TrackExtendsBox::Generate(uint32_t* aBoxSize) +{ + track_ID = (mTrackType == Audio_Track ? + mControl->GetTrackID(mAudioMeta->GetKind()) : + mControl->GetTrackID(mVideoMeta->GetKind())); + + if (mTrackType == Audio_Track) { + default_sample_description_index = 1; + default_sample_duration = mAudioMeta->GetAudioFrameDuration(); + default_sample_size = mAudioMeta->GetAudioFrameSize(); + default_sample_flags = set_sample_flags(1); + } else if (mTrackType == Video_Track) { + default_sample_description_index = 1; + // Video meta data has assigned framerate, it implies that this video's + // frame rate should be fixed. 
// Serializes the 'trex' box. After the full-box header the five per-track
// defaults are written in a fixed order; the order below is the on-disk
// layout and must not be rearranged.
nsresult
TrackExtendsBox::Write()
{
  WRITE_FULLBOX(mControl, size)
  mControl->Write(track_ID);
  mControl->Write(default_sample_description_index);
  mControl->Write(default_sample_duration);
  mControl->Write(default_sample_size);
  mControl->Write(default_sample_flags);

  return NS_OK;
}
+ entry_count = 0; + size += sizeof(entry_count); + *aBoxSize = size; + return NS_OK; +} + +nsresult +ChunkOffsetBox::Write() +{ + WRITE_FULLBOX(mControl, size) + mControl->Write(entry_count); + return NS_OK; +} + +ChunkOffsetBox::ChunkOffsetBox(uint32_t aType, ISOControl* aControl) + : FullBox(NS_LITERAL_CSTRING("stco"), 0, 0, aControl) + , entry_count(0) +{ + MOZ_COUNT_CTOR(ChunkOffsetBox); +} + +ChunkOffsetBox::~ChunkOffsetBox() +{ + MOZ_COUNT_DTOR(ChunkOffsetBox); +} + +nsresult +SampleToChunkBox::Generate(uint32_t* aBoxSize) +{ + // We don't need time to sample table in fragmented mp4 + entry_count = 0; + size += sizeof(entry_count); + *aBoxSize = size; + return NS_OK; +} + +nsresult +SampleToChunkBox::Write() +{ + WRITE_FULLBOX(mControl, size) + mControl->Write(entry_count); + return NS_OK; +} + +SampleToChunkBox::SampleToChunkBox(uint32_t aType, ISOControl* aControl) + : FullBox(NS_LITERAL_CSTRING("stsc"), 0, 0, aControl) + , entry_count(0) +{ + MOZ_COUNT_CTOR(SampleToChunkBox); +} + +SampleToChunkBox::~SampleToChunkBox() +{ + MOZ_COUNT_DTOR(SampleToChunkBox); +} + +nsresult +TimeToSampleBox::Generate(uint32_t* aBoxSize) +{ + // We don't need time to sample table in fragmented mp4. 
+ entry_count = 0; + size += sizeof(entry_count); + *aBoxSize = size; + return NS_OK; +} + +nsresult +TimeToSampleBox::Write() +{ + WRITE_FULLBOX(mControl, size) + mControl->Write(entry_count); + return NS_OK; +} + +TimeToSampleBox::TimeToSampleBox(uint32_t aType, ISOControl* aControl) + : FullBox(NS_LITERAL_CSTRING("stts"), 0, 0, aControl) + , entry_count(0) +{ + MOZ_COUNT_CTOR(TimeToSampleBox); +} + +TimeToSampleBox::~TimeToSampleBox() +{ + MOZ_COUNT_DTOR(TimeToSampleBox); +} + +nsresult +SampleDescriptionBox::Generate(uint32_t* aBoxSize) +{ + entry_count = 1; + size += sizeof(entry_count); + + nsresult rv; + uint32_t box_size; + rv = sample_entry_box->Generate(&box_size); + NS_ENSURE_SUCCESS(rv, rv); + size += box_size; + *aBoxSize = size; + + return NS_OK; +} + +nsresult +SampleDescriptionBox::Write() +{ + WRITE_FULLBOX(mControl, size) + nsresult rv; + mControl->Write(entry_count); + rv = sample_entry_box->Write(); + NS_ENSURE_SUCCESS(rv, rv); + + return NS_OK; +} + +SampleDescriptionBox::SampleDescriptionBox(uint32_t aType, ISOControl* aControl) + : FullBox(NS_LITERAL_CSTRING("stsd"), 0, 0, aControl) + , entry_count(0) +{ + mTrackType = aType; + + switch (mTrackType) { + case Audio_Track: + { + CreateAudioSampleEntry(sample_entry_box); + } + break; + case Video_Track: + { + CreateVideoSampleEntry(sample_entry_box); + } + break; + } + MOZ_ASSERT(sample_entry_box); + MOZ_COUNT_CTOR(SampleDescriptionBox); +} + +nsresult +SampleDescriptionBox::CreateAudioSampleEntry(RefPtr<SampleEntryBox>& aSampleEntry) +{ + if (mAudioMeta->GetKind() == TrackMetadataBase::METADATA_AMR) { + aSampleEntry = new AMRSampleEntry(mControl); + } else if (mAudioMeta->GetKind() == TrackMetadataBase::METADATA_AAC) { + aSampleEntry = new MP4AudioSampleEntry(mControl); + } else if (mAudioMeta->GetKind() == TrackMetadataBase::METADATA_EVRC) { + aSampleEntry = new EVRCSampleEntry(mControl); + } else { + MOZ_ASSERT(0); + } + return NS_OK; +} + +nsresult 
+SampleDescriptionBox::CreateVideoSampleEntry(RefPtr<SampleEntryBox>& aSampleEntry) +{ + if (mVideoMeta->GetKind() == TrackMetadataBase::METADATA_AVC) { + aSampleEntry = new AVCSampleEntry(mControl); + } else { + MOZ_ASSERT(0); + } + return NS_OK; +} + +SampleDescriptionBox::~SampleDescriptionBox() +{ + MOZ_COUNT_DTOR(SampleDescriptionBox); +} + +nsresult +SampleSizeBox::Generate(uint32_t* aBoxSize) +{ + size += sizeof(sample_size) + + sizeof(sample_count); + *aBoxSize = size; + return NS_OK; +} + +nsresult +SampleSizeBox::Write() +{ + WRITE_FULLBOX(mControl, size) + mControl->Write(sample_size); + mControl->Write(sample_count); + return NS_OK; +} + +SampleSizeBox::SampleSizeBox(ISOControl* aControl) + : FullBox(NS_LITERAL_CSTRING("stsz"), 0, 0, aControl) + , sample_size(0) + , sample_count(0) +{ + MOZ_COUNT_CTOR(SampleSizeBox); +} + +SampleSizeBox::~SampleSizeBox() +{ + MOZ_COUNT_DTOR(SampleSizeBox); +} + +SampleTableBox::SampleTableBox(uint32_t aType, ISOControl* aControl) + : DefaultContainerImpl(NS_LITERAL_CSTRING("stbl"), aControl) +{ + boxes.AppendElement(new SampleDescriptionBox(aType, aControl)); + boxes.AppendElement(new TimeToSampleBox(aType, aControl)); + boxes.AppendElement(new SampleToChunkBox(aType, aControl)); + boxes.AppendElement(new SampleSizeBox(aControl)); + boxes.AppendElement(new ChunkOffsetBox(aType, aControl)); + MOZ_COUNT_CTOR(SampleTableBox); +} + +SampleTableBox::~SampleTableBox() +{ + MOZ_COUNT_DTOR(SampleTableBox); +} + +nsresult +DataEntryUrlBox::Generate(uint32_t* aBoxSize) +{ + // location is null here, do nothing + size += location.Length(); + *aBoxSize = size; + + return NS_OK; +} + +nsresult +DataEntryUrlBox::Write() +{ + WRITE_FULLBOX(mControl, size) + return NS_OK; +} + +DataEntryUrlBox::DataEntryUrlBox() + : FullBox(NS_LITERAL_CSTRING("url "), 0, 0, (ISOControl*) nullptr) +{ + MOZ_COUNT_CTOR(DataEntryUrlBox); +} + +DataEntryUrlBox::DataEntryUrlBox(ISOControl* aControl) + : FullBox(NS_LITERAL_CSTRING("url "), 0, 
flags_media_at_the_same_file, aControl) +{ + MOZ_COUNT_CTOR(DataEntryUrlBox); +} + +DataEntryUrlBox::DataEntryUrlBox(const DataEntryUrlBox& aBox) + : FullBox(aBox.boxType, aBox.version, aBox.flags.to_ulong(), aBox.mControl) +{ + location = aBox.location; + MOZ_COUNT_CTOR(DataEntryUrlBox); +} + +DataEntryUrlBox::~DataEntryUrlBox() +{ + MOZ_COUNT_DTOR(DataEntryUrlBox); +} + +nsresult DataReferenceBox::Generate(uint32_t* aBoxSize) +{ + entry_count = 1; // only allow on entry here + size += sizeof(uint32_t); + + for (uint32_t i = 0; i < entry_count; i++) { + uint32_t box_size = 0; + DataEntryUrlBox* url = new DataEntryUrlBox(mControl); + url->Generate(&box_size); + size += box_size; + urls.AppendElement(url); + } + + *aBoxSize = size; + + return NS_OK; +} + +nsresult DataReferenceBox::Write() +{ + WRITE_FULLBOX(mControl, size) + mControl->Write(entry_count); + + for (uint32_t i = 0; i < entry_count; i++) { + urls[i]->Write(); + } + + return NS_OK; +} + +DataReferenceBox::DataReferenceBox(ISOControl* aControl) + : FullBox(NS_LITERAL_CSTRING("dref"), 0, 0, aControl) + , entry_count(0) +{ + MOZ_COUNT_CTOR(DataReferenceBox); +} + +DataReferenceBox::~DataReferenceBox() +{ + MOZ_COUNT_DTOR(DataReferenceBox); +} + +DataInformationBox::DataInformationBox(ISOControl* aControl) + : DefaultContainerImpl(NS_LITERAL_CSTRING("dinf"), aControl) +{ + boxes.AppendElement(new DataReferenceBox(aControl)); + MOZ_COUNT_CTOR(DataInformationBox); +} + +DataInformationBox::~DataInformationBox() +{ + MOZ_COUNT_DTOR(DataInformationBox); +} + +nsresult +VideoMediaHeaderBox::Generate(uint32_t* aBoxSize) +{ + size += sizeof(graphicsmode) + + sizeof(opcolor); + + *aBoxSize = size; + + return NS_OK; +} + +nsresult +VideoMediaHeaderBox::Write() +{ + WRITE_FULLBOX(mControl, size) + mControl->Write(graphicsmode); + mControl->WriteArray(opcolor, 3); + return NS_OK; +} + +VideoMediaHeaderBox::VideoMediaHeaderBox(ISOControl* aControl) + : FullBox(NS_LITERAL_CSTRING("vmhd"), 0, 1, aControl) + , 
graphicsmode(0) +{ + memset(opcolor, 0 , sizeof(opcolor)); + MOZ_COUNT_CTOR(VideoMediaHeaderBox); +} + +VideoMediaHeaderBox::~VideoMediaHeaderBox() +{ + MOZ_COUNT_DTOR(VideoMediaHeaderBox); +} + +nsresult +SoundMediaHeaderBox::Generate(uint32_t* aBoxSize) +{ + balance = 0; + reserved = 0; + size += sizeof(balance) + + sizeof(reserved); + + *aBoxSize = size; + + return NS_OK; +} + +nsresult +SoundMediaHeaderBox::Write() +{ + WRITE_FULLBOX(mControl, size) + mControl->Write(balance); + mControl->Write(reserved); + + return NS_OK; +} + +SoundMediaHeaderBox::SoundMediaHeaderBox(ISOControl* aControl) + : FullBox(NS_LITERAL_CSTRING("smhd"), 0, 0, aControl) +{ + MOZ_COUNT_CTOR(SoundMediaHeaderBox); +} + +SoundMediaHeaderBox::~SoundMediaHeaderBox() +{ + MOZ_COUNT_DTOR(SoundMediaHeaderBox); +} + +MediaInformationBox::MediaInformationBox(uint32_t aType, ISOControl* aControl) + : DefaultContainerImpl(NS_LITERAL_CSTRING("minf"), aControl) +{ + mTrackType = aType; + + if (mTrackType == Audio_Track) { + boxes.AppendElement(new SoundMediaHeaderBox(aControl)); + } else if (mTrackType == Video_Track) { + boxes.AppendElement(new VideoMediaHeaderBox(aControl)); + } else { + MOZ_ASSERT(0); + } + + boxes.AppendElement(new DataInformationBox(aControl)); + boxes.AppendElement(new SampleTableBox(aType, aControl)); + MOZ_COUNT_CTOR(MediaInformationBox); +} + +MediaInformationBox::~MediaInformationBox() +{ + MOZ_COUNT_DTOR(MediaInformationBox); +} + +nsresult +HandlerBox::Generate(uint32_t* aBoxSize) +{ + pre_defined = 0; + if (mTrackType == Audio_Track) { + handler_type = FOURCC('s', 'o', 'u', 'n'); + } else if (mTrackType == Video_Track) { + handler_type = FOURCC('v', 'i', 'd', 'e'); + } + + size += sizeof(pre_defined) + + sizeof(handler_type) + + sizeof(reserved); + + *aBoxSize = size; + + return NS_OK; +} + +nsresult +HandlerBox::Write() +{ + WRITE_FULLBOX(mControl, size) + mControl->Write(pre_defined); + mControl->Write(handler_type); + mControl->WriteArray(reserved, 3); + + return 
NS_OK; +} + +HandlerBox::HandlerBox(uint32_t aType, ISOControl* aControl) + : FullBox(NS_LITERAL_CSTRING("hdlr"), 0, 0, aControl) + , pre_defined(0) + , handler_type(0) +{ + mTrackType = aType; + memset(reserved, 0 , sizeof(reserved)); + MOZ_COUNT_CTOR(HandlerBox); +} + +HandlerBox::~HandlerBox() +{ + MOZ_COUNT_DTOR(HandlerBox); +} + +MediaHeaderBox::MediaHeaderBox(uint32_t aType, ISOControl* aControl) + : FullBox(NS_LITERAL_CSTRING("mdhd"), 0, 0, aControl) + , creation_time(0) + , modification_time(0) + , timescale(0) + , duration(0) + , pad(0) + , lang1(0) + , lang2(0) + , lang3(0) + , pre_defined(0) +{ + mTrackType = aType; + MOZ_COUNT_CTOR(MediaHeaderBox); +} + +MediaHeaderBox::~MediaHeaderBox() +{ + MOZ_COUNT_DTOR(MediaHeaderBox); +} + +uint32_t +MediaHeaderBox::GetTimeScale() +{ + if (mTrackType == Audio_Track) { + return mAudioMeta->GetAudioSampleRate(); + } + + return mVideoMeta->GetVideoClockRate(); +} + +nsresult +MediaHeaderBox::Generate(uint32_t* aBoxSize) +{ + creation_time = mControl->GetTime(); + modification_time = mControl->GetTime(); + timescale = GetTimeScale(); + duration = 0; // fragmented mp4 + + pad = 0; + lang1 = 'u' - 0x60; // "und" underdetermined language + lang2 = 'n' - 0x60; + lang3 = 'd' - 0x60; + size += (pad.size() + lang1.size() + lang2.size() + lang3.size()) / CHAR_BIT; + + pre_defined = 0; + size += sizeof(creation_time) + + sizeof(modification_time) + + sizeof(timescale) + + sizeof(duration) + + sizeof(pre_defined); + + *aBoxSize = size; + + return NS_OK; +} + +nsresult +MediaHeaderBox::Write() +{ + WRITE_FULLBOX(mControl, size) + mControl->Write(creation_time); + mControl->Write(modification_time); + mControl->Write(timescale); + mControl->Write(duration); + mControl->WriteBits(pad.to_ulong(), pad.size()); + mControl->WriteBits(lang1.to_ulong(), lang1.size()); + mControl->WriteBits(lang2.to_ulong(), lang2.size()); + mControl->WriteBits(lang3.to_ulong(), lang3.size()); + mControl->Write(pre_defined); + + return NS_OK; +} + 
+MovieBox::MovieBox(ISOControl* aControl) + : DefaultContainerImpl(NS_LITERAL_CSTRING("moov"), aControl) +{ + boxes.AppendElement(new MovieHeaderBox(aControl)); + if (aControl->HasAudioTrack()) { + boxes.AppendElement(new TrackBox(Audio_Track, aControl)); + } + if (aControl->HasVideoTrack()) { + boxes.AppendElement(new TrackBox(Video_Track, aControl)); + } + boxes.AppendElement(new MovieExtendsBox(aControl)); + MOZ_COUNT_CTOR(MovieBox); +} + +MovieBox::~MovieBox() +{ + MOZ_COUNT_DTOR(MovieBox); +} + +nsresult +MovieHeaderBox::Generate(uint32_t* aBoxSize) +{ + creation_time = mControl->GetTime(); + modification_time = mControl->GetTime(); + timescale = GetTimeScale(); + duration = 0; // The duration is always 0 in fragmented mp4. + next_track_ID = mControl->GetNextTrackID(); + + size += sizeof(next_track_ID) + + sizeof(creation_time) + + sizeof(modification_time) + + sizeof(timescale) + + sizeof(duration) + + sizeof(rate) + + sizeof(volume) + + sizeof(reserved16) + + sizeof(reserved32) + + sizeof(matrix) + + sizeof(pre_defined); + + *aBoxSize = size; + + return NS_OK; +} + +nsresult +MovieHeaderBox::Write() +{ + WRITE_FULLBOX(mControl, size) + mControl->Write(creation_time); + mControl->Write(modification_time); + mControl->Write(timescale); + mControl->Write(duration); + mControl->Write(rate); + mControl->Write(volume); + mControl->Write(reserved16); + mControl->WriteArray(reserved32, 2); + mControl->WriteArray(matrix, 9); + mControl->WriteArray(pre_defined, 6); + mControl->Write(next_track_ID); + + return NS_OK; +} + +uint32_t +MovieHeaderBox::GetTimeScale() +{ + // Only audio track in container. 
+ if (mAudioMeta && !mVideoMeta) { + return mAudioMeta->GetAudioSampleRate(); + } + + // return video rate + return mVideoMeta->GetVideoClockRate(); +} + +MovieHeaderBox::~MovieHeaderBox() +{ + MOZ_COUNT_DTOR(MovieHeaderBox); +} + +MovieHeaderBox::MovieHeaderBox(ISOControl* aControl) + : FullBox(NS_LITERAL_CSTRING("mvhd"), 0, 0, aControl) + , creation_time(0) + , modification_time(0) + , timescale(90000) + , duration(0) + , rate(0x00010000) + , volume(0x0100) + , reserved16(0) + , next_track_ID(1) +{ + memcpy(matrix, iso_matrix, sizeof(matrix)); + memset(reserved32, 0, sizeof(reserved32)); + memset(pre_defined, 0, sizeof(pre_defined)); + MOZ_COUNT_CTOR(MovieHeaderBox); +} + +TrackHeaderBox::TrackHeaderBox(uint32_t aType, ISOControl* aControl) + : FullBox(NS_LITERAL_CSTRING("tkhd"), 0, + flags_track_enabled | flags_track_in_movie | flags_track_in_preview, + aControl) + , creation_time(0) + , modification_time(0) + , track_ID(0) + , reserved(0) + , duration(0) + , layer(0) + , alternate_group(0) + , volume(0) + , reserved3(0) + , width(0) + , height(0) +{ + mTrackType = aType; + memcpy(matrix, iso_matrix, sizeof(matrix)); + memset(reserved2, 0, sizeof(reserved2)); + MOZ_COUNT_CTOR(TrackHeaderBox); +} + +TrackHeaderBox::~TrackHeaderBox() +{ + MOZ_COUNT_DTOR(TrackHeaderBox); +} + +nsresult +TrackHeaderBox::Generate(uint32_t* aBoxSize) +{ + creation_time = mControl->GetTime(); + modification_time = mControl->GetTime(); + track_ID = (mTrackType == Audio_Track ? + mControl->GetTrackID(mAudioMeta->GetKind()) : + mControl->GetTrackID(mVideoMeta->GetKind())); + // fragmented mp4 + duration = 0; + + // volume, audiotrack is always 0x0100 in 14496-12 8.3.2.2 + volume = (mTrackType == Audio_Track ? 0x0100 : 0); + + if (mTrackType == Video_Track) { + width = mVideoMeta->GetVideoDisplayWidth() << 16; + height = mVideoMeta->GetVideoDisplayHeight() << 16; + // Check display size, using the pixel size if any of them is invalid. 
// Serializes the 'tkhd' box. After the full-box header the fields are
// written in a fixed order; the order below is the on-disk layout and must
// not be rearranged.
nsresult
TrackHeaderBox::Write()
{
  WRITE_FULLBOX(mControl, size)
  mControl->Write(creation_time);
  mControl->Write(modification_time);
  mControl->Write(track_ID);
  mControl->Write(reserved);
  mControl->Write(duration);
  // reserved2 is written as two elements.
  mControl->WriteArray(reserved2, 2);
  mControl->Write(layer);
  mControl->Write(alternate_group);
  mControl->Write(volume);
  mControl->Write(reserved3);
  // The transformation matrix is written as nine elements.
  mControl->WriteArray(matrix, 9);
  mControl->Write(width);
  mControl->Write(height);

  return NS_OK;
}
+ compatible_brands.AppendElement("3gp9"); + compatible_brands.AppendElement("3gp8"); + compatible_brands.AppendElement("3gp7"); + compatible_brands.AppendElement("3gp6"); + compatible_brands.AppendElement("isom"); + } else if (mControl->GetMuxingType() == ISOMediaWriter::TYPE_FRAG_3G2) { + major_brand = "3g2a"; + // 3GPP2 Release 0 and A and 3GPP Release 6 allow movie fragmentation + compatible_brands.AppendElement("3gp9"); + compatible_brands.AppendElement("3gp8"); + compatible_brands.AppendElement("3gp7"); + compatible_brands.AppendElement("3gp6"); + compatible_brands.AppendElement("isom"); + compatible_brands.AppendElement("3g2c"); + compatible_brands.AppendElement("3g2b"); + compatible_brands.AppendElement("3g2a"); + } else { + MOZ_ASSERT(0); + } + + size += major_brand.Length() + + sizeof(minor_version) + + compatible_brands.Length() * 4; + + *aBoxSize = size; + + return NS_OK; +} + +nsresult +FileTypeBox::Write() +{ + BoxSizeChecker checker(mControl, size); + Box::Write(); + mControl->WriteFourCC(major_brand.get()); + mControl->Write(minor_version); + uint32_t len = compatible_brands.Length(); + for (uint32_t i = 0; i < len; i++) { + mControl->WriteFourCC(compatible_brands[i].get()); + } + + return NS_OK; +} + +FileTypeBox::FileTypeBox(ISOControl* aControl) + : Box(NS_LITERAL_CSTRING("ftyp"), aControl) + , minor_version(0) +{ + MOZ_COUNT_CTOR(FileTypeBox); +} + +FileTypeBox::~FileTypeBox() +{ + MOZ_COUNT_DTOR(FileTypeBox); +} + +MediaBox::MediaBox(uint32_t aType, ISOControl* aControl) + : DefaultContainerImpl(NS_LITERAL_CSTRING("mdia"), aControl) +{ + mTrackType = aType; + boxes.AppendElement(new MediaHeaderBox(aType, aControl)); + boxes.AppendElement(new HandlerBox(aType, aControl)); + boxes.AppendElement(new MediaInformationBox(aType, aControl)); + MOZ_COUNT_CTOR(MediaBox); +} + +MediaBox::~MediaBox() +{ + MOZ_COUNT_DTOR(MediaBox); +} + +nsresult +DefaultContainerImpl::Generate(uint32_t* aBoxSize) +{ + nsresult rv; + uint32_t box_size; + uint32_t len = 
boxes.Length(); + for (uint32_t i = 0; i < len; i++) { + rv = boxes.ElementAt(i)->Generate(&box_size); + NS_ENSURE_SUCCESS(rv, rv); + size += box_size; + } + *aBoxSize = size; + return NS_OK; +} + +nsresult +DefaultContainerImpl::Find(const nsACString& aType, + nsTArray<RefPtr<MuxerOperation>>& aOperations) +{ + nsresult rv = Box::Find(aType, aOperations); + NS_ENSURE_SUCCESS(rv, rv); + + uint32_t len = boxes.Length(); + for (uint32_t i = 0; i < len; i++) { + rv = boxes.ElementAt(i)->Find(aType, aOperations); + NS_ENSURE_SUCCESS(rv, rv); + } + return NS_OK; +} + +nsresult +DefaultContainerImpl::Write() +{ + BoxSizeChecker checker(mControl, size); + Box::Write(); + + nsresult rv; + uint32_t len = boxes.Length(); + for (uint32_t i = 0; i < len; i++) { + rv = boxes.ElementAt(i)->Write(); + NS_ENSURE_SUCCESS(rv, rv); + } + + return NS_OK; +} + +DefaultContainerImpl::DefaultContainerImpl(const nsACString& aType, + ISOControl* aControl) + : Box(aType, aControl) +{ +} + +nsresult +Box::Write() +{ + mControl->Write(size); + mControl->WriteFourCC(boxType.get()); + return NS_OK; +} + +nsresult +Box::Find(const nsACString& aType, nsTArray<RefPtr<MuxerOperation>>& aOperations) +{ + if (boxType == aType) { + aOperations.AppendElement(this); + } + return NS_OK; +} + +Box::Box(const nsACString& aType, ISOControl* aControl) + : size(8), mControl(aControl) +{ + MOZ_ASSERT(aType.Length() == 4); + boxType = aType; + aControl->GetAudioMetadata(mAudioMeta); + aControl->GetVideoMetadata(mVideoMeta); +} + +FullBox::FullBox(const nsACString& aType, uint8_t aVersion, uint32_t aFlags, + ISOControl* aControl) + : Box(aType, aControl) +{ + std::bitset<24> tmp_flags(aFlags); + version = aVersion; + flags = tmp_flags; + size += sizeof(version) + flags.size() / CHAR_BIT; +} + +nsresult +FullBox::Write() +{ + Box::Write(); + mControl->Write(version); + mControl->WriteBits(flags.to_ulong(), flags.size()); + return NS_OK; +} + +TrackBox::TrackBox(uint32_t aTrackType, ISOControl* aControl) + : 
DefaultContainerImpl(NS_LITERAL_CSTRING("trak"), aControl) +{ + boxes.AppendElement(new TrackHeaderBox(aTrackType, aControl)); + boxes.AppendElement(new MediaBox(aTrackType, aControl)); + MOZ_COUNT_CTOR(TrackBox); +} + +TrackBox::~TrackBox() +{ + MOZ_COUNT_DTOR(TrackBox); +} + +SampleEntryBox::SampleEntryBox(const nsACString& aFormat, ISOControl* aControl) + : Box(aFormat, aControl) + , data_reference_index(0) +{ + data_reference_index = 1; // There is only one data reference in each track. + size += sizeof(reserved) + + sizeof(data_reference_index); + memset(reserved, 0, sizeof(reserved)); +} + +nsresult +SampleEntryBox::Write() +{ + Box::Write(); + mControl->Write(reserved, sizeof(reserved)); + mControl->Write(data_reference_index); + return NS_OK; +} + +nsresult +AudioSampleEntry::Write() +{ + SampleEntryBox::Write(); + mControl->Write(sound_version); + mControl->Write(reserved2, sizeof(reserved2)); + mControl->Write(channels); + mControl->Write(sample_size); + mControl->Write(compressionId); + mControl->Write(packet_size); + mControl->Write(timeScale); + return NS_OK; +} + +AudioSampleEntry::AudioSampleEntry(const nsACString& aFormat, ISOControl* aControl) + : SampleEntryBox(aFormat, aControl) + , sound_version(0) + , channels(2) + , sample_size(16) + , compressionId(0) + , packet_size(0) + , timeScale(0) +{ + memset(reserved2, 0 , sizeof(reserved2)); + channels = mAudioMeta->GetAudioChannels(); + timeScale = mAudioMeta->GetAudioSampleRate() << 16; + + size += sizeof(sound_version) + + sizeof(reserved2) + + sizeof(sample_size) + + sizeof(channels) + + sizeof(packet_size) + + sizeof(compressionId) + + sizeof(timeScale); + + MOZ_COUNT_CTOR(AudioSampleEntry); +} + +AudioSampleEntry::~AudioSampleEntry() +{ + MOZ_COUNT_DTOR(AudioSampleEntry); +} + +nsresult +VisualSampleEntry::Write() +{ + SampleEntryBox::Write(); + + mControl->Write(reserved, sizeof(reserved)); + mControl->Write(width); + mControl->Write(height); + mControl->Write(horizresolution); + 
mControl->Write(vertresolution); + mControl->Write(reserved2); + mControl->Write(frame_count); + mControl->Write(compressorName, sizeof(compressorName)); + mControl->Write(depth); + mControl->Write(pre_defined); + + return NS_OK; +} + +VisualSampleEntry::VisualSampleEntry(const nsACString& aFormat, ISOControl* aControl) + : SampleEntryBox(aFormat, aControl) + , width(0) + , height(0) + , horizresolution(resolution_72_dpi) + , vertresolution(resolution_72_dpi) + , reserved2(0) + , frame_count(1) + , depth(video_depth) + , pre_defined(-1) +{ + memset(reserved, 0 , sizeof(reserved)); + memset(compressorName, 0 , sizeof(compressorName)); + + // both fields occupy 16 bits defined in 14496-2 6.2.3. + width = mVideoMeta->GetVideoWidth(); + height = mVideoMeta->GetVideoHeight(); + + size += sizeof(reserved) + + sizeof(width) + + sizeof(height) + + sizeof(horizresolution) + + sizeof(vertresolution) + + sizeof(reserved2) + + sizeof(frame_count) + + sizeof(compressorName) + + sizeof(depth) + + sizeof(pre_defined); + + MOZ_COUNT_CTOR(VisualSampleEntry); +} + +VisualSampleEntry::~VisualSampleEntry() +{ + MOZ_COUNT_DTOR(VisualSampleEntry); +} + +} diff --git a/dom/media/encoder/fmp4_muxer/ISOMediaBoxes.h b/dom/media/encoder/fmp4_muxer/ISOMediaBoxes.h new file mode 100644 index 000000000..a6dc1b046 --- /dev/null +++ b/dom/media/encoder/fmp4_muxer/ISOMediaBoxes.h @@ -0,0 +1,781 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. 
// Packs four 8-bit character codes into one 32-bit four CC, with `a` in the
// most significant byte and `d` in the least significant one.
//
// Each operand is first reduced to a single byte and widened to uint32_t.
// This keeps the shift unsigned (no shift into the sign bit when `a` is
// 0x80 or above) and stops a negative `char` from sign-extending into the
// neighbouring bytes.
#define FOURCC(a, b, c, d)                                        \
  ( (static_cast<uint32_t>(static_cast<uint8_t>(a)) << 24) |      \
    (static_cast<uint32_t>(static_cast<uint8_t>(b)) << 16) |      \
    (static_cast<uint32_t>(static_cast<uint8_t>(c)) << 8)  |      \
     static_cast<uint32_t>(static_cast<uint8_t>(d)) )
/**
 * The default implementation of the container box.
 * Basically, a container box inherits this class and overrides the
 * constructor only.
 *
 * According to 14496-12 3.1.1 'container box', a container box is a
 * 'box whose sole purpose is to contain and group a set of related boxes'.
 *
 * This class is meant to be inherited only; it should not be instantiated
 * directly.
 */
class DefaultContainerImpl : public Box {
public:
  // MuxerOperation methods

  // Calls Generate() on every child, adds each child's size to this box's
  // size and reports the total through aBoxSize. Returns the first failure.
  nsresult Generate(uint32_t* aBoxSize) override;
  // Writes this box's header followed by every child box.
  nsresult Write() override;
  // Appends this box to aOperations when its type equals aType, then forwards
  // the search to every child box.
  nsresult Find(const nsACString& aType,
                nsTArray<RefPtr<MuxerOperation>>& aOperations) override;

protected:
  // DefaultContainerImpl methods
  DefaultContainerImpl(const nsACString& aType, ISOControl* aControl);
  DefaultContainerImpl() = delete;

  // Child boxes, in the order in which they are generated and written.
  nsTArray<RefPtr<MuxerOperation>> boxes;
};
// 14496-12 8.4.2 'Media Header Box'
// Box type: 'mdhd'
class MediaHeaderBox : public FullBox {
public:
  // ISO BMFF members
  uint32_t creation_time;
  uint32_t modification_time;
  uint32_t timescale;
  uint32_t duration;
  // One pad bit followed by three 5-bit fields: 16 bits in total.
  std::bitset<1> pad;
  std::bitset<5> lang1;
  std::bitset<5> lang2;
  std::bitset<5> lang3;
  uint16_t pre_defined;

  // MuxerOperation methods
  nsresult Generate(uint32_t* aBoxSize) override;
  nsresult Write() override;

  // MediaHeaderBox methods
  MediaHeaderBox(uint32_t aType, ISOControl* aControl);
  ~MediaHeaderBox();
  // NOTE(review): presumably returns `timescale`; the definition is not in
  // this header, confirm in ISOMediaBoxes.cpp.
  uint32_t GetTimeScale();

protected:
  // Audio_Track or Video_Track.
  // NOTE(review): presumably copied from the constructor's aType; confirm.
  uint32_t mTrackType;
};
// 14496-12 8.1.1 'Media Data Box'
// Box type: 'mdat'
class MediaDataBox : public Box {
public:
  // MuxerOperation methods
  nsresult Generate(uint32_t* aBoxSize) override;
  nsresult Write() override;

  // MediaDataBox methods

  // Returns the total size of the samples stored in this box.
  uint32_t GetAllSampleSize() { return mAllSampleSize; }
  // Returns the recorded offset of the first sample (see mFirstSampleOffset).
  uint32_t FirstSampleOffsetInMediaDataBox() { return mFirstSampleOffset; }
  MediaDataBox(uint32_t aTrackType, ISOControl* aControl);
  ~MediaDataBox();

protected:
  uint32_t mAllSampleSize;      // All audio and video sample size in this box.
  uint32_t mFirstSampleOffset;  // The offset of the first sample in this box
                                // from the beginning of this mp4 file.
  // NOTE(review): presumably the aTrackType passed to the constructor; confirm.
  uint32_t mTrackType;
};
// 14496-12 8.8.6 'Track Fragment Box'
// Box type: 'traf'
// TrackFragmentBox contains TrackFragmentHeaderBox and TrackRunBox.
class TrackFragmentBox : public DefaultContainerImpl {
public:
  // aType selects the track (Audio_Track or Video_Track) this fragment
  // belongs to.
  TrackFragmentBox(uint32_t aType, ISOControl* aControl);
  ~TrackFragmentBox();

protected:
  // NOTE(review): presumably the aType passed to the constructor; confirm.
  uint32_t mTrackType;
};
/**
 * 14496-12 8.5.2 'Sample Description Box'
 * This is the base class for VisualSampleEntry and AudioSampleEntry.
 *
 * This class is meant to be inherited only; it should not be instantiated
 * directly.
 *
 * The inheritance tree of a codec box should be:
 *
 *                                         +--> AVCSampleEntry
 *                  +--> VisualSampleEntry +
 *                  |                      +--> ...
 *   SampleEntryBox +
 *                  |                      +--> MP4AudioSampleEntry
 *                  +--> AudioSampleEntry  +
 *                                         +--> AMRSampleEntry
 *                                         +
 *                                         +--> ...
 *
 */
class SampleEntryBox : public Box {
public:
  // ISO BMFF members
  uint8_t reserved[6];            // Zero-filled by the constructor.
  uint16_t data_reference_index;  // Set to 1 by the constructor: there is
                                  // only one data reference in each track.

  // SampleEntryBox methods
  SampleEntryBox(const nsACString& aFormat, ISOControl* aControl);

  // MuxerOperation methods
  // Writes the box header, then `reserved` and `data_reference_index`.
  nsresult Write() override;

protected:
  SampleEntryBox() = delete;
};
class VisualSampleEntry : public SampleEntryBox {
public:
  // ISO BMFF members
  // NOTE: this 16-byte array hides SampleEntryBox::reserved (6 bytes).
  // Inside VisualSampleEntry the unqualified name refers to this array, while
  // SampleEntryBox::Write() still writes the 6-byte base member.
  uint8_t reserved[16];
  // 16-bit fields; the constructor fills them from the video track metadata.
  uint16_t width;
  uint16_t height;

  uint32_t horizresolution; // 72 dpi
  uint32_t vertresolution;  // 72 dpi
  uint32_t reserved2;
  uint16_t frame_count;     // 1, defined in 14496-12 8.5.2.2

  uint8_t compressorName[32]; // Zero-filled by the constructor.
  uint16_t depth;           // 0x0018, defined in 14496-12 8.5.2.2;
  uint16_t pre_defined;     // -1, defined in 14496-12 8.5.2.2;

  // MuxerOperation methods
  // Writes the SampleEntryBox part followed by the fields above, in
  // declaration order.
  nsresult Write() override;

  // VisualSampleEntry methods
  ~VisualSampleEntry();

protected:
  // Protected: only codec-specific subclasses may construct this box.
  VisualSampleEntry(const nsACString& aFormat, ISOControl* aControl);
};
// 14496-12 8.7.2 'Data Reference Box'
// Box type: 'url '
class DataEntryUrlBox : public FullBox {
public:
  // ISO BMFF members
  // flags in DataEntryUrlBox::flags
  // NOTE(review): by its name this bit marks media stored in the same file as
  // this box; confirm against 14496-12 8.7.2.
  const static uint16_t flags_media_at_the_same_file = 0x0001;

  nsCString location;

  // MuxerOperation methods
  nsresult Generate(uint32_t* aBoxSize) override;
  nsresult Write() override;

  // DataEntryUrlBox methods
  DataEntryUrlBox();
  DataEntryUrlBox(ISOControl* aControl);
  DataEntryUrlBox(const DataEntryUrlBox& aBox);
  ~DataEntryUrlBox();
};
// 14496-12 8.4.4 'Media Information Box'
// Box type: 'minf'
// MediaInformationBox contains SoundMediaHeaderBox, DataInformationBox and
// SampleTableBox.
// NOTE(review): a VideoMediaHeaderBox class is declared in this header as
// well; presumably it takes the place of SoundMediaHeaderBox for video
// tracks. Confirm in the constructor.
class MediaInformationBox : public DefaultContainerImpl {
public:
  // aType is the track type, Audio_Track or Video_Track.
  MediaInformationBox(uint32_t aType, ISOControl* aControl);
  ~MediaInformationBox();

protected:
  // NOTE(review): presumably the aType passed to the constructor; confirm.
  uint32_t mTrackType;
};
// 14496-12 8.3.2 'Track Header Box'
// Box type: 'tkhd'
class TrackHeaderBox : public FullBox {
public:
  // ISO BMFF members
  // version = 0 (the time and duration fields are 32 bits wide here)
  uint32_t creation_time;
  uint32_t modification_time;
  uint32_t track_ID;
  uint32_t reserved;
  uint32_t duration;

  uint32_t reserved2[2];
  uint16_t layer;
  uint16_t alternate_group;
  uint16_t volume;
  uint16_t reserved3;
  uint32_t matrix[9];
  uint32_t width;
  uint32_t height;

  // MuxerOperation methods
  nsresult Generate(uint32_t* aBoxSize) override;
  nsresult Write() override;

  // TrackHeaderBox methods
  // aType is the track type, Audio_Track or Video_Track.
  TrackHeaderBox(uint32_t aType, ISOControl* aControl);
  ~TrackHeaderBox();

protected:
  // NOTE(review): presumably the aType passed to the constructor; confirm.
  uint32_t mTrackType;
};
class MediaBox : public DefaultContainerImpl {
public:
  // Creates the 'mdia' container and appends, in this order, a
  // MediaHeaderBox, a HandlerBox and a MediaInformationBox, each built with
  // the same aType and aControl.
  MediaBox(uint32_t aType, ISOControl* aControl);
  ~MediaBox();

protected:
  // The aType given to the constructor.
  uint32_t mTrackType;
};
+#endif + +namespace mozilla { + +const static uint32_t FRAG_DURATION = 2 * USECS_PER_S; // microsecond per unit + +ISOMediaWriter::ISOMediaWriter(uint32_t aType, uint32_t aHint) + : ContainerWriter() + , mState(MUXING_HEAD) + , mBlobReady(false) + , mType(0) +{ + if (aType & CREATE_AUDIO_TRACK) { + mType |= Audio_Track; + } + if (aType & CREATE_VIDEO_TRACK) { + mType |= Video_Track; + } + mControl = new ISOControl(aHint); + MOZ_COUNT_CTOR(ISOMediaWriter); +} + +ISOMediaWriter::~ISOMediaWriter() +{ + MOZ_COUNT_DTOR(ISOMediaWriter); +} + +nsresult +ISOMediaWriter::RunState() +{ + nsresult rv; + switch (mState) { + case MUXING_HEAD: + { + rv = mControl->GenerateFtyp(); + NS_ENSURE_SUCCESS(rv, rv); + rv = mControl->GenerateMoov(); + NS_ENSURE_SUCCESS(rv, rv); + mState = MUXING_FRAG; + break; + } + case MUXING_FRAG: + { + rv = mControl->GenerateMoof(mType); + NS_ENSURE_SUCCESS(rv, rv); + + bool EOS; + if (ReadyToRunState(EOS) && EOS) { + mState = MUXING_DONE; + } + break; + } + case MUXING_DONE: + { + break; + } + } + mBlobReady = true; + return NS_OK; +} + +nsresult +ISOMediaWriter::WriteEncodedTrack(const EncodedFrameContainer& aData, + uint32_t aFlags) +{ + PROFILER_LABEL("ISOMediaWriter", "WriteEncodedTrack", + js::ProfileEntry::Category::OTHER); + // Muxing complete, it doesn't allowed to reentry again. + if (mState == MUXING_DONE) { + MOZ_ASSERT(false); + return NS_ERROR_FAILURE; + } + + FragmentBuffer* frag = nullptr; + uint32_t len = aData.GetEncodedFrames().Length(); + + if (!len) { + // no frame? 
why bother to WriteEncodedTrack + return NS_OK; + } + for (uint32_t i = 0; i < len; i++) { + RefPtr<EncodedFrame> frame(aData.GetEncodedFrames()[i]); + EncodedFrame::FrameType type = frame->GetFrameType(); + if (type == EncodedFrame::AAC_AUDIO_FRAME || + type == EncodedFrame::AAC_CSD || + type == EncodedFrame::AMR_AUDIO_FRAME || + type == EncodedFrame::AMR_AUDIO_CSD || + type == EncodedFrame::EVRC_AUDIO_FRAME || + type == EncodedFrame::EVRC_AUDIO_CSD) { + frag = mAudioFragmentBuffer; + } else if (type == EncodedFrame::AVC_I_FRAME || + type == EncodedFrame::AVC_P_FRAME || + type == EncodedFrame::AVC_B_FRAME || + type == EncodedFrame::AVC_CSD) { + frag = mVideoFragmentBuffer; + } else { + MOZ_ASSERT(0); + return NS_ERROR_FAILURE; + } + + frag->AddFrame(frame); + } + + // Encoder should send CSD (codec specific data) frame before sending the + // audio/video frames. When CSD data is ready, it is sufficient to generate a + // moov data. If encoder doesn't send CSD yet, muxer needs to wait before + // generating anything. + if (mType & Audio_Track && (!mAudioFragmentBuffer || + !mAudioFragmentBuffer->HasCSD())) { + return NS_OK; + } + if (mType & Video_Track && (!mVideoFragmentBuffer || + !mVideoFragmentBuffer->HasCSD())) { + return NS_OK; + } + + // Only one FrameType in EncodedFrameContainer so it doesn't need to be + // inside the for-loop. + if (frag && (aFlags & END_OF_STREAM)) { + frag->SetEndOfStream(); + } + + nsresult rv; + bool EOS; + if (ReadyToRunState(EOS)) { + // Because track encoder won't generate new data after EOS, it needs to make + // sure the state reaches MUXING_DONE when EOS is signaled. 
+ do { + rv = RunState(); + } while (EOS && mState != MUXING_DONE); + NS_ENSURE_SUCCESS(rv, rv); + } + + return NS_OK; +} + +bool +ISOMediaWriter::ReadyToRunState(bool& aEOS) +{ + aEOS = false; + bool bReadyToMux = true; + if ((mType & Audio_Track) && (mType & Video_Track)) { + if (!mAudioFragmentBuffer->HasEnoughData()) { + bReadyToMux = false; + } + if (!mVideoFragmentBuffer->HasEnoughData()) { + bReadyToMux = false; + } + + if (mAudioFragmentBuffer->EOS() && mVideoFragmentBuffer->EOS()) { + aEOS = true; + bReadyToMux = true; + } + } else if (mType == Audio_Track) { + if (!mAudioFragmentBuffer->HasEnoughData()) { + bReadyToMux = false; + } + if (mAudioFragmentBuffer->EOS()) { + aEOS = true; + bReadyToMux = true; + } + } else if (mType == Video_Track) { + if (!mVideoFragmentBuffer->HasEnoughData()) { + bReadyToMux = false; + } + if (mVideoFragmentBuffer->EOS()) { + aEOS = true; + bReadyToMux = true; + } + } + + return bReadyToMux; +} + +nsresult +ISOMediaWriter::GetContainerData(nsTArray<nsTArray<uint8_t>>* aOutputBufs, + uint32_t aFlags) +{ + PROFILER_LABEL("ISOMediaWriter", "GetContainerData", + js::ProfileEntry::Category::OTHER); + if (mBlobReady) { + if (mState == MUXING_DONE) { + mIsWritingComplete = true; + } + mBlobReady = false; + return mControl->GetBufs(aOutputBufs); + } + return NS_OK; +} + +nsresult +ISOMediaWriter::SetMetadata(TrackMetadataBase* aMetadata) +{ + PROFILER_LABEL("ISOMediaWriter", "SetMetadata", + js::ProfileEntry::Category::OTHER); + if (aMetadata->GetKind() == TrackMetadataBase::METADATA_AAC || + aMetadata->GetKind() == TrackMetadataBase::METADATA_AMR || + aMetadata->GetKind() == TrackMetadataBase::METADATA_EVRC) { + mControl->SetMetadata(aMetadata); + mAudioFragmentBuffer = new FragmentBuffer(Audio_Track, FRAG_DURATION); + mControl->SetFragment(mAudioFragmentBuffer); + return NS_OK; + } + if (aMetadata->GetKind() == TrackMetadataBase::METADATA_AVC) { + mControl->SetMetadata(aMetadata); + mVideoFragmentBuffer = new 
class ISOMediaWriter : public ContainerWriter
{
public:
  // Generate a fragmented MP4 stream, ISO/IEC 14496-12.
  // Brand names in 'ftyp' box are 'isom' and 'mp42'.
  const static uint32_t TYPE_FRAG_MP4 = 1 << 0;

  // Generate a fragmented 3GP stream, 3GPP TS 26.244,
  // '5.4.3 Basic profile'.
  // Brand names in 'ftyp' box are '3gp9' and 'isom'.
  const static uint32_t TYPE_FRAG_3GP = 1 << 1;

  // Generate a fragmented 3G2 stream, 3GPP2 C.S0050-B.
  // Brand names in 'ftyp' box are '3g2c' and 'isom'.
  const static uint32_t TYPE_FRAG_3G2 = 1 << 2;

  // aType is the combination of CREATE_AUDIO_TRACK and CREATE_VIDEO_TRACK.
  // It is a hint to the muxer that the output stream contains audio, video
  // or both.
  //
  // aHint is one of the TYPE_XXXXXXXX values. It is a hint to the muxer what
  // kind of ISO format should be generated.
  ISOMediaWriter(uint32_t aType, uint32_t aHint = TYPE_FRAG_MP4);
  ~ISOMediaWriter();

  // ContainerWriter methods
  nsresult WriteEncodedTrack(const EncodedFrameContainer &aData,
                             uint32_t aFlags = 0) override;

  nsresult GetContainerData(nsTArray<nsTArray<uint8_t>>* aOutputBufs,
                            uint32_t aFlags = 0) override;

  nsresult SetMetadata(TrackMetadataBase* aMetadata) override;

protected:
  /**
   * Each state generates one or more blobs.
   * Each blob will be a moov, moof, moof... until receiving EOS.
   * The generated sequence is:
   *
   *   moov -> moof -> moof -> ... -> moof -> moof
   *
   * Following are the details of each state.
   *   MUXING_HEAD:
   *     It collects the metadata to generate the ftyp and moov. The state
   *     transits to MUXING_FRAG after the moov blob is output.
   *
   *   MUXING_FRAG:
   *     It collects enough audio/video data to generate a fragment blob. This
   *     is repeated until END_OF_STREAM, then it transits to MUXING_DONE.
   *
   *   MUXING_DONE:
   *     End of ISOMediaWriter life cycle.
   */
  enum MuxState {
    MUXING_HEAD,
    MUXING_FRAG,
    MUXING_DONE,
  };

private:
  // Performs the work of the current state (see MuxState) and marks the
  // output blob as ready.
  nsresult RunState();

  // True if one of the following conditions holds:
  //   1. Audio/Video accumulates enough data to generate a moof.
  //   2. Get EOS signal.
  // aEOS will be assigned to true if it gets EOS signal.
  bool ReadyToRunState(bool& aEOS);

  // The main class to generate an ISO box. Its lifetime is the same as
  // ISOMediaWriter's and it is deleted only when ISOMediaWriter is destroyed.
  nsAutoPtr<ISOControl> mControl;

  // Buffers to keep audio/video data frames, they are created when metadata is
  // received. Only one instance for each media type is allowed and they will be
  // deleted only if ISOMediaWriter is destroyed.
  nsAutoPtr<FragmentBuffer> mAudioFragmentBuffer;
  nsAutoPtr<FragmentBuffer> mVideoFragmentBuffer;

  MuxState mState;

  // A flag to indicate the output buffer is ready to blob out.
  bool mBlobReady;

  // Combination of Audio_Track or Video_Track.
  uint32_t mType;
};
+#define AVC_CLOCK_RATE 90000 + +class AVCTrackMetadata : public VideoTrackMetadata { +public: + // VideoTrackMetadata members + uint32_t GetVideoHeight() override { return mHeight; } + uint32_t GetVideoWidth() override {return mWidth; } + uint32_t GetVideoDisplayHeight() override { return mDisplayHeight; } + uint32_t GetVideoDisplayWidth() override { return mDisplayWidth; } + uint32_t GetVideoClockRate() override { return AVC_CLOCK_RATE; } + uint32_t GetVideoFrameRate() override { return mFrameRate; } + + // TrackMetadataBase member + MetadataKind GetKind() const override { return METADATA_AVC; } + + // AVCTrackMetadata members + AVCTrackMetadata() + : mHeight(0) + , mWidth(0) + , mDisplayHeight(0) + , mDisplayWidth(0) + , mFrameRate(0) { + MOZ_COUNT_CTOR(AVCTrackMetadata); + } + ~AVCTrackMetadata() { MOZ_COUNT_DTOR(AVCTrackMetadata); } + + uint32_t mHeight; + uint32_t mWidth; + uint32_t mDisplayHeight; + uint32_t mDisplayWidth; + uint32_t mFrameRate; // frames per second +}; + + +// AMR sample rate is 8000 samples/s. +#define AMR_SAMPLE_RATE 8000 + +// Channel number is always 1. +#define AMR_CHANNELS 1 + +// AMR speech codec, 3GPP TS 26.071. Encoder and container support AMR-NB only +// currently. +class AMRTrackMetadata : public AudioTrackMetadata { +public: + // AudioTrackMetadata members + // + // The number of sample sets generated by the encoder varies, so the + // frame duration and frame size are both 0. + uint32_t GetAudioFrameDuration() override { return 0; } + uint32_t GetAudioFrameSize() override { return 0; } + uint32_t GetAudioSampleRate() override { return AMR_SAMPLE_RATE; } + uint32_t GetAudioChannels() override { return AMR_CHANNELS; } + + // TrackMetadataBase member + MetadataKind GetKind() const override { return METADATA_AMR; } + + // AMRTrackMetadata members + AMRTrackMetadata() { MOZ_COUNT_CTOR(AMRTrackMetadata); } + ~AMRTrackMetadata() { MOZ_COUNT_DTOR(AMRTrackMetadata); } +}; + +// EVRC sample rate is 8000 samples/s. 
+#define EVRC_SAMPLE_RATE 8000 + +class EVRCTrackMetadata : public AudioTrackMetadata { +public: + // AudioTrackMetadata members + // + // The number of sample sets generated by the encoder varies, so the + // frame duration and frame size are both 0. + uint32_t GetAudioFrameDuration() override { return 0; } + uint32_t GetAudioFrameSize() override { return 0; } + uint32_t GetAudioSampleRate() override { return EVRC_SAMPLE_RATE; } + uint32_t GetAudioChannels() override { return mChannels; } + + // TrackMetadataBase member + MetadataKind GetKind() const override { return METADATA_EVRC; } + + // EVRCTrackMetadata members + EVRCTrackMetadata() + : mChannels(0) { + MOZ_COUNT_CTOR(EVRCTrackMetadata); + } + ~EVRCTrackMetadata() { MOZ_COUNT_DTOR(EVRCTrackMetadata); } + + uint32_t mChannels; // Channel number, it should be 1 or 2. +}; + +} // namespace mozilla + +#endif // ISOTrackMetadata_h_ diff --git a/dom/media/encoder/fmp4_muxer/MP4ESDS.cpp b/dom/media/encoder/fmp4_muxer/MP4ESDS.cpp new file mode 100644 index 000000000..72880b5cb --- /dev/null +++ b/dom/media/encoder/fmp4_muxer/MP4ESDS.cpp @@ -0,0 +1,138 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include <climits> +#include "ISOControl.h" +#include "ISOMediaBoxes.h" +#include "MP4ESDS.h" + +namespace mozilla { + +nsresult +MP4AudioSampleEntry::Generate(uint32_t* aBoxSize) +{ + uint32_t box_size; + nsresult rv = es->Generate(&box_size); + NS_ENSURE_SUCCESS(rv, rv); + size += box_size; + + *aBoxSize = size; + return NS_OK; +} + +nsresult +MP4AudioSampleEntry::Write() +{ + BoxSizeChecker checker(mControl, size); + nsresult rv; + rv = AudioSampleEntry::Write(); + NS_ENSURE_SUCCESS(rv, rv); + rv = es->Write(); + NS_ENSURE_SUCCESS(rv, rv); + + return NS_OK; +} + +MP4AudioSampleEntry::MP4AudioSampleEntry(ISOControl* aControl) + : AudioSampleEntry(NS_LITERAL_CSTRING("mp4a"), aControl) +{ + es = new ESDBox(aControl); + MOZ_COUNT_CTOR(MP4AudioSampleEntry); +} + +MP4AudioSampleEntry::~MP4AudioSampleEntry() +{ + MOZ_COUNT_DTOR(MP4AudioSampleEntry); +} + +nsresult +ESDBox::Generate(uint32_t* aBoxSize) +{ + uint32_t box_size; + es_descriptor->Generate(&box_size); + size += box_size; + *aBoxSize = size; + return NS_OK; +} + +nsresult +ESDBox::Write() +{ + WRITE_FULLBOX(mControl, size) + es_descriptor->Write(); + return NS_OK; +} + +ESDBox::ESDBox(ISOControl* aControl) + : FullBox(NS_LITERAL_CSTRING("esds"), 0, 0, aControl) +{ + es_descriptor = new ES_Descriptor(aControl); + MOZ_COUNT_CTOR(ESDBox); +} + +ESDBox::~ESDBox() +{ + MOZ_COUNT_DTOR(ESDBox); +} + +nsresult +ES_Descriptor::Find(const nsACString& aType, + nsTArray<RefPtr<MuxerOperation>>& aOperations) +{ + // ES_Descriptor is not a real ISOMediaBox, so we return nothing here. 
+ return NS_OK; +} + +nsresult +ES_Descriptor::Write() +{ + mControl->Write(tag); + mControl->Write(length); + mControl->Write(ES_ID); + mControl->WriteBits(streamDependenceFlag.to_ulong(), streamDependenceFlag.size()); + mControl->WriteBits(URL_Flag.to_ulong(), URL_Flag.size()); + mControl->WriteBits(reserved.to_ulong(), reserved.size()); + mControl->WriteBits(streamPriority.to_ulong(), streamPriority.size()); + mControl->Write(DecodeSpecificInfo.Elements(), DecodeSpecificInfo.Length()); + + return NS_OK; +} + +nsresult +ES_Descriptor::Generate(uint32_t* aBoxSize) +{ + nsresult rv; + // 14496-1 '8.3.4 DecoderConfigDescriptor' + // 14496-1 '10.2.3 SL Packet Header Configuration' + FragmentBuffer* frag = mControl->GetFragment(Audio_Track); + rv = frag->GetCSD(DecodeSpecificInfo); + NS_ENSURE_SUCCESS(rv, rv); + + length = sizeof(ES_ID) + 1; + length += DecodeSpecificInfo.Length(); + + *aBoxSize = sizeof(tag) + sizeof(length) + length; + return NS_OK; +} + +ES_Descriptor::ES_Descriptor(ISOControl* aControl) + : tag(ESDescrTag) + , length(0) + , ES_ID(0) + , streamDependenceFlag(0) + , URL_Flag(0) + , reserved(0) + , streamPriority(0) + , mControl(aControl) +{ + MOZ_COUNT_CTOR(ES_Descriptor); +} + +ES_Descriptor::~ES_Descriptor() +{ + MOZ_COUNT_DTOR(ES_Descriptor); +} + +} diff --git a/dom/media/encoder/fmp4_muxer/MP4ESDS.h b/dom/media/encoder/fmp4_muxer/MP4ESDS.h new file mode 100644 index 000000000..ee91312c1 --- /dev/null +++ b/dom/media/encoder/fmp4_muxer/MP4ESDS.h @@ -0,0 +1,87 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef MP4ESDS_h_ +#define MP4ESDS_h_ + +#include "nsTArray.h" +#include "MuxerOperation.h" + +namespace mozilla { + +class ISOControl; + +/** + * ESDS tag + */ +#define ESDescrTag 0x03 + +/** + * 14496-1 '8.3.3 ES_Descriptor'. + * It will get DecoderConfigDescriptor and SLConfigDescriptor from + * AAC CSD data. + */ +class ES_Descriptor : public MuxerOperation { +public: + // ISO BMFF members + uint8_t tag; // ESDescrTag + uint8_t length; // NOTE(review): 8-bit; ES_Descriptor::Generate() stores sizeof(ES_ID) + 1 + DecodeSpecificInfo.Length() here, which wraps above 255 - confirm the CSD size bound + uint16_t ES_ID; + std::bitset<1> streamDependenceFlag; // NOTE(review): <bitset> is not included by this header - presumably supplied by an earlier include; confirm + std::bitset<1> URL_Flag; + std::bitset<1> reserved; + std::bitset<5> streamPriority; + + nsTArray<uint8_t> DecodeSpecificInfo; + + // MuxerOperation methods + nsresult Generate(uint32_t* aBoxSize) override; + nsresult Write() override; + nsresult Find(const nsACString& aType, + nsTArray<RefPtr<MuxerOperation>>& aOperations) override; + + // ES_Descriptor methods + ES_Descriptor(ISOControl* aControl); + ~ES_Descriptor(); + +protected: + ISOControl* mControl; +}; + +// 14496-14 5.6 'Sample Description Boxes' +// Box type: 'esds' +class ESDBox : public FullBox { +public: + // ISO BMFF members + RefPtr<ES_Descriptor> es_descriptor; + + // MuxerOperation methods + nsresult Generate(uint32_t* aBoxSize) override; + nsresult Write() override; + + // ESDBox methods + ESDBox(ISOControl* aControl); + ~ESDBox(); +}; + +// 14496-14 5.6 'Sample Description Boxes' +// Box type: 'mp4a' +class MP4AudioSampleEntry : public AudioSampleEntry { +public: + // ISO BMFF members + RefPtr<ESDBox> es; + + // MuxerOperation methods + nsresult Generate(uint32_t* aBoxSize) override; + nsresult Write() override; + + // MP4AudioSampleEntry methods + MP4AudioSampleEntry(ISOControl* aControl); + ~MP4AudioSampleEntry(); +}; + +} // namespace mozilla + +#endif // MP4ESDS_h_ diff --git a/dom/media/encoder/fmp4_muxer/MuxerOperation.h b/dom/media/encoder/fmp4_muxer/MuxerOperation.h new file mode 100644 index 000000000..0b83c89b0 --- /dev/null +++ b/dom/media/encoder/fmp4_muxer/MuxerOperation.h @@ -0,0 +1,57 @@ +/* -*- Mode: 
C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsString.h" +#include "nsTArray.h" + +#ifndef MuxerOperation_h_ +#define MuxerOperation_h_ + +namespace mozilla { + +/** + * The interface for ISO boxes. All Boxes inherit from this interface. + * Both Generate() and Write() must be called to produce a complete box. + * + * Generate() will generate all the data structures and their size. + * + * Write() will write all data into muxing output stream (ISOControl actually) + * and update the data which can't be known at Generate() (for example, the + * offset of the video data in mp4 file). + * + * ISO base media format is composed of several container boxes and the contained + * boxes. The container boxes hold a list of MuxerOperation which is implemented + * by contained boxes. The contained boxes will be called via the list. + * For example: + * MovieBox (container) ---> boxes (array of MuxerOperation) + * |---> MovieHeaderBox (full box) + * |---> TrakBox (container) + * |---> MovieExtendsBox (container) + * + * The complete box structure can be found at 14496-12 E.2 "The 'isom' brand". + */ +class MuxerOperation { +public: + NS_INLINE_DECL_THREADSAFE_REFCOUNTING(MuxerOperation) + + // Generate data of this box and its contained boxes, and calculate box size. + virtual nsresult Generate(uint32_t* aBoxSize) = 0; + + // Write data to stream. + virtual nsresult Write() = 0; + + // Find the box type via its name (name is the box type defined in 14496-12; + // for example, 'moov' is the name of MovieBox). + // It can only look up child boxes, including itself and the boxes in its + // boxes list if one exists. It can't look up parent boxes. 
+ virtual nsresult Find(const nsACString& aType, + nsTArray<RefPtr<MuxerOperation>>& aOperations) = 0; + +protected: + virtual ~MuxerOperation() {} +}; + +} +#endif diff --git a/dom/media/encoder/fmp4_muxer/moz.build b/dom/media/encoder/fmp4_muxer/moz.build new file mode 100644 index 000000000..5ff274be5 --- /dev/null +++ b/dom/media/encoder/fmp4_muxer/moz.build @@ -0,0 +1,22 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +EXPORTS += [ + 'ISOMediaWriter.h', + 'ISOTrackMetadata.h', +] + +UNIFIED_SOURCES += [ + 'AMRBox.cpp', + 'AVCBox.cpp', + 'EVRCBox.cpp', + 'ISOControl.cpp', + 'ISOMediaBoxes.cpp', + 'ISOMediaWriter.cpp', + 'MP4ESDS.cpp', +] + +FINAL_LIBRARY = 'xul' |