Diffstat (limited to 'dom/media/encoder/fmp4_muxer/ISOControl.h')
-rw-r--r-- | dom/media/encoder/fmp4_muxer/ISOControl.h | 250 |
1 files changed, 250 insertions, 0 deletions
diff --git a/dom/media/encoder/fmp4_muxer/ISOControl.h b/dom/media/encoder/fmp4_muxer/ISOControl.h
new file mode 100644
index 000000000..3c445caee
--- /dev/null
+++ b/dom/media/encoder/fmp4_muxer/ISOControl.h
@@ -0,0 +1,250 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ISOCOMPOSITOR_H_
+#define ISOCOMPOSITOR_H_
+
+#include "mozilla/EndianUtils.h"
+#include "nsTArray.h"
+#include "ISOTrackMetadata.h"
+#include "EncodedFrameContainer.h"
+
+namespace mozilla {
+
+class Box;
+class ISOControl;
+
+/**
+ * This class collects elementary stream data to form a fragment.
+ * ISOMediaWriter checks whether the collected data is enough; if it is, the
+ * corresponding moof is created and written to ISOControl.
+ * Audio and video each have exactly one FragmentBuffer for their whole life
+ * cycle; once a fragment has been formed in ISOControl, Flush() needs to be
+ * called to reset it.
+ */
+class FragmentBuffer {
+public:
+  // aTrackType: either Audio_Track or Video_Track.
+  // aFragDuration: the fragment duration (microseconds per unit).
+  //                Audio and video share the same fragment duration.
+  FragmentBuffer(uint32_t aTrackType, uint32_t aFragDuration);
+  ~FragmentBuffer();
+
+  // Get the samples of the first fragment. When aFlush = true, all elements
+  // of mFragArray[0] are swapped out, and the caller is responsible for
+  // dropping the EncodedFrame references.
+  nsresult GetFirstFragment(nsTArray<RefPtr<EncodedFrame>>& aFragment,
+                            bool aFlush = false);
+
+  // Add a sample frame to the last fragment element of mFragArray. If that
+  // fragment already holds enough samples, a new fragment element is
+  // appended and the new sample is added to it instead.
+  nsresult AddFrame(EncodedFrame* aFrame);
+
+  // Get the total sample size of the first complete fragment.
+  uint32_t GetFirstFragmentSampleSize();
+
+  // Get the number of samples in the first complete fragment.
+  uint32_t GetFirstFragmentSampleNumber();
+
+  // Check whether enough frame data has been accumulated; returns true when
+  // the data is enough to form a fragment.
+  bool HasEnoughData();
+
+  // Called by ISOMediaWriter when TrackEncoder has sent the last frame. The
+  // remaining frame data will form the last moof and move the state machine
+  // in ISOMediaWriter to the last phase.
+  nsresult SetEndOfStream() {
+    mEOS = true;
+    return NS_OK;
+  }
+  bool EOS() { return mEOS; }
+
+  // CSD (codec specific data) is generated by the encoder and depends on the
+  // codec type. It is sent as a special frame from the encoder to
+  // ISOMediaWriter and passed to this class via AddFrame().
+  nsresult GetCSD(nsTArray<uint8_t>& aCSD);
+
+  bool HasCSD() { return mCSDFrame; }
+
+  uint32_t GetType() { return mTrackType; }
+
+  void SetLastFragmentLastFrameTime(uint32_t aTime) {
+    mLastFrameTimeOfLastFragment = aTime;
+  }
+
+  uint32_t GetLastFragmentLastFrameTime() {
+    return mLastFrameTimeOfLastFragment;
+  }
+
+private:
+  uint32_t mTrackType;
+
+  // Fragment duration, microseconds per unit.
+  uint32_t mFragDuration;
+
+  // Media start time, microseconds per unit.
+  // Together with mFragDuration, mFragmentNumber and
+  // EncodedFrame->GetTimeStamp(), it determines when a fragment is full:
+  // when the difference between the current frame time and mMediaStartTime
+  // exceeds the current fragment's ceiling time, the current fragment has
+  // enough data and a new element is added to mFragArray.
+  uint64_t mMediaStartTime;
+
+  // Current fragment number. It is increased whenever a new element of
+  // mFragArray is created.
+  // Note:
+  //   It only reflects the fragment number of the currently accumulated
+  //   frames, not the 'creating' fragment counter mFragNum in ISOControl.
+  uint32_t mFragmentNumber;
+
+  // The timestamp of the last frame of the last fragment. It is used to
+  // calculate the play duration of the first frame in the current fragment:
+  // a frame duration is defined here as "current frame timestamp - last
+  // frame timestamp", so the last timestamp of the previous fragment must
+  // be kept.
+  uint32_t mLastFrameTimeOfLastFragment;
+
+  // Array of fragments; each element holds enough samples to form a
+  // complete fragment.
+  nsTArray<nsTArray<RefPtr<EncodedFrame>>> mFragArray;
+
+  // Codec specific data frame. It is generated by the encoder and sent to
+  // ISOMediaWriter through WriteEncodedTrack(); its contents vary depending
+  // on the codec type.
+  RefPtr<EncodedFrame> mCSDFrame;
+
+  // END_OF_STREAM from ContainerWriter.
+  bool mEOS;
+};
+
+/**
+ * ISOControl is passed to each box when the box is created. It is the main
+ * bridge for a box to output its stream to ContainerWriter and to retrieve
+ * information. ISOControl plays 3 different roles:
+ * 1. Holds the pointers to the audio metadata, video metadata and fragments,
+ *    and passes them to boxes.
+ * 2. Provides the functions that generate the base structure of MP4:
+ *    GenerateFtyp, GenerateMoov, GenerateMoof, and GenerateMfra.
+ * 3. Is the actual writer used by MuxOperation::Write() in each box. It
+ *    provides writing methods for different kinds of data: Write,
+ *    WriteArray, WriteBits, etc.
+ */
+class ISOControl {
+
+friend class Box;
+
+public:
+  ISOControl(uint32_t aMuxingType);
+  ~ISOControl();
+
+  nsresult GenerateFtyp();
+  nsresult GenerateMoov();
+  nsresult GenerateMoof(uint32_t aTrackType);
+
+  // Swap elementary stream data into the output buffers.
+  uint32_t WriteAVData(nsTArray<uint8_t>& aArray);
+
+  uint32_t Write(uint8_t* aBuf, uint32_t aSize);
+
+  uint32_t Write(uint8_t aData);
+
+  template <typename T>
+  uint32_t Write(T aData) {
+    MOZ_ASSERT(!mBitCount);
+
+    aData = NativeEndian::swapToNetworkOrder(aData);
+    Write((uint8_t*)&aData, sizeof(T));
+    return sizeof(T);
+  }
+
+  template <typename T>
+  uint32_t WriteArray(const T &aArray, uint32_t aSize) {
+    MOZ_ASSERT(!mBitCount);
+
+    uint32_t size = 0;
+    for (uint32_t i = 0; i < aSize; i++) {
+      size += Write(aArray[i]);
+    }
+    return size;
+  }
+
+  uint32_t WriteFourCC(const char* aType);
+
+  // Bit writing. Note: the stream must be back on a byte boundary before any
+  // of the other, non-bit writing functions are used.
+  uint32_t WriteBits(uint64_t aBits, size_t aNumBits);
+
+  // Called by GetContainerData; swaps all the buffers into aOutputBufs.
+  nsresult GetBufs(nsTArray<nsTArray<uint8_t>>* aOutputBufs);
+
+  // Presentation time in seconds since midnight, Jan. 1, 1904, in UTC time.
+  uint32_t GetTime();
+
+  // Current fragment number.
+  uint32_t GetCurFragmentNumber() { return mFragNum; }
+
+  nsresult SetFragment(FragmentBuffer* aFragment);
+  FragmentBuffer* GetFragment(uint32_t aType);
+
+  uint32_t GetMuxingType() { return mMuxingType; }
+
+  nsresult SetMetadata(TrackMetadataBase* aTrackMeta);
+  nsresult GetAudioMetadata(RefPtr<AudioTrackMetadata>& aAudMeta);
+  nsresult GetVideoMetadata(RefPtr<VideoTrackMetadata>& aVidMeta);
+
+  // The track ID is the metadata index in mMetaArray. This muxer allows only
+  // 1 audio track and 1 video track; it is prohibited to have multiple audio
+  // or video tracks in the same file.
+  uint32_t GetTrackID(TrackMetadataBase::MetadataKind aKind);
+  uint32_t GetNextTrackID();
+
+  bool HasAudioTrack();
+  bool HasVideoTrack();
+
+private:
+  uint32_t GetBufPos();
+  nsresult FlushBuf();
+
+  // One of the TYPE_XXX values defined in ISOMediaWriter.
+  uint32_t mMuxingType;
+
+  // Audio and video fragments are owned by ISOMediaWriter. These pointers
+  // never go stale because ISOMediaWriter's lifetime is longer than
+  // ISOControl's.
+  FragmentBuffer* mAudioFragmentBuffer;
+  FragmentBuffer* mVideoFragmentBuffer;
+
+  // Number of fragments generated so far.
+  uint32_t mFragNum;
+
+  // (index + 1) is the track ID.
+  nsTArray<RefPtr<TrackMetadataBase>> mMetaArray;
+
+  // Array of output buffers.
+  // To save memory, each audio/video sample is swapped into a new element of
+  // this array instead of being copied.
+  //
+  // For example,
+  //   mOutBuffers[0] --> boxes (allocated by muxer)
+  //   mOutBuffers[1] --> video raw data (allocated by encoder)
+  //   mOutBuffers[2] --> video raw data (allocated by encoder)
+  //   mOutBuffers[3] --> video raw data (allocated by encoder)
+  //   mOutBuffers[4] --> boxes (allocated by muxer)
+  //   mOutBuffers[5] --> audio raw data (allocated by encoder)
+  //   ...etc.
+  //
+  nsTArray<nsTArray<uint8_t>> mOutBuffers;
+
+  // Accumulated output size from Write().
+  uint64_t mOutputSize;
+
+  // Bit writing state. Note: mBitCount must be 0 before any byte-boundary
+  // writing method is called (Write(uint32_t), Write(uint16_t), etc.);
+  // otherwise those methods will assert.
+  uint8_t mBitCount;
+  uint8_t mBit;
+};
+
+}
+#endif
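
The FragmentBuffer comments above describe an accumulate-then-drain cycle driven by ISOMediaWriter. The following is a minimal sketch of that cycle, not part of this patch: the function name FeedAndMaybeMux and the exact call ordering are assumptions inferred only from the declarations above (AddFrame, HasEnoughData, GetType, GenerateMoof), with error handling reduced to early returns.

// Illustrative only: how a writer in the style of ISOMediaWriter could drive
// a FragmentBuffer, based on the API declared in ISOControl.h.
#include "ISOControl.h"

namespace mozilla {

static nsresult
FeedAndMaybeMux(FragmentBuffer& aFragBuffer, ISOControl& aControl,
                EncodedFrame* aFrame)
{
  // Accumulate the sample into the currently open fragment (a CSD frame is
  // routed the same way and recorded by the buffer).
  nsresult rv = aFragBuffer.AddFrame(aFrame);
  if (NS_FAILED(rv)) {
    return rv;
  }

  // Once a whole fragment's worth of data is available, the corresponding
  // moof is generated into ISOControl, as the class comment describes.
  if (aFragBuffer.HasEnoughData()) {
    rv = aControl.GenerateMoof(aFragBuffer.GetType());
    if (NS_FAILED(rv)) {
      return rv;
    }
  }
  return NS_OK;
}

} // namespace mozilla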
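The Write/WriteBits comments impose an ordering contract: byte-oriented writes assert that no partial byte is pending. A small sketch of that contract follows; it is illustrative rather than taken from this patch, and the field values and the fourcc are made up for the example.

// Illustrative only: mixing byte-aligned and bit-level writes on ISOControl.
static void
WriteIllustrativeFields(ISOControl& aControl)
{
  aControl.WriteFourCC("mvhd");     // 4 bytes, byte-aligned
  aControl.Write((uint32_t)0);      // integer, swapped to network byte order

  // Bit-packed fields: these two calls add up to exactly 8 bits, so the
  // stream is back on a byte boundary before the next byte-aligned Write();
  // otherwise MOZ_ASSERT(!mBitCount) in Write() would fire.
  aControl.WriteBits(0x1, 2);
  aControl.WriteBits(0x0, 6);

  aControl.Write((uint16_t)0);      // safe again: mBitCount is back to 0
}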
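Finally, a sketch of the top-level output flow suggested by the comments on GenerateFtyp/GenerateMoov, GetBufs and mOutBuffers. The function name and the assumption that ftyp and moov are emitted once before draining are illustrative; only the calls themselves are declared in this header.

// Illustrative only: produce the file-level boxes, then hand the accumulated
// buffers (boxes interleaved with raw A/V sample data, as the mOutBuffers
// comment describes) back to the caller without copying.
static nsresult
StartAndDrain(ISOControl& aControl, nsTArray<nsTArray<uint8_t>>& aOutput)
{
  nsresult rv = aControl.GenerateFtyp();
  if (NS_FAILED(rv)) {
    return rv;
  }
  rv = aControl.GenerateMoov();
  if (NS_FAILED(rv)) {
    return rv;
  }
  // GetBufs() swaps mOutBuffers into aOutput; in the real writer this is
  // driven from GetContainerData().
  return aControl.GetBufs(&aOutput);
}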