summaryrefslogtreecommitdiffstats
path: root/dom/media/encoder/fmp4_muxer/ISOControl.h
diff options
context:
space:
mode:
Diffstat (limited to 'dom/media/encoder/fmp4_muxer/ISOControl.h')
-rw-r--r--dom/media/encoder/fmp4_muxer/ISOControl.h250
1 files changed, 250 insertions, 0 deletions
diff --git a/dom/media/encoder/fmp4_muxer/ISOControl.h b/dom/media/encoder/fmp4_muxer/ISOControl.h
new file mode 100644
index 000000000..3c445caee
--- /dev/null
+++ b/dom/media/encoder/fmp4_muxer/ISOControl.h
@@ -0,0 +1,250 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ISOCOMPOSITOR_H_
+#define ISOCOMPOSITOR_H_
+
+#include "mozilla/EndianUtils.h"
+#include "nsTArray.h"
+#include "ISOTrackMetadata.h"
+#include "EncodedFrameContainer.h"
+
+namespace mozilla {
+
+class Box;
+class ISOControl;
+
+/**
+ * This class collects elementary stream data to form a fragment.
+ * ISOMediaWriter checks whether the collected data is enough; if so, the
+ * corresponding moof is created and written to ISOControl.
+ * Audio and video each have their own fragment, and only one exists during
+ * the whole life cycle; when a fragment has been formed in ISOControl,
+ * Flush() needs to be called to reset it.
+ */
+class FragmentBuffer {
+public:
+ // aTrackType: it could be Audio_Track or Video_Track.
+ // aFragDuration: the fragment duration. (microsecond per unit)
+ // Audio and video have the same fragment duration.
+ FragmentBuffer(uint32_t aTrackType, uint32_t aFragDuration);
+ ~FragmentBuffer();
+
+ // Get the samples of the first fragment. When aFlush is true, all the
+ // elements of mFragArray[0] are swapped out into aFragment, and the caller
+ // becomes responsible for releasing the EncodedFrame references.
+ nsresult GetFirstFragment(nsTArray<RefPtr<EncodedFrame>>& aFragment,
+ bool aFlush = false);
+
+ // Add a sample frame to the last fragment of mFragArray. If that fragment
+ // already holds enough samples, a new fragment element is appended, and the
+ // new sample is added to the new fragment element of mFragArray.
+ nsresult AddFrame(EncodedFrame* aFrame);
+
+ // Get the total size of the samples in the first complete fragment.
+ uint32_t GetFirstFragmentSampleSize();
+
+ // Get the number of samples in the first complete fragment.
+ uint32_t GetFirstFragmentSampleNumber();
+
+ // Check whether enough frame data has been accumulated.
+ // It returns true when the data is enough to form a fragment.
+ bool HasEnoughData();
+
+ // Called by ISOMediaWriter when TrackEncoder has sent the last frame. The
+ // remaining frame data will form the last moof, and the state machine in
+ // ISOMediaWriter will move to its last phase.
+ nsresult SetEndOfStream() {
+ mEOS = true;
+ return NS_OK;
+ }
+ bool EOS() { return mEOS; }
+
+ // CSD (codec specific data); it is generated by the encoder and its layout
+ // depends on the codec type. This data is sent as a special frame from the
+ // encoder to ISOMediaWriter and passed to this class via AddFrame().
+ nsresult GetCSD(nsTArray<uint8_t>& aCSD);
+
+ bool HasCSD() { return mCSDFrame; }
+
+ uint32_t GetType() { return mTrackType; }
+
+ void SetLastFragmentLastFrameTime(uint32_t aTime) {
+ mLastFrameTimeOfLastFragment = aTime;
+ }
+
+ uint32_t GetLastFragmentLastFrameTime() {
+ return mLastFrameTimeOfLastFragment;
+ }
+
+private:
+ uint32_t mTrackType;
+
+ // Fragment duration, microsecond per unit.
+ uint32_t mFragDuration;
+
+ // Media start time, microsecond per unit.
+ // Together with mFragDuration, mFragmentNumber and EncodedFrame->GetTimeStamp(),
+ // it determines fragment boundaries: when the difference between the current
+ // frame time and mMediaStartTime exceeds the current fragment's ceiling
+ // timeframe, the fragment has enough data and a new element is added to mFragArray.
+ uint64_t mMediaStartTime;
+
+ // Current fragment number. It is increased when a new element of
+ // mFragArray is created.
+ // Note:
+ // It only means the fragment number of the currently accumulated frames,
+ // not the current 'creating' fragment mFragNum in ISOControl.
+ uint32_t mFragmentNumber;
+
+ // The timestamp of the last frame of the last fragment. It is used to
+ // calculate the play duration of the first frame in the current fragment.
+ // A frame's duration is defined here as "current frame timestamp - last
+ // frame timestamp", so the last timestamp of the last fragment is kept.
+ uint32_t mLastFrameTimeOfLastFragment;
+
+ // Array of fragments; each element has enough samples to form a
+ // complete fragment.
+ nsTArray<nsTArray<RefPtr<EncodedFrame>>> mFragArray;
+
+ // Codec specific data frame; it is generated by the encoder and sent to
+ // ISOMediaWriter through WriteEncodedTrack(). The data varies depending
+ // on the codec type.
+ RefPtr<EncodedFrame> mCSDFrame;
+
+ // END_OF_STREAM from ContainerWriter
+ bool mEOS;
+};
+
+/**
+ * ISOControl is passed to each box when the box is created. It is the main
+ * bridge for a box to output its stream to ContainerWriter and to retrieve
+ * information. ISOControl plays 3 different roles:
+ * 1. Holds the pointers to the audio metadata, video metadata and fragments,
+ *    and passes them to the boxes.
+ * 2. Provides the functions to generate the base structure of the MP4; they
+ *    are GenerateFtyp, GenerateMoov, GenerateMoof, and GenerateMfra.
+ * 3. Acts as the actual writer used by MuxOperation::Write() in each box. It
+ *    provides writing methods for different kinds of data; they are Write,
+ *    WriteArray, WriteBits...etc.
+ */
+class ISOControl {
+
+friend class Box;
+
+public:
+ ISOControl(uint32_t aMuxingType);
+ ~ISOControl();
+
+ nsresult GenerateFtyp();
+ nsresult GenerateMoov();
+ nsresult GenerateMoof(uint32_t aTrackType);
+
+ // Swap the elementary stream data into the output buffers.
+ uint32_t WriteAVData(nsTArray<uint8_t>& aArray);
+
+ uint32_t Write(uint8_t* aBuf, uint32_t aSize);
+
+ uint32_t Write(uint8_t aData);
+
+ // Write a multi-byte integer in network (big-endian) byte order.
+ // Returns the number of bytes written.
+ template <typename T>
+ uint32_t Write(T aData) {
+ MOZ_ASSERT(!mBitCount);
+
+ aData = NativeEndian::swapToNetworkOrder(aData);
+ Write((uint8_t*)&aData, sizeof(T));
+ return sizeof(T);
+ }
+
+ // Write aSize elements of aArray, each through Write(T) above.
+ // Returns the total number of bytes written.
+ template <typename T>
+ uint32_t WriteArray(const T &aArray, uint32_t aSize) {
+ MOZ_ASSERT(!mBitCount);
+
+ uint32_t size = 0;
+ for (uint32_t i = 0; i < aSize; i++) {
+ size += Write(aArray[i]);
+ }
+ return size;
+ }
+
+ uint32_t WriteFourCC(const char* aType);
+
+ // Bit writing. Note: the stream must be back at a byte boundary before
+ // any of the non-bit writing functions is used.
+ uint32_t WriteBits(uint64_t aBits, size_t aNumBits);
+
+ // Called by GetContainerData; swaps all the buffers to aOutputBufs.
+ nsresult GetBufs(nsTArray<nsTArray<uint8_t>>* aOutputBufs);
+
+ // Presentation time in seconds since midnight, Jan. 1, 1904, in UTC time.
+ uint32_t GetTime();
+
+ // Current fragment number.
+ uint32_t GetCurFragmentNumber() { return mFragNum; }
+
+ nsresult SetFragment(FragmentBuffer* aFragment);
+ FragmentBuffer* GetFragment(uint32_t aType);
+
+ uint32_t GetMuxingType() { return mMuxingType; }
+
+ nsresult SetMetadata(TrackMetadataBase* aTrackMeta);
+ nsresult GetAudioMetadata(RefPtr<AudioTrackMetadata>& aAudMeta);
+ nsresult GetVideoMetadata(RefPtr<VideoTrackMetadata>& aVidMeta);
+
+ // The track ID is based on the Metadata index in mMetaArray (see the
+ // mMetaArray comment). This muxer allows only 1 audio track and 1 video
+ // track; multiple audio or video tracks in the same file are prohibited.
+ uint32_t GetTrackID(TrackMetadataBase::MetadataKind aKind);
+ uint32_t GetNextTrackID();
+
+ bool HasAudioTrack();
+ bool HasVideoTrack();
+
+private:
+ uint32_t GetBufPos();
+ nsresult FlushBuf();
+
+ // One of the TYPE_XXX values defined in ISOMediaWriter.
+ uint32_t mMuxingType;
+
+ // Audio and video fragments are owned by ISOMediaWriter.
+ // These pointers cannot go stale because ISOMediaWriter's lifetime is
+ // longer than ISOControl's.
+ FragmentBuffer* mAudioFragmentBuffer;
+ FragmentBuffer* mVideoFragmentBuffer;
+
+ // Number of generated fragments.
+ uint32_t mFragNum;
+
+ // The (index + 1) will be the track ID.
+ nsTArray<RefPtr<TrackMetadataBase>> mMetaArray;
+
+ // Array of output buffers.
+ // To save memory usage, each audio/video sample is swapped into a new
+ // element of this array.
+ //
+ // For example,
+ // mOutBuffers[0] --> boxes (allocated by muxer)
+ // mOutBuffers[1] --> video raw data (allocated by encoder)
+ // mOutBuffers[2] --> video raw data (allocated by encoder)
+ // mOutBuffers[3] --> video raw data (allocated by encoder)
+ // mOutBuffers[4] --> boxes (allocated by muxer)
+ // mOutBuffers[5] --> audio raw data (allocated by encoder)
+ // ...etc.
+ //
+ nsTArray<nsTArray<uint8_t>> mOutBuffers;
+
+ // Accumulated output size from Write().
+ uint64_t mOutputSize;
+
+ // Bit writing state. Note: mBitCount must be 0 before any byte-boundary
+ // writing method is called (Write(uint32_t), Write(uint16_t)...etc);
+ // otherwise those functions will assert.
+ uint8_t mBitCount;
+ uint8_t mBit;
+};
+
+}
+#endif