summaryrefslogtreecommitdiffstats
path: root/dom/media/encoder/fmp4_muxer/ISOControl.h
blob: 3c445caee188ed5d7167e4a2365febf62494fe97 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
 * You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef ISOCOMPOSITOR_H_
#define ISOCOMPOSITOR_H_

#include "mozilla/EndianUtils.h"
#include "nsTArray.h"
#include "ISOTrackMetadata.h"
#include "EncodedFrameContainer.h"

namespace mozilla {

class Box;
class ISOControl;

/**
 * This class collects elementary stream data to form a fragment.
 * ISOMediaWriter checks whether the data is sufficient; if so, the
 * corresponding moof is created and written to ISOControl.
 * Audio and video each have their own fragment, and only one during the
 * whole life cycle; when a fragment is formed in ISOControl, Flush() needs
 * to be called to reset it.
 */
class FragmentBuffer {
public:
  // aTrackType: either Audio_Track or Video_Track.
  // aFragDuration: the fragment duration (microseconds per unit).
  //                Audio and video share the same fragment duration.
  FragmentBuffer(uint32_t aTrackType, uint32_t aFragDuration);
  ~FragmentBuffer();

  // Gets the samples of the first fragment. When aFlush is true, all the
  // elements in mFragArray[0] are swapped into aFragment and the caller
  // becomes responsible for dropping the EncodedFrame reference counts.
  nsresult GetFirstFragment(nsTArray<RefPtr<EncodedFrame>>& aFragment,
                            bool aFlush = false);

  // Adds a sample frame to the last fragment element of mFragArray. If that
  // fragment already holds enough samples, a new fragment element is
  // appended and the new sample is added to it instead.
  nsresult AddFrame(EncodedFrame* aFrame);

  // Total sample size of the first complete fragment.
  uint32_t GetFirstFragmentSampleSize();

  // Number of samples in the first complete fragment.
  uint32_t GetFirstFragmentSampleNumber();

  // Checks whether enough frame data has accumulated.
  // Returns true when the data is enough to form a fragment.
  bool HasEnoughData();

  // Called by ISOMediaWriter when TrackEncoder has sent the last frame. The
  // remaining frame data will form the last moof, and the state machine in
  // ISOMediaWriter moves to the last phase.
  nsresult SetEndOfStream() {
    mEOS = true;
    return  NS_OK;
  }
  // True once SetEndOfStream() has been called.
  bool EOS() { return mEOS; }

  // CSD (codec specific data) is generated by the encoder, and its contents
  // depend on the codec type. It is sent as a special frame from the encoder
  // to ISOMediaWriter and passed to this class via AddFrame().
  nsresult GetCSD(nsTArray<uint8_t>& aCSD);

  // True if a CSD frame has been received.
  bool HasCSD() { return mCSDFrame; }

  // Track type given to the constructor (Audio_Track or Video_Track).
  uint32_t GetType() { return mTrackType; }

  // Records the timestamp of the last frame of the fragment just completed;
  // see mLastFrameTimeOfLastFragment below.
  void SetLastFragmentLastFrameTime(uint32_t aTime) {
    mLastFrameTimeOfLastFragment = aTime;
  }

  uint32_t GetLastFragmentLastFrameTime() {
    return mLastFrameTimeOfLastFragment;
  }

private:
  // Audio_Track or Video_Track.
  uint32_t mTrackType;

  // Fragment duration, microseconds per unit.
  uint32_t mFragDuration;

  // Media start time, microseconds per unit.
  // Together with mFragDuration, mFragmentNumber and
  // EncodedFrame->GetTimeStamp(): when the difference between the current
  // frame time and mMediaStartTime exceeds the current fragment's ceiling
  // timeframe, the current fragment has enough data and a new element is
  // added to mFragArray.
  uint64_t mMediaStartTime;

  // Current fragment number. It is increased whenever a new element of
  // mFragArray is created.
  // Note:
  //   This is only the fragment number of the currently accumulated frames,
  //   not the current 'creating' fragment counter mFragNum in ISOControl.
  uint32_t mFragmentNumber;

  // The timestamp of the last frame of the previous fragment. It is used to
  // calculate the play duration of the first frame in the current fragment.
  // The frame duration is defined here as "current frame timestamp - last
  // frame timestamp", so the last timestamp of the previous fragment must
  // be kept.
  uint32_t mLastFrameTimeOfLastFragment;

  // Array of fragments; each element has enough samples to form a
  // complete fragment.
  nsTArray<nsTArray<RefPtr<EncodedFrame>>> mFragArray;

  // Codec specific data frame; it is generated by the encoder and sent to
  // ISOMediaWriter through WriteEncodedTrack(). The data varies depending
  // on the codec type.
  RefPtr<EncodedFrame> mCSDFrame;

  // END_OF_STREAM received from ContainerWriter.
  bool mEOS;
};

/**
 * ISOControl is passed to each box when the box is created. It is the main
 * bridge for a box to output its stream to ContainerWriter and to retrieve
 * information. ISOControl plays 3 different roles:
 * 1. Holds the pointers to the audio metadata, video metadata and fragments,
 *    and passes them to boxes.
 * 2. Provides the functions that generate the base structure of the MP4;
 *    they are GenerateFtyp, GenerateMoov, GenerateMoof, and GenerateMfra.
 * 3. Is the actual writer used by MuxOperation::Write() in each box. It
 *    provides writing methods for different kinds of data; they are Write,
 *    WriteArray, WriteBits...etc.
 */
class ISOControl {

friend class Box;

public:
  // aMuxingType: one of the TYPE_XXX values defined in ISOMediaWriter.
  ISOControl(uint32_t aMuxingType);
  ~ISOControl();

  // Generate the base MP4 structures into the output buffers.
  nsresult GenerateFtyp();
  nsresult GenerateMoov();
  nsresult GenerateMoof(uint32_t aTrackType);

  // Swap elementary stream pointer to output buffers.
  uint32_t WriteAVData(nsTArray<uint8_t>& aArray);

  uint32_t Write(uint8_t* aBuf, uint32_t aSize);

  uint32_t Write(uint8_t aData);

  // Writes an integral value in network (big-endian) byte order.
  // Returns the number of bytes written (sizeof(T)).
  template <typename T>
  uint32_t Write(T aData) {
    // Byte-boundary writes are forbidden while a bit write is in progress.
    MOZ_ASSERT(!mBitCount);

    aData = NativeEndian::swapToNetworkOrder(aData);
    // reinterpret_cast instead of a C-style cast: intent-revealing and
    // greppable; behavior is identical.
    Write(reinterpret_cast<uint8_t*>(&aData), sizeof(T));
    return sizeof(T);
  }

  // Writes the first aSize elements of aArray, each in network byte order.
  // Returns the total number of bytes written.
  template <typename T>
  uint32_t WriteArray(const T &aArray, uint32_t aSize) {
    MOZ_ASSERT(!mBitCount);

    uint32_t size = 0;
    for (uint32_t i = 0; i < aSize; i++) {
      size += Write(aArray[i]);
    }
    return size;
  }

  uint32_t WriteFourCC(const char* aType);

  // Bit writing. Note: the stream needs to be back on a byte boundary
  // before any of the non-bit writing functions are used.
  uint32_t WriteBits(uint64_t aBits, size_t aNumBits);

  // Called by GetContainerData; swaps all the buffers to aOutputBufs.
  nsresult GetBufs(nsTArray<nsTArray<uint8_t>>* aOutputBufs);

  // Presentation time in seconds since midnight, Jan. 1, 1904, in UTC time.
  uint32_t GetTime();

  // Current fragment number.
  uint32_t GetCurFragmentNumber() { return mFragNum; }

  nsresult SetFragment(FragmentBuffer* aFragment);
  FragmentBuffer* GetFragment(uint32_t aType);

  uint32_t GetMuxingType() { return mMuxingType; }

  nsresult SetMetadata(TrackMetadataBase* aTrackMeta);
  nsresult GetAudioMetadata(RefPtr<AudioTrackMetadata>& aAudMeta);
  nsresult GetVideoMetadata(RefPtr<VideoTrackMetadata>& aVidMeta);

  // The track ID is the metadata index in mMetaArray (plus one; see
  // mMetaArray below). This muxer allows only 1 audio track and 1 video
  // track; multiple audio tracks or video tracks in the same file are
  // prohibited.
  uint32_t GetTrackID(TrackMetadataBase::MetadataKind aKind);
  uint32_t GetNextTrackID();

  bool HasAudioTrack();
  bool HasVideoTrack();

private:
  uint32_t GetBufPos();
  nsresult FlushBuf();

  // One of the TYPE_XXX values defined in ISOMediaWriter.
  uint32_t mMuxingType;

  // Audio and video fragments are owned by ISOMediaWriter.
  // These raw pointers cannot go stale because ISOMediaWriter's lifetime
  // is longer than ISOControl's.
  FragmentBuffer* mAudioFragmentBuffer;
  FragmentBuffer* mVideoFragmentBuffer;

  // Number of fragments generated so far.
  uint32_t mFragNum;

  // The (index + 1) will be the track ID.
  nsTArray<RefPtr<TrackMetadataBase>> mMetaArray;

  // Array of output buffers.
  // To save memory usage, audio/video sample data is swapped (not copied)
  // into a new element of this array.
  //
  // For example,
  //   mOutBuffers[0] --> boxes (allocated by muxer)
  //   mOutBuffers[1] --> video raw data (allocated by encoder)
  //   mOutBuffers[2] --> video raw data (allocated by encoder)
  //   mOutBuffers[3] --> video raw data (allocated by encoder)
  //   mOutBuffers[4] --> boxes (allocated by muxer)
  //   mOutBuffers[5] --> audio raw data (allocated by encoder)
  //   ...etc.
  //
  nsTArray<nsTArray<uint8_t>> mOutBuffers;

  // Accumulated output size from Write().
  uint64_t mOutputSize;

  // Bit writing state. mBitCount must be 0 before any byte-boundary writing
  // method (Write(uint32_t), Write(uint16_t)...etc) is called; otherwise
  // those functions assert.
  uint8_t mBitCount;
  uint8_t mBit;
};

}
#endif