summaryrefslogtreecommitdiffstats
path: root/dom/media/encoder
diff options
context:
space:
mode:
authorMatt A. Tobin <mattatobin@localhost.localdomain>2018-02-02 04:16:08 -0500
committerMatt A. Tobin <mattatobin@localhost.localdomain>2018-02-02 04:16:08 -0500
commit5f8de423f190bbb79a62f804151bc24824fa32d8 (patch)
tree10027f336435511475e392454359edea8e25895d /dom/media/encoder
parent49ee0794b5d912db1f95dce6eb52d781dc210db5 (diff)
downloadUXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.gz
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.lz
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.xz
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.zip
Add m-esr52 at 52.6.0
Diffstat (limited to 'dom/media/encoder')
-rw-r--r--dom/media/encoder/ContainerWriter.h78
-rw-r--r--dom/media/encoder/EncodedFrameContainer.h109
-rw-r--r--dom/media/encoder/MediaEncoder.cpp404
-rw-r--r--dom/media/encoder/MediaEncoder.h258
-rw-r--r--dom/media/encoder/OpusTrackEncoder.cpp462
-rw-r--r--dom/media/encoder/OpusTrackEncoder.h91
-rw-r--r--dom/media/encoder/TrackEncoder.cpp342
-rw-r--r--dom/media/encoder/TrackEncoder.h364
-rw-r--r--dom/media/encoder/TrackMetadataBase.h76
-rw-r--r--dom/media/encoder/VP8TrackEncoder.cpp678
-rw-r--r--dom/media/encoder/VP8TrackEncoder.h99
-rw-r--r--dom/media/encoder/fmp4_muxer/AMRBox.cpp84
-rw-r--r--dom/media/encoder/fmp4_muxer/AMRBox.h50
-rw-r--r--dom/media/encoder/fmp4_muxer/AVCBox.cpp87
-rw-r--r--dom/media/encoder/fmp4_muxer/AVCBox.h59
-rw-r--r--dom/media/encoder/fmp4_muxer/EVRCBox.cpp84
-rw-r--r--dom/media/encoder/fmp4_muxer/EVRCBox.h50
-rw-r--r--dom/media/encoder/fmp4_muxer/ISOControl.cpp415
-rw-r--r--dom/media/encoder/fmp4_muxer/ISOControl.h250
-rw-r--r--dom/media/encoder/fmp4_muxer/ISOMediaBoxes.cpp1550
-rw-r--r--dom/media/encoder/fmp4_muxer/ISOMediaBoxes.h781
-rw-r--r--dom/media/encoder/fmp4_muxer/ISOMediaWriter.cpp234
-rw-r--r--dom/media/encoder/fmp4_muxer/ISOMediaWriter.h108
-rw-r--r--dom/media/encoder/fmp4_muxer/ISOTrackMetadata.h131
-rw-r--r--dom/media/encoder/fmp4_muxer/MP4ESDS.cpp138
-rw-r--r--dom/media/encoder/fmp4_muxer/MP4ESDS.h87
-rw-r--r--dom/media/encoder/fmp4_muxer/MuxerOperation.h57
-rw-r--r--dom/media/encoder/fmp4_muxer/moz.build22
-rw-r--r--dom/media/encoder/moz.build56
29 files changed, 7204 insertions, 0 deletions
diff --git a/dom/media/encoder/ContainerWriter.h b/dom/media/encoder/ContainerWriter.h
new file mode 100644
index 000000000..1bd66cbc6
--- /dev/null
+++ b/dom/media/encoder/ContainerWriter.h
@@ -0,0 +1,78 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ContainerWriter_h_
+#define ContainerWriter_h_
+
+#include "nsTArray.h"
+#include "EncodedFrameContainer.h"
+#include "TrackMetadataBase.h"
+
+namespace mozilla {
+/**
+ * ContainerWriter packs encoded track data into a specific media container.
+ */
+class ContainerWriter {
+public:
+ ContainerWriter()
+ : mInitialized(false)
+ , mIsWritingComplete(false)
+ {}
+ virtual ~ContainerWriter() {}
+ // Mapping to DOMLocalMediaStream::TrackTypeHints
+ enum {
+ CREATE_AUDIO_TRACK = 1 << 0,
+ CREATE_VIDEO_TRACK = 1 << 1,
+ };
+ enum {
+ END_OF_STREAM = 1 << 0
+ };
+
+ /**
+ * Writes encoded track data from aBuffer to a packet, and insert this packet
+ * into the internal stream of container writer. aDuration is the playback
+ * duration of this packet in number of samples. aFlags is true with
+ * END_OF_STREAM if this is the last packet of track.
+ * Currently, WriteEncodedTrack doesn't support multiple tracks.
+ */
+ virtual nsresult WriteEncodedTrack(const EncodedFrameContainer& aData,
+ uint32_t aFlags = 0) = 0;
+
+ /**
+ * Set the meta data pointer into muxer
+ * This function will check the integrity of aMetadata.
+ * If the meta data isn't well format, this function will return NS_ERROR_FAILURE to caller,
+ * else save the pointer to mMetadata and return NS_OK.
+ */
+ virtual nsresult SetMetadata(TrackMetadataBase* aMetadata) = 0;
+
+ /**
+ * Indicate if the writer has finished to output data
+ */
+ virtual bool IsWritingComplete() { return mIsWritingComplete; }
+
+ enum {
+ FLUSH_NEEDED = 1 << 0,
+ GET_HEADER = 1 << 1
+ };
+
+ /**
+ * Copies the final container data to a buffer if it has accumulated enough
+ * packets from WriteEncodedTrack. This buffer of data is appended to
+ * aOutputBufs, and existing elements of aOutputBufs should not be modified.
+ * aFlags is true with FLUSH_NEEDED will force OggWriter to flush an ogg page
+ * even it is not full, and copy these container data to a buffer for
+ * aOutputBufs to append.
+ */
+ virtual nsresult GetContainerData(nsTArray<nsTArray<uint8_t> >* aOutputBufs,
+ uint32_t aFlags = 0) = 0;
+protected:
+ bool mInitialized;
+ bool mIsWritingComplete;
+};
+
+} // namespace mozilla
+
+#endif
diff --git a/dom/media/encoder/EncodedFrameContainer.h b/dom/media/encoder/EncodedFrameContainer.h
new file mode 100644
index 000000000..8b7512466
--- /dev/null
+++ b/dom/media/encoder/EncodedFrameContainer.h
@@ -0,0 +1,109 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef EncodedFrameContainer_H_
+#define EncodedFrameContainer_H_
+
+#include "nsTArray.h"
+
+namespace mozilla {
+
+class EncodedFrame;
+
+/*
+ * This container is used to carry video or audio encoded data from encoder to muxer.
+ * The media data object is created by encoder and recycle by the destructor.
+ * Only allow to store audio or video encoded data in EncodedData.
+ */
+class EncodedFrameContainer
+{
+public:
+ // Append encoded frame data
+ void AppendEncodedFrame(EncodedFrame* aEncodedFrame)
+ {
+ mEncodedFrames.AppendElement(aEncodedFrame);
+ }
+ // Retrieve all of the encoded frames
+ const nsTArray<RefPtr<EncodedFrame> >& GetEncodedFrames() const
+ {
+ return mEncodedFrames;
+ }
+private:
+ // This container is used to store the video or audio encoded packets.
+ // Muxer should check mFrameType and get the encoded data type from mEncodedFrames.
+ nsTArray<RefPtr<EncodedFrame> > mEncodedFrames;
+};
+
+// Represent one encoded frame
+class EncodedFrame final
+{
+ NS_INLINE_DECL_THREADSAFE_REFCOUNTING(EncodedFrame)
+public:
+ EncodedFrame() :
+ mTimeStamp(0),
+ mDuration(0),
+ mFrameType(UNKNOWN)
+ {}
+ enum FrameType {
+ VP8_I_FRAME, // VP8 intraframe
+ VP8_P_FRAME, // VP8 predicted frame
+ OPUS_AUDIO_FRAME, // Opus audio frame
+ VORBIS_AUDIO_FRAME,
+ AVC_I_FRAME,
+ AVC_P_FRAME,
+ AVC_B_FRAME,
+ AVC_CSD, // AVC codec specific data
+ AAC_AUDIO_FRAME,
+ AAC_CSD, // AAC codec specific data
+ AMR_AUDIO_CSD,
+ AMR_AUDIO_FRAME,
+ EVRC_AUDIO_CSD,
+ EVRC_AUDIO_FRAME,
+ UNKNOWN // FrameType not set
+ };
+ void SwapInFrameData(nsTArray<uint8_t>& aData)
+ {
+ mFrameData.SwapElements(aData);
+ }
+ nsresult SwapOutFrameData(nsTArray<uint8_t>& aData)
+ {
+ if (mFrameType != UNKNOWN) {
+ // Reset this frame type to UNKNOWN once the data is swapped out.
+ mFrameData.SwapElements(aData);
+ mFrameType = UNKNOWN;
+ return NS_OK;
+ }
+ return NS_ERROR_FAILURE;
+ }
+ const nsTArray<uint8_t>& GetFrameData() const
+ {
+ return mFrameData;
+ }
+ uint64_t GetTimeStamp() const { return mTimeStamp; }
+ void SetTimeStamp(uint64_t aTimeStamp) { mTimeStamp = aTimeStamp; }
+
+ uint64_t GetDuration() const { return mDuration; }
+ void SetDuration(uint64_t aDuration) { mDuration = aDuration; }
+
+ FrameType GetFrameType() const { return mFrameType; }
+ void SetFrameType(FrameType aFrameType) { mFrameType = aFrameType; }
+private:
+ // Private destructor, to discourage deletion outside of Release():
+ ~EncodedFrame()
+ {
+ }
+
+ // Encoded data
+ nsTArray<uint8_t> mFrameData;
+ uint64_t mTimeStamp;
+ // The playback duration of this packet in number of samples
+ uint64_t mDuration;
+ // Represent what is in the FrameData
+ FrameType mFrameType;
+};
+
+} // namespace mozilla
+
+#endif
diff --git a/dom/media/encoder/MediaEncoder.cpp b/dom/media/encoder/MediaEncoder.cpp
new file mode 100644
index 000000000..864b486e4
--- /dev/null
+++ b/dom/media/encoder/MediaEncoder.cpp
@@ -0,0 +1,404 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+#include "MediaEncoder.h"
+#include "MediaDecoder.h"
+#include "nsIPrincipal.h"
+#include "nsMimeTypes.h"
+#include "mozilla/Logging.h"
+#include "mozilla/Preferences.h"
+#include "mozilla/StaticPtr.h"
+#include "mozilla/gfx/Point.h" // IntSize
+
+#include"GeckoProfiler.h"
+#include "OggWriter.h"
+#include "OpusTrackEncoder.h"
+
+#ifdef MOZ_WEBM_ENCODER
+#include "VP8TrackEncoder.h"
+#include "WebMWriter.h"
+#endif
+
+#ifdef LOG
+#undef LOG
+#endif
+
+mozilla::LazyLogModule gMediaEncoderLog("MediaEncoder");
+#define LOG(type, msg) MOZ_LOG(gMediaEncoderLog, type, msg)
+
+namespace mozilla {
+
+void
+MediaStreamVideoRecorderSink::SetCurrentFrames(const VideoSegment& aSegment)
+{
+ MOZ_ASSERT(mVideoEncoder);
+ mVideoEncoder->SetCurrentFrames(aSegment);
+}
+
+void
+MediaEncoder::SetDirectConnect(bool aConnected)
+{
+ mDirectConnected = aConnected;
+}
+
+void
+MediaEncoder::NotifyRealtimeData(MediaStreamGraph* aGraph,
+ TrackID aID,
+ StreamTime aTrackOffset,
+ uint32_t aTrackEvents,
+ const MediaSegment& aRealtimeMedia)
+{
+ if (mSuspended == RECORD_NOT_SUSPENDED) {
+ // Process the incoming raw track data from MediaStreamGraph, called on the
+ // thread of MediaStreamGraph.
+ if (mAudioEncoder && aRealtimeMedia.GetType() == MediaSegment::AUDIO) {
+ mAudioEncoder->NotifyQueuedTrackChanges(aGraph, aID,
+ aTrackOffset, aTrackEvents,
+ aRealtimeMedia);
+ } else if (mVideoEncoder &&
+ aRealtimeMedia.GetType() == MediaSegment::VIDEO &&
+ aTrackEvents != TrackEventCommand::TRACK_EVENT_NONE) {
+ mVideoEncoder->NotifyQueuedTrackChanges(aGraph, aID,
+ aTrackOffset, aTrackEvents,
+ aRealtimeMedia);
+ }
+ }
+}
+
+void
+MediaEncoder::NotifyQueuedTrackChanges(MediaStreamGraph* aGraph,
+ TrackID aID,
+ StreamTime aTrackOffset,
+ TrackEventCommand aTrackEvents,
+ const MediaSegment& aQueuedMedia,
+ MediaStream* aInputStream,
+ TrackID aInputTrackID)
+{
+ if (!mDirectConnected) {
+ NotifyRealtimeData(aGraph, aID, aTrackOffset, aTrackEvents, aQueuedMedia);
+ } else {
+ if (aTrackEvents != TrackEventCommand::TRACK_EVENT_NONE) {
+ // forward events (TRACK_EVENT_ENDED) but not the media
+ if (aQueuedMedia.GetType() == MediaSegment::VIDEO) {
+ VideoSegment segment;
+ NotifyRealtimeData(aGraph, aID, aTrackOffset, aTrackEvents, segment);
+ } else {
+ AudioSegment segment;
+ NotifyRealtimeData(aGraph, aID, aTrackOffset, aTrackEvents, segment);
+ }
+ }
+ if (mSuspended == RECORD_RESUMED) {
+ if (mVideoEncoder) {
+ if (aQueuedMedia.GetType() == MediaSegment::VIDEO) {
+ // insert a null frame of duration equal to the first segment passed
+ // after Resume(), so it'll get added to one of the DirectListener frames
+ VideoSegment segment;
+ gfx::IntSize size(0,0);
+ segment.AppendFrame(nullptr, aQueuedMedia.GetDuration(), size,
+ PRINCIPAL_HANDLE_NONE);
+ mVideoEncoder->NotifyQueuedTrackChanges(aGraph, aID,
+ aTrackOffset, aTrackEvents,
+ segment);
+ mSuspended = RECORD_NOT_SUSPENDED;
+ }
+ } else {
+ mSuspended = RECORD_NOT_SUSPENDED; // no video
+ }
+ }
+ }
+}
+
+void
+MediaEncoder::NotifyQueuedAudioData(MediaStreamGraph* aGraph, TrackID aID,
+ StreamTime aTrackOffset,
+ const AudioSegment& aQueuedMedia,
+ MediaStream* aInputStream,
+ TrackID aInputTrackID)
+{
+ if (!mDirectConnected) {
+ NotifyRealtimeData(aGraph, aID, aTrackOffset, 0, aQueuedMedia);
+ } else {
+ if (mSuspended == RECORD_RESUMED) {
+ if (!mVideoEncoder) {
+ mSuspended = RECORD_NOT_SUSPENDED; // no video
+ }
+ }
+ }
+}
+
+void
+MediaEncoder::NotifyEvent(MediaStreamGraph* aGraph,
+ MediaStreamGraphEvent event)
+{
+ // In case that MediaEncoder does not receive a TRACK_EVENT_ENDED event.
+ LOG(LogLevel::Debug, ("NotifyRemoved in [MediaEncoder]."));
+ if (mAudioEncoder) {
+ mAudioEncoder->NotifyEvent(aGraph, event);
+ }
+ if (mVideoEncoder) {
+ mVideoEncoder->NotifyEvent(aGraph, event);
+ }
+}
+
+/* static */
+already_AddRefed<MediaEncoder>
+MediaEncoder::CreateEncoder(const nsAString& aMIMEType, uint32_t aAudioBitrate,
+ uint32_t aVideoBitrate, uint32_t aBitrate,
+ uint8_t aTrackTypes,
+ TrackRate aTrackRate)
+{
+ PROFILER_LABEL("MediaEncoder", "CreateEncoder",
+ js::ProfileEntry::Category::OTHER);
+
+ nsAutoPtr<ContainerWriter> writer;
+ nsAutoPtr<AudioTrackEncoder> audioEncoder;
+ nsAutoPtr<VideoTrackEncoder> videoEncoder;
+ RefPtr<MediaEncoder> encoder;
+ nsString mimeType;
+ if (!aTrackTypes) {
+ LOG(LogLevel::Error, ("NO TrackTypes!!!"));
+ return nullptr;
+ }
+#ifdef MOZ_WEBM_ENCODER
+ else if (MediaEncoder::IsWebMEncoderEnabled() &&
+ (aMIMEType.EqualsLiteral(VIDEO_WEBM) ||
+ (aTrackTypes & ContainerWriter::CREATE_VIDEO_TRACK))) {
+ if (aTrackTypes & ContainerWriter::CREATE_AUDIO_TRACK
+ && MediaDecoder::IsOpusEnabled()) {
+ audioEncoder = new OpusTrackEncoder();
+ NS_ENSURE_TRUE(audioEncoder, nullptr);
+ }
+ videoEncoder = new VP8TrackEncoder(aTrackRate);
+ writer = new WebMWriter(aTrackTypes);
+ NS_ENSURE_TRUE(writer, nullptr);
+ NS_ENSURE_TRUE(videoEncoder, nullptr);
+ mimeType = NS_LITERAL_STRING(VIDEO_WEBM);
+ }
+#endif //MOZ_WEBM_ENCODER
+ else if (MediaDecoder::IsOggEnabled() && MediaDecoder::IsOpusEnabled() &&
+ (aMIMEType.EqualsLiteral(AUDIO_OGG) ||
+ (aTrackTypes & ContainerWriter::CREATE_AUDIO_TRACK))) {
+ writer = new OggWriter();
+ audioEncoder = new OpusTrackEncoder();
+ NS_ENSURE_TRUE(writer, nullptr);
+ NS_ENSURE_TRUE(audioEncoder, nullptr);
+ mimeType = NS_LITERAL_STRING(AUDIO_OGG);
+ }
+ else {
+ LOG(LogLevel::Error, ("Can not find any encoder to record this media stream"));
+ return nullptr;
+ }
+ LOG(LogLevel::Debug, ("Create encoder result:a[%d] v[%d] w[%d] mimeType = %s.",
+ audioEncoder != nullptr, videoEncoder != nullptr,
+ writer != nullptr, mimeType.get()));
+ if (videoEncoder && aVideoBitrate != 0) {
+ videoEncoder->SetBitrate(aVideoBitrate);
+ }
+ if (audioEncoder && aAudioBitrate != 0) {
+ audioEncoder->SetBitrate(aAudioBitrate);
+ }
+ encoder = new MediaEncoder(writer.forget(), audioEncoder.forget(),
+ videoEncoder.forget(), mimeType, aAudioBitrate,
+ aVideoBitrate, aBitrate);
+ return encoder.forget();
+}
+
+/**
+ * GetEncodedData() runs as a state machine, starting with mState set to
+ * GET_METADDATA, the procedure should be as follow:
+ *
+ * While non-stop
+ * If mState is GET_METADDATA
+ * Get the meta data from audio/video encoder
+ * If a meta data is generated
+ * Get meta data from audio/video encoder
+ * Set mState to ENCODE_TRACK
+ * Return the final container data
+ *
+ * If mState is ENCODE_TRACK
+ * Get encoded track data from audio/video encoder
+ * If a packet of track data is generated
+ * Insert encoded track data into the container stream of writer
+ * If the final container data is copied to aOutput
+ * Return the copy of final container data
+ * If this is the last packet of input stream
+ * Set mState to ENCODE_DONE
+ *
+ * If mState is ENCODE_DONE or ENCODE_ERROR
+ * Stop the loop
+ */
+void
+MediaEncoder::GetEncodedData(nsTArray<nsTArray<uint8_t> >* aOutputBufs,
+ nsAString& aMIMEType)
+{
+ MOZ_ASSERT(!NS_IsMainThread());
+
+ aMIMEType = mMIMEType;
+ PROFILER_LABEL("MediaEncoder", "GetEncodedData",
+ js::ProfileEntry::Category::OTHER);
+
+ bool reloop = true;
+ while (reloop) {
+ switch (mState) {
+ case ENCODE_METADDATA: {
+ LOG(LogLevel::Debug, ("ENCODE_METADDATA TimeStamp = %f", GetEncodeTimeStamp()));
+ nsresult rv = CopyMetadataToMuxer(mAudioEncoder.get());
+ if (NS_FAILED(rv)) {
+ LOG(LogLevel::Error, ("Error! Fail to Set Audio Metadata"));
+ break;
+ }
+ rv = CopyMetadataToMuxer(mVideoEncoder.get());
+ if (NS_FAILED(rv)) {
+ LOG(LogLevel::Error, ("Error! Fail to Set Video Metadata"));
+ break;
+ }
+
+ rv = mWriter->GetContainerData(aOutputBufs,
+ ContainerWriter::GET_HEADER);
+ if (aOutputBufs != nullptr) {
+ mSizeOfBuffer = aOutputBufs->ShallowSizeOfExcludingThis(MallocSizeOf);
+ }
+ if (NS_FAILED(rv)) {
+ LOG(LogLevel::Error,("Error! writer fail to generate header!"));
+ mState = ENCODE_ERROR;
+ break;
+ }
+ LOG(LogLevel::Debug, ("Finish ENCODE_METADDATA TimeStamp = %f", GetEncodeTimeStamp()));
+ mState = ENCODE_TRACK;
+ break;
+ }
+
+ case ENCODE_TRACK: {
+ LOG(LogLevel::Debug, ("ENCODE_TRACK TimeStamp = %f", GetEncodeTimeStamp()));
+ EncodedFrameContainer encodedData;
+ nsresult rv = NS_OK;
+ // We're most likely to actually wait for a video frame, so do that first to minimize
+ // capture offset/lipsync issues
+ rv = WriteEncodedDataToMuxer(mVideoEncoder.get());
+ if (NS_FAILED(rv)) {
+ LOG(LogLevel::Error, ("Fail to write video encoder data to muxer"));
+ break;
+ }
+ rv = WriteEncodedDataToMuxer(mAudioEncoder.get());
+ if (NS_FAILED(rv)) {
+ LOG(LogLevel::Error, ("Error! Fail to write audio encoder data to muxer"));
+ break;
+ }
+ LOG(LogLevel::Debug, ("Audio encoded TimeStamp = %f", GetEncodeTimeStamp()));
+ LOG(LogLevel::Debug, ("Video encoded TimeStamp = %f", GetEncodeTimeStamp()));
+ // In audio only or video only case, let unavailable track's flag to be true.
+ bool isAudioCompleted = (mAudioEncoder && mAudioEncoder->IsEncodingComplete()) || !mAudioEncoder;
+ bool isVideoCompleted = (mVideoEncoder && mVideoEncoder->IsEncodingComplete()) || !mVideoEncoder;
+ rv = mWriter->GetContainerData(aOutputBufs,
+ isAudioCompleted && isVideoCompleted ?
+ ContainerWriter::FLUSH_NEEDED : 0);
+ if (aOutputBufs != nullptr) {
+ mSizeOfBuffer = aOutputBufs->ShallowSizeOfExcludingThis(MallocSizeOf);
+ }
+ if (NS_SUCCEEDED(rv)) {
+ // Successfully get the copy of final container data from writer.
+ reloop = false;
+ }
+ mState = (mWriter->IsWritingComplete()) ? ENCODE_DONE : ENCODE_TRACK;
+ LOG(LogLevel::Debug, ("END ENCODE_TRACK TimeStamp = %f "
+ "mState = %d aComplete %d vComplete %d",
+ GetEncodeTimeStamp(), mState, isAudioCompleted, isVideoCompleted));
+ break;
+ }
+
+ case ENCODE_DONE:
+ case ENCODE_ERROR:
+ LOG(LogLevel::Debug, ("MediaEncoder has been shutdown."));
+ mSizeOfBuffer = 0;
+ mShutdown = true;
+ reloop = false;
+ break;
+ default:
+ MOZ_CRASH("Invalid encode state");
+ }
+ }
+}
+
+nsresult
+MediaEncoder::WriteEncodedDataToMuxer(TrackEncoder *aTrackEncoder)
+{
+ if (aTrackEncoder == nullptr) {
+ return NS_OK;
+ }
+ if (aTrackEncoder->IsEncodingComplete()) {
+ return NS_OK;
+ }
+
+ PROFILER_LABEL("MediaEncoder", "WriteEncodedDataToMuxer",
+ js::ProfileEntry::Category::OTHER);
+
+ EncodedFrameContainer encodedVideoData;
+ nsresult rv = aTrackEncoder->GetEncodedTrack(encodedVideoData);
+ if (NS_FAILED(rv)) {
+ // Encoding might be canceled.
+ LOG(LogLevel::Error, ("Error! Fail to get encoded data from video encoder."));
+ mState = ENCODE_ERROR;
+ return rv;
+ }
+ rv = mWriter->WriteEncodedTrack(encodedVideoData,
+ aTrackEncoder->IsEncodingComplete() ?
+ ContainerWriter::END_OF_STREAM : 0);
+ if (NS_FAILED(rv)) {
+ LOG(LogLevel::Error, ("Error! Fail to write encoded video track to the media container."));
+ mState = ENCODE_ERROR;
+ }
+ return rv;
+}
+
+nsresult
+MediaEncoder::CopyMetadataToMuxer(TrackEncoder *aTrackEncoder)
+{
+ if (aTrackEncoder == nullptr) {
+ return NS_OK;
+ }
+
+ PROFILER_LABEL("MediaEncoder", "CopyMetadataToMuxer",
+ js::ProfileEntry::Category::OTHER);
+
+ RefPtr<TrackMetadataBase> meta = aTrackEncoder->GetMetadata();
+ if (meta == nullptr) {
+ LOG(LogLevel::Error, ("Error! metadata = null"));
+ mState = ENCODE_ERROR;
+ return NS_ERROR_ABORT;
+ }
+
+ nsresult rv = mWriter->SetMetadata(meta);
+ if (NS_FAILED(rv)) {
+ LOG(LogLevel::Error, ("Error! SetMetadata fail"));
+ mState = ENCODE_ERROR;
+ }
+ return rv;
+}
+
+#ifdef MOZ_WEBM_ENCODER
+bool
+MediaEncoder::IsWebMEncoderEnabled()
+{
+ return Preferences::GetBool("media.encoder.webm.enabled");
+}
+#endif
+
+/*
+ * SizeOfExcludingThis measures memory being used by the Media Encoder.
+ * Currently it measures the size of the Encoder buffer and memory occupied
+ * by mAudioEncoder and mVideoEncoder.
+ */
+size_t
+MediaEncoder::SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) const
+{
+ size_t amount = 0;
+ if (mState == ENCODE_TRACK) {
+ amount = mSizeOfBuffer +
+ (mAudioEncoder != nullptr ? mAudioEncoder->SizeOfExcludingThis(aMallocSizeOf) : 0) +
+ (mVideoEncoder != nullptr ? mVideoEncoder->SizeOfExcludingThis(aMallocSizeOf) : 0);
+ }
+ return amount;
+}
+
+} // namespace mozilla
diff --git a/dom/media/encoder/MediaEncoder.h b/dom/media/encoder/MediaEncoder.h
new file mode 100644
index 000000000..41d7e71e2
--- /dev/null
+++ b/dom/media/encoder/MediaEncoder.h
@@ -0,0 +1,258 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef MediaEncoder_h_
+#define MediaEncoder_h_
+
+#include "mozilla/DebugOnly.h"
+#include "TrackEncoder.h"
+#include "ContainerWriter.h"
+#include "CubebUtils.h"
+#include "MediaStreamGraph.h"
+#include "MediaStreamListener.h"
+#include "nsAutoPtr.h"
+#include "MediaStreamVideoSink.h"
+#include "nsIMemoryReporter.h"
+#include "mozilla/MemoryReporting.h"
+#include "mozilla/Atomics.h"
+
+namespace mozilla {
+
+class MediaStreamVideoRecorderSink : public MediaStreamVideoSink
+{
+public:
+ explicit MediaStreamVideoRecorderSink(VideoTrackEncoder* aEncoder)
+ : mVideoEncoder(aEncoder) {}
+
+ // MediaStreamVideoSink methods
+ virtual void SetCurrentFrames(const VideoSegment& aSegment) override;
+ virtual void ClearFrames() override {}
+
+private:
+ virtual ~MediaStreamVideoRecorderSink() {}
+ VideoTrackEncoder* mVideoEncoder;
+};
+
+/**
+ * MediaEncoder is the framework of encoding module, it controls and manages
+ * procedures between ContainerWriter and TrackEncoder. ContainerWriter packs
+ * the encoded track data with a specific container (e.g. ogg, mp4).
+ * AudioTrackEncoder and VideoTrackEncoder are subclasses of TrackEncoder, and
+ * are responsible for encoding raw data coming from MediaStreamGraph.
+ *
+ * Also, MediaEncoder is a type of MediaStreamListener, it starts to receive raw
+ * segments after itself is added to the source stream. In the mean time,
+ * encoded track data is pulled by its owner periodically on a worker thread. A
+ * reentrant monitor is used to protect the push and pull of resource.
+ *
+ * MediaEncoder is designed to be a passive component, neither it owns nor in
+ * charge of managing threads. However, a monitor is used in function
+ * TrackEncoder::GetEncodedTrack() for the purpose of thread safety (e.g.
+ * between callbacks of MediaStreamListener and others), a call to this function
+ * might block. Therefore, MediaEncoder should not run on threads that forbid
+ * blocking, such as main thread or I/O thread.
+ *
+ * For example, an usage from MediaRecorder of this component would be:
+ * 1) Create an encoder with a valid MIME type.
+ * => encoder = MediaEncoder::CreateEncoder(aMIMEType);
+ * It then generate a ContainerWriter according to the MIME type, and an
+ * AudioTrackEncoder (or a VideoTrackEncoder too) associated with the media
+ * type.
+ *
+ * 2) Dispatch the task GetEncodedData() to a worker thread.
+ *
+ * 3) To start encoding, add this component to its source stream.
+ * => sourceStream->AddListener(encoder);
+ *
+ * 4) To stop encoding, remove this component from its source stream.
+ * => sourceStream->RemoveListener(encoder);
+ */
+class MediaEncoder : public DirectMediaStreamListener
+{
+ friend class MediaStreamVideoRecorderSink;
+public :
+ enum {
+ ENCODE_METADDATA,
+ ENCODE_TRACK,
+ ENCODE_DONE,
+ ENCODE_ERROR,
+ };
+
+ MediaEncoder(ContainerWriter* aWriter,
+ AudioTrackEncoder* aAudioEncoder,
+ VideoTrackEncoder* aVideoEncoder,
+ const nsAString& aMIMEType,
+ uint32_t aAudioBitrate,
+ uint32_t aVideoBitrate,
+ uint32_t aBitrate)
+ : mWriter(aWriter)
+ , mAudioEncoder(aAudioEncoder)
+ , mVideoEncoder(aVideoEncoder)
+ , mVideoSink(new MediaStreamVideoRecorderSink(mVideoEncoder))
+ , mStartTime(TimeStamp::Now())
+ , mMIMEType(aMIMEType)
+ , mSizeOfBuffer(0)
+ , mState(MediaEncoder::ENCODE_METADDATA)
+ , mShutdown(false)
+ , mDirectConnected(false)
+ , mSuspended(false)
+{}
+
+ ~MediaEncoder() {};
+
+ enum SuspendState {
+ RECORD_NOT_SUSPENDED,
+ RECORD_SUSPENDED,
+ RECORD_RESUMED
+ };
+
+ /* Note - called from control code, not on MSG threads. */
+ void Suspend()
+ {
+ mSuspended = RECORD_SUSPENDED;
+ }
+
+ /**
+ * Note - called from control code, not on MSG threads.
+ * Arm to collect the Duration of the next video frame and give it
+ * to the next frame, in order to avoid any possible loss of sync. */
+ void Resume()
+ {
+ if (mSuspended == RECORD_SUSPENDED) {
+ mSuspended = RECORD_RESUMED;
+ }
+ }
+
+ /**
+ * Tells us which Notify to pay attention to for media
+ */
+ void SetDirectConnect(bool aConnected);
+
+ /**
+ * Notified by the AppendToTrack in MediaStreamGraph; aRealtimeMedia is the raw
+ * track data in form of MediaSegment.
+ */
+ void NotifyRealtimeData(MediaStreamGraph* aGraph, TrackID aID,
+ StreamTime aTrackOffset,
+ uint32_t aTrackEvents,
+ const MediaSegment& aRealtimeMedia) override;
+
+ /**
+ * Notified by the control loop of MediaStreamGraph; aQueueMedia is the raw
+ * track data in form of MediaSegment.
+ */
+ void NotifyQueuedTrackChanges(MediaStreamGraph* aGraph, TrackID aID,
+ StreamTime aTrackOffset,
+ TrackEventCommand aTrackEvents,
+ const MediaSegment& aQueuedMedia,
+ MediaStream* aInputStream,
+ TrackID aInputTrackID) override;
+
+ /**
+ * Notifed by the control loop of MediaStreamGraph; aQueueMedia is the audio
+ * data in the form of an AudioSegment.
+ */
+ void NotifyQueuedAudioData(MediaStreamGraph* aGraph, TrackID aID,
+ StreamTime aTrackOffset,
+ const AudioSegment& aQueuedMedia,
+ MediaStream* aInputStream,
+ TrackID aInputTrackID) override;
+
+ /**
+ * * Notified the stream is being removed.
+ */
+ void NotifyEvent(MediaStreamGraph* aGraph,
+ MediaStreamGraphEvent event) override;
+
+ /**
+ * Creates an encoder with a given MIME type. Returns null if we are unable
+ * to create the encoder. For now, default aMIMEType to "audio/ogg" and use
+ * Ogg+Opus if it is empty.
+ */
+ static already_AddRefed<MediaEncoder> CreateEncoder(const nsAString& aMIMEType,
+ uint32_t aAudioBitrate, uint32_t aVideoBitrate,
+ uint32_t aBitrate,
+ uint8_t aTrackTypes = ContainerWriter::CREATE_AUDIO_TRACK,
+ TrackRate aTrackRate = CubebUtils::PreferredSampleRate());
+ /**
+ * Encodes the raw track data and returns the final container data. Assuming
+ * it is called on a single worker thread. The buffer of container data is
+ * allocated in ContainerWriter::GetContainerData(), and is appended to
+ * aOutputBufs. aMIMEType is the valid mime-type of this returned container
+ * data.
+ */
+ void GetEncodedData(nsTArray<nsTArray<uint8_t> >* aOutputBufs,
+ nsAString& aMIMEType);
+
+ /**
+ * Return true if MediaEncoder has been shutdown. Reasons are encoding
+ * complete, encounter an error, or being canceled by its caller.
+ */
+ bool IsShutdown()
+ {
+ return mShutdown;
+ }
+
+ /**
+ * Cancel the encoding, and wakes up the lock of reentrant monitor in encoder.
+ */
+ void Cancel()
+ {
+ if (mAudioEncoder) {
+ mAudioEncoder->NotifyCancel();
+ }
+ if (mVideoEncoder) {
+ mVideoEncoder->NotifyCancel();
+ }
+ }
+
+ bool HasError()
+ {
+ return mState == ENCODE_ERROR;
+ }
+
+#ifdef MOZ_WEBM_ENCODER
+ static bool IsWebMEncoderEnabled();
+#endif
+
+ MOZ_DEFINE_MALLOC_SIZE_OF(MallocSizeOf)
+ /*
+ * Measure the size of the buffer, and memory occupied by mAudioEncoder
+ * and mVideoEncoder
+ */
+ size_t SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) const;
+
+ MediaStreamVideoRecorderSink* GetVideoSink() {
+ return mVideoSink.get();
+ }
+
+private:
+ // Get encoded data from trackEncoder and write to muxer
+ nsresult WriteEncodedDataToMuxer(TrackEncoder *aTrackEncoder);
+ // Get metadata from trackEncoder and copy to muxer
+ nsresult CopyMetadataToMuxer(TrackEncoder* aTrackEncoder);
+ nsAutoPtr<ContainerWriter> mWriter;
+ nsAutoPtr<AudioTrackEncoder> mAudioEncoder;
+ nsAutoPtr<VideoTrackEncoder> mVideoEncoder;
+ RefPtr<MediaStreamVideoRecorderSink> mVideoSink;
+ TimeStamp mStartTime;
+ nsString mMIMEType;
+ int64_t mSizeOfBuffer;
+ int mState;
+ bool mShutdown;
+ bool mDirectConnected;
+ Atomic<int> mSuspended;
+ // Get duration from create encoder, for logging purpose
+ double GetEncodeTimeStamp()
+ {
+ TimeDuration decodeTime;
+ decodeTime = TimeStamp::Now() - mStartTime;
+ return decodeTime.ToMilliseconds();
+ }
+};
+
+} // namespace mozilla
+
+#endif
diff --git a/dom/media/encoder/OpusTrackEncoder.cpp b/dom/media/encoder/OpusTrackEncoder.cpp
new file mode 100644
index 000000000..c65d57788
--- /dev/null
+++ b/dom/media/encoder/OpusTrackEncoder.cpp
@@ -0,0 +1,462 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+#include "OpusTrackEncoder.h"
+#include "nsString.h"
+#include "GeckoProfiler.h"
+
+#include <opus/opus.h>
+
+#undef LOG
+#ifdef MOZ_WIDGET_GONK
+#include <android/log.h>
+#define LOG(args...) __android_log_print(ANDROID_LOG_INFO, "MediaEncoder", ## args);
+#else
+#define LOG(args, ...)
+#endif
+
+namespace mozilla {
+
+// The Opus format supports up to 8 channels, and supports multitrack audio up
+// to 255 channels, but the current implementation supports only mono and
+// stereo, and downmixes any more than that.
+static const int MAX_SUPPORTED_AUDIO_CHANNELS = 8;
+
+// http://www.opus-codec.org/docs/html_api-1.0.2/group__opus__encoder.html
+// In section "opus_encoder_init", channels must be 1 or 2 of input signal.
+static const int MAX_CHANNELS = 2;
+
+// A maximum data bytes for Opus to encode.
+static const int MAX_DATA_BYTES = 4096;
+
+// http://tools.ietf.org/html/draft-ietf-codec-oggopus-00#section-4
+// Second paragraph, " The granule position of an audio data page is in units
+// of PCM audio samples at a fixed rate of 48 kHz."
+static const int kOpusSamplingRate = 48000;
+
+// The duration of an Opus frame, and it must be 2.5, 5, 10, 20, 40 or 60 ms.
+static const int kFrameDurationMs = 20;
+
+// The supported sampling rate of input signal (Hz),
+// must be one of the following. Will be resampled to 48kHz otherwise.
+static const int kOpusSupportedInputSamplingRates[] =
+ {8000, 12000, 16000, 24000, 48000};
+
+namespace {
+
+// An endian-neutral serialization of integers. Serializing T in little endian
+// format to aOutput, where T is a 16 bits or 32 bits integer.
+template<typename T>
+static void
+SerializeToBuffer(T aValue, nsTArray<uint8_t>* aOutput)
+{
+ for (uint32_t i = 0; i < sizeof(T); i++) {
+ aOutput->AppendElement((uint8_t)(0x000000ff & (aValue >> (i * 8))));
+ }
+}
+
+static inline void
+SerializeToBuffer(const nsCString& aComment, nsTArray<uint8_t>* aOutput)
+{
+ // Format of serializing a string to buffer is, the length of string (32 bits,
+ // little endian), and the string.
+ SerializeToBuffer((uint32_t)(aComment.Length()), aOutput);
+ aOutput->AppendElements(aComment.get(), aComment.Length());
+}
+
+
+static void
+SerializeOpusIdHeader(uint8_t aChannelCount, uint16_t aPreskip,
+ uint32_t aInputSampleRate, nsTArray<uint8_t>* aOutput)
+{
+ // The magic signature, null terminator has to be stripped off from strings.
+ static const uint8_t magic[] = "OpusHead";
+ aOutput->AppendElements(magic, sizeof(magic) - 1);
+
+ // The version must always be 1 (8 bits, unsigned).
+ aOutput->AppendElement(1);
+
+ // Number of output channels (8 bits, unsigned).
+ aOutput->AppendElement(aChannelCount);
+
+ // Number of samples (at 48 kHz) to discard from the decoder output when
+ // starting playback (16 bits, unsigned, little endian).
+ SerializeToBuffer(aPreskip, aOutput);
+
+ // The sampling rate of input source (32 bits, unsigned, little endian).
+ SerializeToBuffer(aInputSampleRate, aOutput);
+
+ // Output gain, an encoder should set this field to zero (16 bits, signed,
+ // little endian).
+ SerializeToBuffer((int16_t)0, aOutput);
+
+ // Channel mapping family. Family 0 allows only 1 or 2 channels (8 bits,
+ // unsigned).
+ aOutput->AppendElement(0);
+}
+
+static void
+SerializeOpusCommentHeader(const nsCString& aVendor,
+ const nsTArray<nsCString>& aComments,
+ nsTArray<uint8_t>* aOutput)
+{
+ // The magic signature, null terminator has to be stripped off.
+ static const uint8_t magic[] = "OpusTags";
+ aOutput->AppendElements(magic, sizeof(magic) - 1);
+
+ // The vendor; Should append in the following order:
+ // vendor string length (32 bits, unsigned, little endian)
+ // vendor string.
+ SerializeToBuffer(aVendor, aOutput);
+
+ // Add comments; Should append in the following order:
+ // comment list length (32 bits, unsigned, little endian)
+ // comment #0 string length (32 bits, unsigned, little endian)
+ // comment #0 string
+ // comment #1 string length (32 bits, unsigned, little endian)
+ // comment #1 string ...
+ SerializeToBuffer((uint32_t)aComments.Length(), aOutput);
+ for (uint32_t i = 0; i < aComments.Length(); ++i) {
+ SerializeToBuffer(aComments[i], aOutput);
+ }
+}
+
+} // Anonymous namespace.
+
+OpusTrackEncoder::OpusTrackEncoder()
+ : AudioTrackEncoder()
+ , mEncoder(nullptr)
+ , mLookahead(0)
+ , mResampler(nullptr)
+ , mOutputTimeStamp(0)
+{
+}
+
+OpusTrackEncoder::~OpusTrackEncoder()
+{
+ if (mEncoder) {
+ opus_encoder_destroy(mEncoder);
+ }
+ if (mResampler) {
+ speex_resampler_destroy(mResampler);
+ mResampler = nullptr;
+ }
+}
+
+nsresult
+OpusTrackEncoder::Init(int aChannels, int aSamplingRate)
+{
+ // This monitor is used to wake up other methods that are waiting for encoder
+ // to be completely initialized.
+ ReentrantMonitorAutoEnter mon(mReentrantMonitor);
+
+ NS_ENSURE_TRUE((aChannels <= MAX_SUPPORTED_AUDIO_CHANNELS) && (aChannels > 0),
+ NS_ERROR_FAILURE);
+
+  // This version of the encoder API only supports 1 or 2 channels,
+  // so set mChannels to at most 2 and
+  // let InterleaveTrackData downmix the PCM data.
+ mChannels = aChannels > MAX_CHANNELS ? MAX_CHANNELS : aChannels;
+
+ // Reject non-audio sample rates.
+ NS_ENSURE_TRUE(aSamplingRate >= 8000, NS_ERROR_INVALID_ARG);
+ NS_ENSURE_TRUE(aSamplingRate <= 192000, NS_ERROR_INVALID_ARG);
+
+ // According to www.opus-codec.org, creating an opus encoder requires the
+ // sampling rate of source signal be one of 8000, 12000, 16000, 24000, or
+ // 48000. If this constraint is not satisfied, we resample the input to 48kHz.
+ nsTArray<int> supportedSamplingRates;
+ supportedSamplingRates.AppendElements(kOpusSupportedInputSamplingRates,
+ ArrayLength(kOpusSupportedInputSamplingRates));
+ if (!supportedSamplingRates.Contains(aSamplingRate)) {
+ int error;
+ mResampler = speex_resampler_init(mChannels,
+ aSamplingRate,
+ kOpusSamplingRate,
+ SPEEX_RESAMPLER_QUALITY_DEFAULT,
+ &error);
+
+ if (error != RESAMPLER_ERR_SUCCESS) {
+ return NS_ERROR_FAILURE;
+ }
+ }
+ mSamplingRate = aSamplingRate;
+ NS_ENSURE_TRUE(mSamplingRate > 0, NS_ERROR_FAILURE);
+
+ int error = 0;
+ mEncoder = opus_encoder_create(GetOutputSampleRate(), mChannels,
+ OPUS_APPLICATION_AUDIO, &error);
+
+
+ mInitialized = (error == OPUS_OK);
+
+ if (mAudioBitrate) {
+ opus_encoder_ctl(mEncoder, OPUS_SET_BITRATE(static_cast<int>(mAudioBitrate)));
+ }
+
+ mReentrantMonitor.NotifyAll();
+
+ return error == OPUS_OK ? NS_OK : NS_ERROR_FAILURE;
+}
+
+int
+OpusTrackEncoder::GetOutputSampleRate()
+{
+ return mResampler ? kOpusSamplingRate : mSamplingRate;
+}
+
+int
+OpusTrackEncoder::GetPacketDuration()
+{
+ return GetOutputSampleRate() * kFrameDurationMs / 1000;
+}
+
+already_AddRefed<TrackMetadataBase>
+OpusTrackEncoder::GetMetadata()
+{
+ PROFILER_LABEL("OpusTrackEncoder", "GetMetadata",
+ js::ProfileEntry::Category::OTHER);
+ {
+ // Wait if mEncoder is not initialized.
+ ReentrantMonitorAutoEnter mon(mReentrantMonitor);
+ while (!mCanceled && !mInitialized) {
+ mReentrantMonitor.Wait();
+ }
+ }
+
+ if (mCanceled || mEncodingComplete) {
+ return nullptr;
+ }
+
+ RefPtr<OpusMetadata> meta = new OpusMetadata();
+ meta->mChannels = mChannels;
+ meta->mSamplingFrequency = mSamplingRate;
+
+ mLookahead = 0;
+ int error = opus_encoder_ctl(mEncoder, OPUS_GET_LOOKAHEAD(&mLookahead));
+ if (error != OPUS_OK) {
+ mLookahead = 0;
+ }
+
+ // The ogg time stamping and pre-skip is always timed at 48000.
+ SerializeOpusIdHeader(mChannels, mLookahead * (kOpusSamplingRate /
+ GetOutputSampleRate()), mSamplingRate,
+ &meta->mIdHeader);
+
+ nsCString vendor;
+ vendor.AppendASCII(opus_get_version_string());
+
+ nsTArray<nsCString> comments;
+ comments.AppendElement(NS_LITERAL_CSTRING("ENCODER=Mozilla" MOZ_APP_UA_VERSION));
+
+ SerializeOpusCommentHeader(vendor, comments,
+ &meta->mCommentHeader);
+
+ return meta.forget();
+}
+
+nsresult
+OpusTrackEncoder::GetEncodedTrack(EncodedFrameContainer& aData)
+{
+ PROFILER_LABEL("OpusTrackEncoder", "GetEncodedTrack",
+ js::ProfileEntry::Category::OTHER);
+ {
+ ReentrantMonitorAutoEnter mon(mReentrantMonitor);
+ // Wait until initialized or cancelled.
+ while (!mCanceled && !mInitialized) {
+ mReentrantMonitor.Wait();
+ }
+ if (mCanceled || mEncodingComplete) {
+ return NS_ERROR_FAILURE;
+ }
+ }
+
+  // The calculation below depends on mInitialized being true.
+ MOZ_ASSERT(mInitialized);
+
+ bool wait = true;
+ int result = 0;
+ // Only wait once, then loop until we run out of packets of input data
+ while (result >= 0 && !mEncodingComplete) {
+ // re-sampled frames left last time which didn't fit into an Opus packet duration.
+ const int framesLeft = mResampledLeftover.Length() / mChannels;
+ // When framesLeft is 0, (GetPacketDuration() - framesLeft) is a multiple
+    // of kOpusSamplingRate. There is no precision loss in the integer division
+    // in computing framesToFetch. If framesLeft > 0, we need to add 1 to
+ // framesToFetch to ensure there will be at least n frames after re-sampling.
+ const int frameRoundUp = framesLeft ? 1 : 0;
+
+ MOZ_ASSERT(GetPacketDuration() >= framesLeft);
+ // Try to fetch m frames such that there will be n frames
+ // where (n + frameLeft) >= GetPacketDuration() after re-sampling.
+ const int framesToFetch = !mResampler ? GetPacketDuration()
+ : (GetPacketDuration() - framesLeft) * mSamplingRate / kOpusSamplingRate
+ + frameRoundUp;
+ {
+ // Move all the samples from mRawSegment to mSourceSegment. We only hold
+ // the monitor in this block.
+ ReentrantMonitorAutoEnter mon(mReentrantMonitor);
+
+ // Wait until enough raw data, end of stream or cancelled.
+ while (!mCanceled && mRawSegment.GetDuration() +
+ mSourceSegment.GetDuration() < framesToFetch &&
+ !mEndOfStream) {
+ if (wait) {
+ mReentrantMonitor.Wait();
+ wait = false;
+ } else {
+ goto done; // nested while's...
+ }
+ }
+
+ if (mCanceled) {
+ return NS_ERROR_FAILURE;
+ }
+
+ mSourceSegment.AppendFrom(&mRawSegment);
+
+      // Pad |mLookahead| samples to the end of the source stream to prevent loss of
+ // original data, the pcm duration will be calculated at rate 48K later.
+ if (mEndOfStream && !mEosSetInEncoder) {
+ mEosSetInEncoder = true;
+ mSourceSegment.AppendNullData(mLookahead);
+ }
+ }
+
+ // Start encoding data.
+ AutoTArray<AudioDataValue, 9600> pcm;
+ pcm.SetLength(GetPacketDuration() * mChannels);
+ AudioSegment::ChunkIterator iter(mSourceSegment);
+ int frameCopied = 0;
+
+ while (!iter.IsEnded() && frameCopied < framesToFetch) {
+ AudioChunk chunk = *iter;
+
+ // Chunk to the required frame size.
+ int frameToCopy = chunk.GetDuration();
+ if (frameCopied + frameToCopy > framesToFetch) {
+ frameToCopy = framesToFetch - frameCopied;
+ }
+
+ if (!chunk.IsNull()) {
+ // Append the interleaved data to the end of pcm buffer.
+ AudioTrackEncoder::InterleaveTrackData(chunk, frameToCopy, mChannels,
+ pcm.Elements() + frameCopied * mChannels);
+ } else {
+ memset(pcm.Elements() + frameCopied * mChannels, 0,
+ frameToCopy * mChannels * sizeof(AudioDataValue));
+ }
+
+ frameCopied += frameToCopy;
+ iter.Next();
+ }
+
+ RefPtr<EncodedFrame> audiodata = new EncodedFrame();
+ audiodata->SetFrameType(EncodedFrame::OPUS_AUDIO_FRAME);
+ int framesInPCM = frameCopied;
+ if (mResampler) {
+ AutoTArray<AudioDataValue, 9600> resamplingDest;
+ // We want to consume all the input data, so we slightly oversize the
+ // resampled data buffer so we can fit the output data in. We cannot really
+ // predict the output frame count at each call.
+ uint32_t outframes = frameCopied * kOpusSamplingRate / mSamplingRate + 1;
+ uint32_t inframes = frameCopied;
+
+ resamplingDest.SetLength(outframes * mChannels);
+
+#if MOZ_SAMPLE_TYPE_S16
+ short* in = reinterpret_cast<short*>(pcm.Elements());
+ short* out = reinterpret_cast<short*>(resamplingDest.Elements());
+ speex_resampler_process_interleaved_int(mResampler, in, &inframes,
+ out, &outframes);
+#else
+ float* in = reinterpret_cast<float*>(pcm.Elements());
+ float* out = reinterpret_cast<float*>(resamplingDest.Elements());
+ speex_resampler_process_interleaved_float(mResampler, in, &inframes,
+ out, &outframes);
+#endif
+
+ MOZ_ASSERT(pcm.Length() >= mResampledLeftover.Length());
+ PodCopy(pcm.Elements(), mResampledLeftover.Elements(),
+ mResampledLeftover.Length());
+
+ uint32_t outframesToCopy = std::min(outframes,
+ static_cast<uint32_t>(GetPacketDuration() - framesLeft));
+
+ MOZ_ASSERT(pcm.Length() - mResampledLeftover.Length() >=
+ outframesToCopy * mChannels);
+ PodCopy(pcm.Elements() + mResampledLeftover.Length(),
+ resamplingDest.Elements(), outframesToCopy * mChannels);
+ int frameLeftover = outframes - outframesToCopy;
+ mResampledLeftover.SetLength(frameLeftover * mChannels);
+ PodCopy(mResampledLeftover.Elements(),
+ resamplingDest.Elements() + outframesToCopy * mChannels,
+ mResampledLeftover.Length());
+ // This is always at 48000Hz.
+ framesInPCM = framesLeft + outframesToCopy;
+ audiodata->SetDuration(framesInPCM);
+ } else {
+ // The ogg time stamping and pre-skip is always timed at 48000.
+ audiodata->SetDuration(frameCopied * (kOpusSamplingRate / mSamplingRate));
+ }
+
+ // Remove the raw data which has been pulled to pcm buffer.
+    // The value of frameCopied should be equal to (or smaller than, if EOS)
+ // GetPacketDuration().
+ mSourceSegment.RemoveLeading(frameCopied);
+
+    // Has reached the end of the input stream and all queued data has been pulled for
+ // encoding.
+ if (mSourceSegment.GetDuration() == 0 && mEosSetInEncoder) {
+ mEncodingComplete = true;
+ LOG("[Opus] Done encoding.");
+ }
+
+ MOZ_ASSERT(mEosSetInEncoder || framesInPCM == GetPacketDuration());
+
+ // Append null data to pcm buffer if the leftover data is not enough for
+ // opus encoder.
+ if (framesInPCM < GetPacketDuration() && mEosSetInEncoder) {
+ PodZero(pcm.Elements() + framesInPCM * mChannels,
+ (GetPacketDuration() - framesInPCM) * mChannels);
+ }
+ nsTArray<uint8_t> frameData;
+ // Encode the data with Opus Encoder.
+ frameData.SetLength(MAX_DATA_BYTES);
+ // result is returned as opus error code if it is negative.
+ result = 0;
+#ifdef MOZ_SAMPLE_TYPE_S16
+ const opus_int16* pcmBuf = static_cast<opus_int16*>(pcm.Elements());
+ result = opus_encode(mEncoder, pcmBuf, GetPacketDuration(),
+ frameData.Elements(), MAX_DATA_BYTES);
+#else
+ const float* pcmBuf = static_cast<float*>(pcm.Elements());
+ result = opus_encode_float(mEncoder, pcmBuf, GetPacketDuration(),
+ frameData.Elements(), MAX_DATA_BYTES);
+#endif
+ frameData.SetLength(result >= 0 ? result : 0);
+
+ if (result < 0) {
+ LOG("[Opus] Fail to encode data! Result: %s.", opus_strerror(result));
+ }
+ if (mEncodingComplete) {
+ if (mResampler) {
+ speex_resampler_destroy(mResampler);
+ mResampler = nullptr;
+ }
+ mResampledLeftover.SetLength(0);
+ }
+
+ audiodata->SwapInFrameData(frameData);
+ // timestamp should be the time of the first sample
+ audiodata->SetTimeStamp(mOutputTimeStamp);
+ mOutputTimeStamp += FramesToUsecs(GetPacketDuration(), kOpusSamplingRate).value();
+ LOG("[Opus] mOutputTimeStamp %lld.",mOutputTimeStamp);
+ aData.AppendEncodedFrame(audiodata);
+ }
+done:
+ return result >= 0 ? NS_OK : NS_ERROR_FAILURE;
+}
+
+} // namespace mozilla
diff --git a/dom/media/encoder/OpusTrackEncoder.h b/dom/media/encoder/OpusTrackEncoder.h
new file mode 100644
index 000000000..8fd21d49b
--- /dev/null
+++ b/dom/media/encoder/OpusTrackEncoder.h
@@ -0,0 +1,91 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef OpusTrackEncoder_h_
+#define OpusTrackEncoder_h_
+
+#include <stdint.h>
+#include <speex/speex_resampler.h>
+#include "TrackEncoder.h"
+
+struct OpusEncoder;
+
+namespace mozilla {
+
+// Opus meta data structure
+class OpusMetadata : public TrackMetadataBase
+{
+public:
+ // The ID Header of OggOpus. refer to http://wiki.xiph.org/OggOpus.
+ nsTArray<uint8_t> mIdHeader;
+ // The Comment Header of OggOpus.
+ nsTArray<uint8_t> mCommentHeader;
+ int32_t mChannels;
+ float mSamplingFrequency;
+ MetadataKind GetKind() const override { return METADATA_OPUS; }
+};
+
+class OpusTrackEncoder : public AudioTrackEncoder
+{
+public:
+ OpusTrackEncoder();
+ virtual ~OpusTrackEncoder();
+
+ already_AddRefed<TrackMetadataBase> GetMetadata() override;
+
+ nsresult GetEncodedTrack(EncodedFrameContainer& aData) override;
+
+protected:
+ int GetPacketDuration() override;
+
+ nsresult Init(int aChannels, int aSamplingRate) override;
+
+ /**
+ * Get the samplerate of the data to be fed to the Opus encoder. This might be
+ * different from the input samplerate if resampling occurs.
+ */
+ int GetOutputSampleRate();
+
+private:
+ /**
+ * The Opus encoder from libopus.
+ */
+ OpusEncoder* mEncoder;
+
+ /**
+ * A local segment queue which takes the raw data out from mRawSegment in the
+ * call of GetEncodedTrack(). Opus encoder only accepts GetPacketDuration()
+ * samples from mSourceSegment every encoding cycle, thus it needs to be
+ * global in order to store the leftover segments taken from mRawSegment.
+ */
+ AudioSegment mSourceSegment;
+
+ /**
+ * Total samples of delay added by codec, can be queried by the encoder. From
+ * the perspective of decoding, real data begins this many samples late, so
+ * the encoder needs to append this many null samples to the end of stream,
+ * in order to align the time of input and output.
+ */
+ int mLookahead;
+
+ /**
+ * If the input sample rate does not divide 48kHz evenly, the input data are
+ * resampled.
+ */
+ SpeexResamplerState* mResampler;
+
+ /**
+ * Store the resampled frames that don't fit into an Opus packet duration.
+ * They will be prepended to the resampled frames next encoding cycle.
+ */
+ nsTArray<AudioDataValue> mResampledLeftover;
+
+ // TimeStamp in microseconds.
+ uint64_t mOutputTimeStamp;
+};
+
+} // namespace mozilla
+
+#endif
diff --git a/dom/media/encoder/TrackEncoder.cpp b/dom/media/encoder/TrackEncoder.cpp
new file mode 100644
index 000000000..ea39bb5a6
--- /dev/null
+++ b/dom/media/encoder/TrackEncoder.cpp
@@ -0,0 +1,342 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+#include "TrackEncoder.h"
+#include "AudioChannelFormat.h"
+#include "MediaStreamGraph.h"
+#include "MediaStreamListener.h"
+#include "mozilla/Logging.h"
+#include "VideoUtils.h"
+
+#undef LOG
+#ifdef MOZ_WIDGET_GONK
+#include <android/log.h>
+#define LOG(args...) __android_log_print(ANDROID_LOG_INFO, "MediaEncoder", ## args);
+#else
+#define LOG(args, ...)
+#endif
+
+namespace mozilla {
+
+LazyLogModule gTrackEncoderLog("TrackEncoder");
+#define TRACK_LOG(type, msg) MOZ_LOG(gTrackEncoderLog, type, msg)
+
+static const int DEFAULT_CHANNELS = 1;
+static const int DEFAULT_SAMPLING_RATE = 16000;
+static const int DEFAULT_FRAME_WIDTH = 640;
+static const int DEFAULT_FRAME_HEIGHT = 480;
+static const int DEFAULT_TRACK_RATE = USECS_PER_S;
+// 30 second threshold after which we give up if the encoder still can't be initialized.
+static const int INIT_FAILED_DURATION = 30;
+
+TrackEncoder::TrackEncoder()
+ : mReentrantMonitor("media.TrackEncoder")
+ , mEncodingComplete(false)
+ , mEosSetInEncoder(false)
+ , mInitialized(false)
+ , mEndOfStream(false)
+ , mCanceled(false)
+ , mInitCounter(0)
+ , mNotInitDuration(0)
+{
+}
+
+void TrackEncoder::NotifyEvent(MediaStreamGraph* aGraph,
+ MediaStreamGraphEvent event)
+{
+ if (event == MediaStreamGraphEvent::EVENT_REMOVED) {
+ NotifyEndOfStream();
+ }
+}
+
+void
+AudioTrackEncoder::NotifyQueuedTrackChanges(MediaStreamGraph* aGraph,
+ TrackID aID,
+ StreamTime aTrackOffset,
+ uint32_t aTrackEvents,
+ const MediaSegment& aQueuedMedia)
+{
+ if (mCanceled) {
+ return;
+ }
+
+ const AudioSegment& audio = static_cast<const AudioSegment&>(aQueuedMedia);
+
+ // Check and initialize parameters for codec encoder.
+ if (!mInitialized) {
+ mInitCounter++;
+ TRACK_LOG(LogLevel::Debug, ("Init the audio encoder %d times", mInitCounter));
+ AudioSegment::ChunkIterator iter(const_cast<AudioSegment&>(audio));
+ while (!iter.IsEnded()) {
+ AudioChunk chunk = *iter;
+
+ // The number of channels is determined by the first non-null chunk, and
+ // thus the audio encoder is initialized at this time.
+ if (!chunk.IsNull()) {
+ nsresult rv = Init(chunk.mChannelData.Length(), aGraph->GraphRate());
+ if (NS_FAILED(rv)) {
+ LOG("[AudioTrackEncoder]: Fail to initialize the encoder!");
+ NotifyCancel();
+ }
+ break;
+ }
+
+ iter.Next();
+ }
+
+ mNotInitDuration += aQueuedMedia.GetDuration();
+ if (!mInitialized &&
+ (mNotInitDuration / aGraph->GraphRate() > INIT_FAILED_DURATION) &&
+ mInitCounter > 1) {
+ LOG("[AudioTrackEncoder]: Initialize failed for 30s.");
+ NotifyEndOfStream();
+ return;
+ }
+ }
+
+ // Append and consume this raw segment.
+ AppendAudioSegment(audio);
+
+
+ // The stream has stopped and reached the end of track.
+ if (aTrackEvents == TrackEventCommand::TRACK_EVENT_ENDED) {
+ LOG("[AudioTrackEncoder]: Receive TRACK_EVENT_ENDED .");
+ NotifyEndOfStream();
+ }
+}
+
+void
+AudioTrackEncoder::NotifyEndOfStream()
+{
+ // If source audio track is completely silent till the end of encoding,
+ // initialize the encoder with default channel counts and sampling rate.
+ if (!mCanceled && !mInitialized) {
+ Init(DEFAULT_CHANNELS, DEFAULT_SAMPLING_RATE);
+ }
+
+ ReentrantMonitorAutoEnter mon(mReentrantMonitor);
+ mEndOfStream = true;
+ mReentrantMonitor.NotifyAll();
+}
+
+nsresult
+AudioTrackEncoder::AppendAudioSegment(const AudioSegment& aSegment)
+{
+ ReentrantMonitorAutoEnter mon(mReentrantMonitor);
+
+ AudioSegment::ChunkIterator iter(const_cast<AudioSegment&>(aSegment));
+ while (!iter.IsEnded()) {
+ AudioChunk chunk = *iter;
+ // Append and consume both non-null and null chunks.
+ mRawSegment.AppendAndConsumeChunk(&chunk);
+ iter.Next();
+ }
+
+ if (mRawSegment.GetDuration() >= GetPacketDuration()) {
+ mReentrantMonitor.NotifyAll();
+ }
+
+ return NS_OK;
+}
+
+/*static*/
+void
+AudioTrackEncoder::InterleaveTrackData(AudioChunk& aChunk,
+ int32_t aDuration,
+ uint32_t aOutputChannels,
+ AudioDataValue* aOutput)
+{
+ uint32_t numChannelsToCopy = std::min(aOutputChannels,
+ static_cast<uint32_t>(aChunk.mChannelData.Length()));
+ switch(aChunk.mBufferFormat) {
+ case AUDIO_FORMAT_S16: {
+ AutoTArray<const int16_t*, 2> array;
+ array.SetLength(numChannelsToCopy);
+ for (uint32_t i = 0; i < array.Length(); i++) {
+ array[i] = static_cast<const int16_t*>(aChunk.mChannelData[i]);
+ }
+ InterleaveTrackData(array, aDuration, aOutputChannels, aOutput, aChunk.mVolume);
+ break;
+ }
+ case AUDIO_FORMAT_FLOAT32: {
+ AutoTArray<const float*, 2> array;
+ array.SetLength(numChannelsToCopy);
+ for (uint32_t i = 0; i < array.Length(); i++) {
+ array[i] = static_cast<const float*>(aChunk.mChannelData[i]);
+ }
+ InterleaveTrackData(array, aDuration, aOutputChannels, aOutput, aChunk.mVolume);
+ break;
+ }
+ case AUDIO_FORMAT_SILENCE: {
+ MOZ_ASSERT(false, "To implement.");
+ }
+ };
+}
+
+/*static*/
+void
+AudioTrackEncoder::DeInterleaveTrackData(AudioDataValue* aInput,
+ int32_t aDuration,
+ int32_t aChannels,
+ AudioDataValue* aOutput)
+{
+ for (int32_t i = 0; i < aChannels; ++i) {
+ for(int32_t j = 0; j < aDuration; ++j) {
+ aOutput[i * aDuration + j] = aInput[i + j * aChannels];
+ }
+ }
+}
+
+size_t
+AudioTrackEncoder::SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) const
+{
+ return mRawSegment.SizeOfExcludingThis(aMallocSizeOf);
+}
+
+void
+VideoTrackEncoder::Init(const VideoSegment& aSegment)
+{
+ if (mInitialized) {
+ return;
+ }
+
+ mInitCounter++;
+ TRACK_LOG(LogLevel::Debug, ("Init the video encoder %d times", mInitCounter));
+ VideoSegment::ConstChunkIterator iter(aSegment);
+ while (!iter.IsEnded()) {
+ VideoChunk chunk = *iter;
+ if (!chunk.IsNull()) {
+ gfx::IntSize imgsize = chunk.mFrame.GetImage()->GetSize();
+ gfx::IntSize intrinsicSize = chunk.mFrame.GetIntrinsicSize();
+ nsresult rv = Init(imgsize.width, imgsize.height,
+ intrinsicSize.width, intrinsicSize.height);
+
+ if (NS_FAILED(rv)) {
+ LOG("[VideoTrackEncoder]: Fail to initialize the encoder!");
+ NotifyCancel();
+ }
+ break;
+ }
+
+ iter.Next();
+ }
+
+ mNotInitDuration += aSegment.GetDuration();
+ if ((mNotInitDuration / mTrackRate > INIT_FAILED_DURATION) &&
+ mInitCounter > 1) {
+ LOG("[VideoTrackEncoder]: Initialize failed for %ds.", INIT_FAILED_DURATION);
+ NotifyEndOfStream();
+ return;
+ }
+
+}
+
+void
+VideoTrackEncoder::SetCurrentFrames(const VideoSegment& aSegment)
+{
+ if (mCanceled) {
+ return;
+ }
+
+ Init(aSegment);
+ AppendVideoSegment(aSegment);
+}
+
+void
+VideoTrackEncoder::NotifyQueuedTrackChanges(MediaStreamGraph* aGraph,
+ TrackID aID,
+ StreamTime aTrackOffset,
+ uint32_t aTrackEvents,
+ const MediaSegment& aQueuedMedia)
+{
+ if (mCanceled) {
+ return;
+ }
+
+ if (!(aTrackEvents == TRACK_EVENT_CREATED ||
+ aTrackEvents == TRACK_EVENT_ENDED)) {
+ return;
+ }
+
+ const VideoSegment& video = static_cast<const VideoSegment&>(aQueuedMedia);
+
+ // Check and initialize parameters for codec encoder.
+ Init(video);
+
+ AppendVideoSegment(video);
+
+ // The stream has stopped and reached the end of track.
+ if (aTrackEvents == TrackEventCommand::TRACK_EVENT_ENDED) {
+ LOG("[VideoTrackEncoder]: Receive TRACK_EVENT_ENDED .");
+ NotifyEndOfStream();
+ }
+
+}
+
+nsresult
+VideoTrackEncoder::AppendVideoSegment(const VideoSegment& aSegment)
+{
+ ReentrantMonitorAutoEnter mon(mReentrantMonitor);
+
+  // Append all video segments from MediaStreamGraph, including null and
+ // non-null frames.
+ VideoSegment::ChunkIterator iter(const_cast<VideoSegment&>(aSegment));
+ while (!iter.IsEnded()) {
+ VideoChunk chunk = *iter;
+ mLastFrameDuration += chunk.GetDuration();
+ // Send only the unique video frames for encoding.
+ // Or if we got the same video chunks more than 1 seconds,
+ // force to send into encoder.
+ if ((mLastFrame != chunk.mFrame) ||
+ (mLastFrameDuration >= mTrackRate)) {
+ RefPtr<layers::Image> image = chunk.mFrame.GetImage();
+
+ // Because we may get chunks with a null image (due to input blocking),
+ // accumulate duration and give it to the next frame that arrives.
+ // Canonically incorrect - the duration should go to the previous frame
+ // - but that would require delaying until the next frame arrives.
+ // Best would be to do like OMXEncoder and pass an effective timestamp
+ // in with each frame.
+ if (image) {
+ mRawSegment.AppendFrame(image.forget(),
+ mLastFrameDuration,
+ chunk.mFrame.GetIntrinsicSize(),
+ PRINCIPAL_HANDLE_NONE,
+ chunk.mFrame.GetForceBlack());
+ mLastFrameDuration = 0;
+ }
+ }
+ mLastFrame.TakeFrom(&chunk.mFrame);
+ iter.Next();
+ }
+
+ if (mRawSegment.GetDuration() > 0) {
+ mReentrantMonitor.NotifyAll();
+ }
+
+ return NS_OK;
+}
+
+void
+VideoTrackEncoder::NotifyEndOfStream()
+{
+ // If source video track is muted till the end of encoding, initialize the
+ // encoder with default frame width, frame height, and track rate.
+ if (!mCanceled && !mInitialized) {
+ Init(DEFAULT_FRAME_WIDTH, DEFAULT_FRAME_HEIGHT,
+ DEFAULT_FRAME_WIDTH, DEFAULT_FRAME_HEIGHT);
+ }
+
+ ReentrantMonitorAutoEnter mon(mReentrantMonitor);
+ mEndOfStream = true;
+ mReentrantMonitor.NotifyAll();
+}
+
+size_t
+VideoTrackEncoder::SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) const
+{
+ return mRawSegment.SizeOfExcludingThis(aMallocSizeOf);
+}
+
+} // namespace mozilla
diff --git a/dom/media/encoder/TrackEncoder.h b/dom/media/encoder/TrackEncoder.h
new file mode 100644
index 000000000..33f20e899
--- /dev/null
+++ b/dom/media/encoder/TrackEncoder.h
@@ -0,0 +1,364 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef TrackEncoder_h_
+#define TrackEncoder_h_
+
+#include "mozilla/ReentrantMonitor.h"
+
+#include "AudioSegment.h"
+#include "EncodedFrameContainer.h"
+#include "StreamTracks.h"
+#include "TrackMetadataBase.h"
+#include "VideoSegment.h"
+#include "MediaStreamGraph.h"
+
+namespace mozilla {
+
+/**
+ * Base class of AudioTrackEncoder and VideoTrackEncoder. Lifetimes managed by
+ * MediaEncoder. Most methods can only be called on the MediaEncoder's thread,
+ * but some subclass methods can be called on other threads when noted.
+ *
+ * NotifyQueuedTrackChanges is called on subclasses of this class from the
+ * MediaStreamGraph thread, and AppendAudioSegment/AppendVideoSegment is then
+ * called to store media data in the TrackEncoder. Later on, GetEncodedTrack is
+ * called on MediaEncoder's thread to encode and retrieve the encoded data.
+ */
+class TrackEncoder
+{
+public:
+ TrackEncoder();
+
+ virtual ~TrackEncoder() {}
+
+ /**
+ * Notified by the same callback of MediaEncoder when it has received a track
+ * change from MediaStreamGraph. Called on the MediaStreamGraph thread.
+ */
+ virtual void NotifyQueuedTrackChanges(MediaStreamGraph* aGraph, TrackID aID,
+ StreamTime aTrackOffset,
+ uint32_t aTrackEvents,
+ const MediaSegment& aQueuedMedia) = 0;
+
+ /**
+ * Notified by the same callback of MediaEncoder when it has been removed from
+ * MediaStreamGraph. Called on the MediaStreamGraph thread.
+ */
+ void NotifyEvent(MediaStreamGraph* aGraph,
+ MediaStreamGraphEvent event);
+
+ /**
+ * Creates and sets up meta data for a specific codec, called on the worker
+ * thread.
+ */
+ virtual already_AddRefed<TrackMetadataBase> GetMetadata() = 0;
+
+ /**
+ * Encodes raw segments. Result data is returned in aData, and called on the
+ * worker thread.
+ */
+ virtual nsresult GetEncodedTrack(EncodedFrameContainer& aData) = 0;
+
+ /**
+ * True if the track encoder has encoded all source segments coming from
+ * MediaStreamGraph. Call on the worker thread.
+ */
+ bool IsEncodingComplete() { return mEncodingComplete; }
+
+ /**
+ * Notifies from MediaEncoder to cancel the encoding, and wakes up
+ * mReentrantMonitor if encoder is waiting on it.
+ */
+ void NotifyCancel()
+ {
+ ReentrantMonitorAutoEnter mon(mReentrantMonitor);
+ mCanceled = true;
+ mReentrantMonitor.NotifyAll();
+ }
+
+ virtual void SetBitrate(const uint32_t aBitrate) {}
+
+protected:
+ /**
+ * Notifies track encoder that we have reached the end of source stream, and
+ * wakes up mReentrantMonitor if encoder is waiting for any source data.
+ */
+ virtual void NotifyEndOfStream() = 0;
+
+ /**
+ * A ReentrantMonitor to protect the pushing and pulling of mRawSegment which
+ * is declared in its subclasses, and the following flags: mInitialized,
+ * mEndOfStream and mCanceled. The control of protection is managed by its
+ * subclasses.
+ */
+ ReentrantMonitor mReentrantMonitor;
+
+ /**
+ * True if the track encoder has encoded all source data.
+ */
+ bool mEncodingComplete;
+
+ /**
+ * True if flag of EOS or any form of indicating EOS has set in the codec-
+ * encoder.
+ */
+ bool mEosSetInEncoder;
+
+ /**
+ * True if the track encoder has initialized successfully, protected by
+ * mReentrantMonitor.
+ */
+ bool mInitialized;
+
+ /**
+ * True if the TrackEncoder has received an event of TRACK_EVENT_ENDED from
+ * MediaStreamGraph, or the MediaEncoder is removed from its source stream,
+ * protected by mReentrantMonitor.
+ */
+ bool mEndOfStream;
+
+ /**
+ * True if a cancellation of encoding is sent from MediaEncoder, protected by
+ * mReentrantMonitor.
+ */
+ bool mCanceled;
+
+ // How many times we have tried to initialize the encoder.
+ uint32_t mInitCounter;
+ StreamTime mNotInitDuration;
+};
+
+class AudioTrackEncoder : public TrackEncoder
+{
+public:
+ AudioTrackEncoder()
+ : TrackEncoder()
+ , mChannels(0)
+ , mSamplingRate(0)
+ , mAudioBitrate(0)
+ {}
+
+ void NotifyQueuedTrackChanges(MediaStreamGraph* aGraph, TrackID aID,
+ StreamTime aTrackOffset,
+ uint32_t aTrackEvents,
+ const MediaSegment& aQueuedMedia) override;
+
+ template<typename T>
+ static
+ void InterleaveTrackData(nsTArray<const T*>& aInput,
+ int32_t aDuration,
+ uint32_t aOutputChannels,
+ AudioDataValue* aOutput,
+ float aVolume)
+ {
+ if (aInput.Length() < aOutputChannels) {
+ // Up-mix. This might make the mChannelData have more than aChannels.
+ AudioChannelsUpMix(&aInput, aOutputChannels, SilentChannel::ZeroChannel<T>());
+ }
+
+ if (aInput.Length() > aOutputChannels) {
+ DownmixAndInterleave(aInput, aDuration,
+ aVolume, aOutputChannels, aOutput);
+ } else {
+ InterleaveAndConvertBuffer(aInput.Elements(), aDuration, aVolume,
+ aOutputChannels, aOutput);
+ }
+ }
+
+ /**
+ * Interleaves the track data and stores the result into aOutput. Might need
+ * to up-mix or down-mix the channel data if the channels number of this chunk
+ * is different from aOutputChannels. The channel data from aChunk might be
+ * modified by up-mixing.
+ */
+ static void InterleaveTrackData(AudioChunk& aChunk, int32_t aDuration,
+ uint32_t aOutputChannels,
+ AudioDataValue* aOutput);
+
+ /**
+ * De-interleaves the aInput data and stores the result into aOutput.
+ * No up-mix or down-mix operations inside.
+ */
+ static void DeInterleaveTrackData(AudioDataValue* aInput, int32_t aDuration,
+ int32_t aChannels, AudioDataValue* aOutput);
+ /**
+ * Measure size of mRawSegment
+ */
+ size_t SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) const;
+
+ void SetBitrate(const uint32_t aBitrate) override
+ {
+ mAudioBitrate = aBitrate;
+ }
+protected:
+ /**
+ * Number of samples per channel in a pcm buffer. This is also the value of
+ * frame size required by audio encoder, and mReentrantMonitor will be
+ * notified when at least this much data has been added to mRawSegment.
+ */
+ virtual int GetPacketDuration() { return 0; }
+
+ /**
+ * Initializes the audio encoder. The call of this method is delayed until we
+ * have received the first valid track from MediaStreamGraph, and the
+ * mReentrantMonitor will be notified if other methods is waiting for encoder
+ * to be completely initialized. This method is called on the MediaStreamGraph
+ * thread.
+ */
+ virtual nsresult Init(int aChannels, int aSamplingRate) = 0;
+
+ /**
+ * Appends and consumes track data from aSegment, this method is called on
+ * the MediaStreamGraph thread. mReentrantMonitor will be notified when at
+ * least GetPacketDuration() data has been added to mRawSegment, wake up other
+ * method which is waiting for more data from mRawSegment.
+ */
+ nsresult AppendAudioSegment(const AudioSegment& aSegment);
+
+ /**
+ * Notifies the audio encoder that we have reached the end of source stream,
+ * and wakes up mReentrantMonitor if encoder is waiting for more track data.
+ */
+ void NotifyEndOfStream() override;
+
+ /**
+ * The number of channels used for processing PCM data in the audio encoder.
+ * This value comes from the first valid audio chunk. If encoder can't support
+ * the channels in the chunk, downmix PCM stream can be performed.
+ * This value also be used to initialize the audio encoder.
+ */
+ int mChannels;
+
+ /**
+ * The sampling rate of source audio data.
+ */
+ int mSamplingRate;
+
+ /**
+ * A segment queue of audio track data, protected by mReentrantMonitor.
+ */
+ AudioSegment mRawSegment;
+
+ uint32_t mAudioBitrate;
+};
+
+class VideoTrackEncoder : public TrackEncoder
+{
+public:
+ explicit VideoTrackEncoder(TrackRate aTrackRate)
+ : TrackEncoder()
+ , mFrameWidth(0)
+ , mFrameHeight(0)
+ , mDisplayWidth(0)
+ , mDisplayHeight(0)
+ , mTrackRate(aTrackRate)
+ , mTotalFrameDuration(0)
+ , mLastFrameDuration(0)
+ , mVideoBitrate(0)
+ {}
+
+ /**
+ * Notified by the same callback of MediaEncoder when it has received a track
+ * change from MediaStreamGraph. Called on the MediaStreamGraph thread.
+ */
+ void NotifyQueuedTrackChanges(MediaStreamGraph* aGraph, TrackID aID,
+ StreamTime aTrackOffset,
+ uint32_t aTrackEvents,
+ const MediaSegment& aQueuedMedia) override;
+ /**
+ * Measure size of mRawSegment
+ */
+ size_t SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) const;
+
+ void SetBitrate(const uint32_t aBitrate) override
+ {
+ mVideoBitrate = aBitrate;
+ }
+
+ void Init(const VideoSegment& aSegment);
+
+ void SetCurrentFrames(const VideoSegment& aSegment);
+
+ StreamTime SecondsToMediaTime(double aS) const
+ {
+ NS_ASSERTION(0 <= aS && aS <= TRACK_TICKS_MAX/TRACK_RATE_MAX,
+ "Bad seconds");
+ return mTrackRate * aS;
+ }
+
+protected:
+ /**
+ * Initialized the video encoder. In order to collect the value of width and
+ * height of source frames, this initialization is delayed until we have
+ * received the first valid video frame from MediaStreamGraph;
+ * mReentrantMonitor will be notified after it has successfully initialized,
+ * and this method is called on the MediaStreamGraph thread.
+ */
+ virtual nsresult Init(int aWidth, int aHeight, int aDisplayWidth,
+ int aDisplayHeight) = 0;
+
+ /**
+ * Appends source video frames to mRawSegment. We only append the source chunk
+ * if it is unique to mLastFrame. Called on the MediaStreamGraph thread.
+ */
+ nsresult AppendVideoSegment(const VideoSegment& aSegment);
+
+ /**
+ * Tells the video track encoder that we've reached the end of source stream,
+ * and wakes up mReentrantMonitor if encoder is waiting for more track data.
+ * Called on the MediaStreamGraph thread.
+ */
+ void NotifyEndOfStream() override;
+
+ /**
+ * The width of source video frame, ceiled if the source width is odd.
+ */
+ int mFrameWidth;
+
+ /**
+ * The height of source video frame, ceiled if the source height is odd.
+ */
+ int mFrameHeight;
+
+ /**
+ * The display width of source video frame.
+ */
+ int mDisplayWidth;
+
+ /**
+ * The display height of source video frame.
+ */
+ int mDisplayHeight;
+
+ /**
+ * The track rate of source video.
+ */
+ TrackRate mTrackRate;
+
+ /**
+ * The total duration of frames in encoded video in StreamTime, kept track of
+ * in subclasses.
+ */
+ StreamTime mTotalFrameDuration;
+
+ /**
+ * The last unique frame and duration we've sent to track encoder,
+ * kept track of in subclasses.
+ */
+ VideoFrame mLastFrame;
+ StreamTime mLastFrameDuration;
+
+ /**
+ * A segment queue of video track data, protected by mReentrantMonitor.
+ */
+ VideoSegment mRawSegment;
+
+ uint32_t mVideoBitrate;
+};
+
+} // namespace mozilla
+
+#endif
diff --git a/dom/media/encoder/TrackMetadataBase.h b/dom/media/encoder/TrackMetadataBase.h
new file mode 100644
index 000000000..a8b818c09
--- /dev/null
+++ b/dom/media/encoder/TrackMetadataBase.h
@@ -0,0 +1,76 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef TrackMetadataBase_h_
+#define TrackMetadataBase_h_
+
+#include "nsTArray.h"
+#include "nsCOMPtr.h"
+namespace mozilla {
+
+// A class representing metadata for various codec formats. Only supports one track's information.
+class TrackMetadataBase
+{
+public:
+ NS_INLINE_DECL_THREADSAFE_REFCOUNTING(TrackMetadataBase)
+ enum MetadataKind {
+ METADATA_OPUS, // Represent the Opus metadata
+ METADATA_VP8,
+ METADATA_VORBIS,
+ METADATA_AVC,
+ METADATA_AAC,
+ METADATA_AMR,
+ METADATA_EVRC,
+ METADATA_UNKNOWN // Metadata Kind not set
+ };
+ // Return the specific metadata kind
+ virtual MetadataKind GetKind() const = 0;
+
+protected:
+ // Protected destructor, to discourage deletion outside of Release():
+ virtual ~TrackMetadataBase() {}
+};
+
+// The base class for audio metadata.
+class AudioTrackMetadata : public TrackMetadataBase {
+public:
+ // The duration of each sample set generated by the encoder. (counted in samples)
+ // If the duration is variant, this value should return 0.
+ virtual uint32_t GetAudioFrameDuration() = 0;
+
+ // The size of each sample set generated by the encoder. (counted in bytes)
+ // If the size is variant, this value should return 0.
+ virtual uint32_t GetAudioFrameSize() = 0;
+
+ // AudioSampleRate is the number of audio sample per second.
+ virtual uint32_t GetAudioSampleRate() = 0;
+
+ virtual uint32_t GetAudioChannels() = 0;
+};
+
+// The base class for video metadata.
+class VideoTrackMetadata : public TrackMetadataBase {
+public:
+ // VideoHeight and VideoWidth are the frame size of the elementary stream.
+ virtual uint32_t GetVideoHeight() = 0;
+ virtual uint32_t GetVideoWidth() = 0;
+
+ // VideoDisplayHeight and VideoDisplayWidth are the display frame size.
+ virtual uint32_t GetVideoDisplayHeight() = 0;
+ virtual uint32_t GetVideoDisplayWidth() = 0;
+
+ // VideoClockRate is the number of samples per second in video frame's
+ // timestamp.
+ // For example, if VideoClockRate is 90k Hz and VideoFrameRate is
+ // 30 fps, each frame's sample duration will be 3000 ticks.
+ virtual uint32_t GetVideoClockRate() = 0;
+
+ // VideoFrameRate is the number of frames per second.
+ virtual uint32_t GetVideoFrameRate() = 0;
+};
+
+} // namespace mozilla
+
+#endif
diff --git a/dom/media/encoder/VP8TrackEncoder.cpp b/dom/media/encoder/VP8TrackEncoder.cpp
new file mode 100644
index 000000000..1e5451f0f
--- /dev/null
+++ b/dom/media/encoder/VP8TrackEncoder.cpp
@@ -0,0 +1,678 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "VP8TrackEncoder.h"
+#include "GeckoProfiler.h"
+#include "LayersLogging.h"
+#include "libyuv.h"
+#include "mozilla/gfx/2D.h"
+#include "prsystem.h"
+#include "VideoSegment.h"
+#include "VideoUtils.h"
+#include "vpx/vp8cx.h"
+#include "vpx/vpx_encoder.h"
+#include "WebMWriter.h"
+#include "mozilla/media/MediaUtils.h"
+
+namespace mozilla {
+
+LazyLogModule gVP8TrackEncoderLog("VP8TrackEncoder");
+#define VP8LOG(msg, ...) MOZ_LOG(gVP8TrackEncoderLog, mozilla::LogLevel::Debug, \
+ (msg, ##__VA_ARGS__))
+// Debug logging macro with object pointer and class name.
+
+#define DEFAULT_BITRATE_BPS 2500000
+#define DEFAULT_ENCODE_FRAMERATE 30
+
+using namespace mozilla::gfx;
+using namespace mozilla::layers;
+
+VP8TrackEncoder::VP8TrackEncoder(TrackRate aTrackRate)
+ : VideoTrackEncoder(aTrackRate)
+ , mEncodedFrameDuration(0)
+ , mEncodedTimestamp(0)
+ , mRemainingTicks(0)
+ , mVPXContext(new vpx_codec_ctx_t())
+ , mVPXImageWrapper(new vpx_image_t())
+{
+ MOZ_COUNT_CTOR(VP8TrackEncoder);
+}
+
+VP8TrackEncoder::~VP8TrackEncoder()
+{
+ if (mInitialized) {
+ vpx_codec_destroy(mVPXContext);
+ }
+
+ if (mVPXImageWrapper) {
+ vpx_img_free(mVPXImageWrapper);
+ }
+ MOZ_COUNT_DTOR(VP8TrackEncoder);
+}
+
+nsresult
+VP8TrackEncoder::Init(int32_t aWidth, int32_t aHeight, int32_t aDisplayWidth,
+ int32_t aDisplayHeight)
+{
+ if (aWidth < 1 || aHeight < 1 || aDisplayWidth < 1 || aDisplayHeight < 1) {
+ return NS_ERROR_FAILURE;
+ }
+
+ ReentrantMonitorAutoEnter mon(mReentrantMonitor);
+
+ mEncodedFrameRate = DEFAULT_ENCODE_FRAMERATE;
+ mEncodedFrameDuration = mTrackRate / mEncodedFrameRate;
+ mFrameWidth = aWidth;
+ mFrameHeight = aHeight;
+ mDisplayWidth = aDisplayWidth;
+ mDisplayHeight = aDisplayHeight;
+
+ // Encoder configuration structure.
+ vpx_codec_enc_cfg_t config;
+ memset(&config, 0, sizeof(vpx_codec_enc_cfg_t));
+ if (vpx_codec_enc_config_default(vpx_codec_vp8_cx(), &config, 0)) {
+ return NS_ERROR_FAILURE;
+ }
+
+ // Creating a wrapper to the image - setting image data to NULL. Actual
+ // pointer will be set in encode. Setting align to 1, as it is meaningless
+ // (actual memory is not allocated).
+ vpx_img_wrap(mVPXImageWrapper, VPX_IMG_FMT_I420,
+ mFrameWidth, mFrameHeight, 1, nullptr);
+
+ config.g_w = mFrameWidth;
+ config.g_h = mFrameHeight;
+ // TODO: Maybe we should have various aFrameRate bitrate pair for each devices?
+ // or for different platform
+
+ // rc_target_bitrate needs kbit/s
+ config.rc_target_bitrate = (mVideoBitrate != 0 ? mVideoBitrate : DEFAULT_BITRATE_BPS)/1000;
+
+ // Setting the time base of the codec
+ config.g_timebase.num = 1;
+ config.g_timebase.den = mTrackRate;
+
+ config.g_error_resilient = 0;
+
+ config.g_lag_in_frames = 0; // 0- no frame lagging
+
+ int32_t number_of_cores = PR_GetNumberOfProcessors();
+ if (mFrameWidth * mFrameHeight > 1280 * 960 && number_of_cores >= 6) {
+ config.g_threads = 3; // 3 threads for 1080p.
+ } else if (mFrameWidth * mFrameHeight > 640 * 480 && number_of_cores >= 3) {
+ config.g_threads = 2; // 2 threads for qHD/HD.
+ } else {
+ config.g_threads = 1; // 1 thread for VGA or less
+ }
+
+ // rate control settings
+ config.rc_dropframe_thresh = 0;
+ config.rc_end_usage = VPX_CBR;
+ config.g_pass = VPX_RC_ONE_PASS;
+ // ffmpeg doesn't currently support streams that use resize.
+ // Therefore, for safety, we should turn it off until it does.
+ config.rc_resize_allowed = 0;
+ config.rc_undershoot_pct = 100;
+ config.rc_overshoot_pct = 15;
+ config.rc_buf_initial_sz = 500;
+ config.rc_buf_optimal_sz = 600;
+ config.rc_buf_sz = 1000;
+
+ config.kf_mode = VPX_KF_AUTO;
+ // Ensure that we can output one I-frame per second.
+ config.kf_max_dist = mEncodedFrameRate;
+
+ vpx_codec_flags_t flags = 0;
+ flags |= VPX_CODEC_USE_OUTPUT_PARTITION;
+ if (vpx_codec_enc_init(mVPXContext, vpx_codec_vp8_cx(), &config, flags)) {
+ return NS_ERROR_FAILURE;
+ }
+
+ vpx_codec_control(mVPXContext, VP8E_SET_STATIC_THRESHOLD, 1);
+ vpx_codec_control(mVPXContext, VP8E_SET_CPUUSED, -6);
+ vpx_codec_control(mVPXContext, VP8E_SET_TOKEN_PARTITIONS,
+ VP8_ONE_TOKENPARTITION);
+
+ mInitialized = true;
+ mon.NotifyAll();
+
+ return NS_OK;
+}
+
+already_AddRefed<TrackMetadataBase>
+VP8TrackEncoder::GetMetadata()
+{
+ PROFILER_LABEL("VP8TrackEncoder", "GetMetadata",
+ js::ProfileEntry::Category::OTHER);
+ {
+ // Wait if mEncoder is not initialized.
+ ReentrantMonitorAutoEnter mon(mReentrantMonitor);
+ while (!mCanceled && !mInitialized) {
+ mon.Wait();
+ }
+ }
+
+ if (mCanceled || mEncodingComplete) {
+ return nullptr;
+ }
+
+ RefPtr<VP8Metadata> meta = new VP8Metadata();
+ meta->mWidth = mFrameWidth;
+ meta->mHeight = mFrameHeight;
+ meta->mDisplayWidth = mDisplayWidth;
+ meta->mDisplayHeight = mDisplayHeight;
+ meta->mEncodedFrameRate = mEncodedFrameRate;
+
+ return meta.forget();
+}
+
+bool
+VP8TrackEncoder::GetEncodedPartitions(EncodedFrameContainer& aData)
+{
+ vpx_codec_iter_t iter = nullptr;
+ EncodedFrame::FrameType frameType = EncodedFrame::VP8_P_FRAME;
+ nsTArray<uint8_t> frameData;
+ const vpx_codec_cx_pkt_t *pkt = nullptr;
+ while ((pkt = vpx_codec_get_cx_data(mVPXContext, &iter)) != nullptr) {
+ switch (pkt->kind) {
+ case VPX_CODEC_CX_FRAME_PKT: {
+ // Copy the encoded data from libvpx to frameData
+ frameData.AppendElements((uint8_t*)pkt->data.frame.buf,
+ pkt->data.frame.sz);
+ break;
+ }
+ default: {
+ break;
+ }
+ }
+ // End of frame
+ if ((pkt->data.frame.flags & VPX_FRAME_IS_FRAGMENT) == 0) {
+ if (pkt->data.frame.flags & VPX_FRAME_IS_KEY) {
+ frameType = EncodedFrame::VP8_I_FRAME;
+ }
+ break;
+ }
+ }
+
+ if (!frameData.IsEmpty()) {
+ // Copy the encoded data to aData.
+ EncodedFrame* videoData = new EncodedFrame();
+ videoData->SetFrameType(frameType);
+ // Convert the timestamp and duration to Usecs.
+ CheckedInt64 timestamp = FramesToUsecs(pkt->data.frame.pts, mTrackRate);
+ if (timestamp.isValid()) {
+ videoData->SetTimeStamp((uint64_t)timestamp.value());
+ }
+ CheckedInt64 duration = FramesToUsecs(pkt->data.frame.duration, mTrackRate);
+ if (duration.isValid()) {
+ videoData->SetDuration((uint64_t)duration.value());
+ }
+ videoData->SwapInFrameData(frameData);
+ VP8LOG("GetEncodedPartitions TimeStamp %lld Duration %lld\n",
+ videoData->GetTimeStamp(), videoData->GetDuration());
+ VP8LOG("frameType %d\n", videoData->GetFrameType());
+ aData.AppendEncodedFrame(videoData);
+ }
+
+ return !!pkt;
+}
+
+static bool isYUV420(const PlanarYCbCrImage::Data *aData)
+{
+ if (aData->mYSize == aData->mCbCrSize * 2) {
+ return true;
+ }
+ return false;
+}
+
+static bool isYUV422(const PlanarYCbCrImage::Data *aData)
+{
+ if ((aData->mYSize.width == aData->mCbCrSize.width * 2) &&
+ (aData->mYSize.height == aData->mCbCrSize.height)) {
+ return true;
+ }
+ return false;
+}
+
+static bool isYUV444(const PlanarYCbCrImage::Data *aData)
+{
+ if (aData->mYSize == aData->mCbCrSize) {
+ return true;
+ }
+ return false;
+}
+
+nsresult VP8TrackEncoder::PrepareRawFrame(VideoChunk &aChunk)
+{
+ RefPtr<Image> img;
+ if (aChunk.mFrame.GetForceBlack() || aChunk.IsNull()) {
+ if (!mMuteFrame) {
+ mMuteFrame = VideoFrame::CreateBlackImage(gfx::IntSize(mFrameWidth, mFrameHeight));
+ MOZ_ASSERT(mMuteFrame);
+ }
+ img = mMuteFrame;
+ } else {
+ img = aChunk.mFrame.GetImage();
+ }
+
+ if (img->GetSize() != IntSize(mFrameWidth, mFrameHeight)) {
+ VP8LOG("Dynamic resolution changes (was %dx%d, now %dx%d) are unsupported\n",
+ mFrameWidth, mFrameHeight, img->GetSize().width, img->GetSize().height);
+ return NS_ERROR_FAILURE;
+ }
+
+ ImageFormat format = img->GetFormat();
+ if (format == ImageFormat::PLANAR_YCBCR) {
+ PlanarYCbCrImage* yuv = static_cast<PlanarYCbCrImage *>(img.get());
+
+ MOZ_RELEASE_ASSERT(yuv);
+ if (!yuv->IsValid()) {
+ NS_WARNING("PlanarYCbCrImage is not valid");
+ return NS_ERROR_FAILURE;
+ }
+ const PlanarYCbCrImage::Data *data = yuv->GetData();
+
+ if (isYUV420(data) && !data->mCbSkip) {
+ // 420 planar, no need for conversions
+ mVPXImageWrapper->planes[VPX_PLANE_Y] = data->mYChannel;
+ mVPXImageWrapper->planes[VPX_PLANE_U] = data->mCbChannel;
+ mVPXImageWrapper->planes[VPX_PLANE_V] = data->mCrChannel;
+ mVPXImageWrapper->stride[VPX_PLANE_Y] = data->mYStride;
+ mVPXImageWrapper->stride[VPX_PLANE_U] = data->mCbCrStride;
+ mVPXImageWrapper->stride[VPX_PLANE_V] = data->mCbCrStride;
+
+ return NS_OK;
+ }
+ }
+
+ // Not 420 planar, have to convert
+ uint32_t yPlaneSize = mFrameWidth * mFrameHeight;
+ uint32_t halfWidth = (mFrameWidth + 1) / 2;
+ uint32_t halfHeight = (mFrameHeight + 1) / 2;
+ uint32_t uvPlaneSize = halfWidth * halfHeight;
+
+ if (mI420Frame.IsEmpty()) {
+ mI420Frame.SetLength(yPlaneSize + uvPlaneSize * 2);
+ }
+
+ uint8_t *y = mI420Frame.Elements();
+ uint8_t *cb = mI420Frame.Elements() + yPlaneSize;
+ uint8_t *cr = mI420Frame.Elements() + yPlaneSize + uvPlaneSize;
+
+ if (format == ImageFormat::PLANAR_YCBCR) {
+ PlanarYCbCrImage* yuv = static_cast<PlanarYCbCrImage *>(img.get());
+
+ MOZ_RELEASE_ASSERT(yuv);
+ if (!yuv->IsValid()) {
+ NS_WARNING("PlanarYCbCrImage is not valid");
+ return NS_ERROR_FAILURE;
+ }
+ const PlanarYCbCrImage::Data *data = yuv->GetData();
+
+ int rv;
+ std::string yuvFormat;
+ if (isYUV420(data) && data->mCbSkip) {
+ // If mCbSkip is set, we assume it's nv12 or nv21.
+ if (data->mCbChannel < data->mCrChannel) { // nv12
+ rv = libyuv::NV12ToI420(data->mYChannel, data->mYStride,
+ data->mCbChannel, data->mCbCrStride,
+ y, mFrameWidth,
+ cb, halfWidth,
+ cr, halfWidth,
+ mFrameWidth, mFrameHeight);
+ yuvFormat = "NV12";
+ } else { // nv21
+ rv = libyuv::NV21ToI420(data->mYChannel, data->mYStride,
+ data->mCrChannel, data->mCbCrStride,
+ y, mFrameWidth,
+ cb, halfWidth,
+ cr, halfWidth,
+ mFrameWidth, mFrameHeight);
+ yuvFormat = "NV21";
+ }
+ } else if (isYUV444(data) && !data->mCbSkip) {
+ rv = libyuv::I444ToI420(data->mYChannel, data->mYStride,
+ data->mCbChannel, data->mCbCrStride,
+ data->mCrChannel, data->mCbCrStride,
+ y, mFrameWidth,
+ cb, halfWidth,
+ cr, halfWidth,
+ mFrameWidth, mFrameHeight);
+ yuvFormat = "I444";
+ } else if (isYUV422(data) && !data->mCbSkip) {
+ rv = libyuv::I422ToI420(data->mYChannel, data->mYStride,
+ data->mCbChannel, data->mCbCrStride,
+ data->mCrChannel, data->mCbCrStride,
+ y, mFrameWidth,
+ cb, halfWidth,
+ cr, halfWidth,
+ mFrameWidth, mFrameHeight);
+ yuvFormat = "I422";
+ } else {
+ VP8LOG("Unsupported planar format\n");
+ NS_ASSERTION(false, "Unsupported planar format");
+ return NS_ERROR_NOT_IMPLEMENTED;
+ }
+
+ if (rv != 0) {
+ VP8LOG("Converting an %s frame to I420 failed\n", yuvFormat.c_str());
+ return NS_ERROR_FAILURE;
+ }
+
+ VP8LOG("Converted an %s frame to I420\n", yuvFormat.c_str());
+ } else {
+ // Not YCbCr at all. Try to get access to the raw data and convert.
+
+ RefPtr<SourceSurface> surf = GetSourceSurface(img.forget());
+ if (!surf) {
+ VP8LOG("Getting surface from %s image failed\n", Stringify(format).c_str());
+ return NS_ERROR_FAILURE;
+ }
+
+ RefPtr<DataSourceSurface> data = surf->GetDataSurface();
+ if (!data) {
+ VP8LOG("Getting data surface from %s image with %s (%s) surface failed\n",
+ Stringify(format).c_str(), Stringify(surf->GetType()).c_str(),
+ Stringify(surf->GetFormat()).c_str());
+ return NS_ERROR_FAILURE;
+ }
+
+ DataSourceSurface::ScopedMap map(data, DataSourceSurface::READ);
+ if (!map.IsMapped()) {
+ VP8LOG("Reading DataSourceSurface from %s image with %s (%s) surface failed\n",
+ Stringify(format).c_str(), Stringify(surf->GetType()).c_str(),
+ Stringify(surf->GetFormat()).c_str());
+ return NS_ERROR_FAILURE;
+ }
+
+ int rv;
+ switch (surf->GetFormat()) {
+ case SurfaceFormat::B8G8R8A8:
+ case SurfaceFormat::B8G8R8X8:
+ rv = libyuv::ARGBToI420(static_cast<uint8*>(map.GetData()),
+ map.GetStride(),
+ y, mFrameWidth,
+ cb, halfWidth,
+ cr, halfWidth,
+ mFrameWidth, mFrameHeight);
+ break;
+ case SurfaceFormat::R5G6B5_UINT16:
+ rv = libyuv::RGB565ToI420(static_cast<uint8*>(map.GetData()),
+ map.GetStride(),
+ y, mFrameWidth,
+ cb, halfWidth,
+ cr, halfWidth,
+ mFrameWidth, mFrameHeight);
+ break;
+ default:
+ VP8LOG("Unsupported SourceSurface format %s\n",
+ Stringify(surf->GetFormat()).c_str());
+ NS_ASSERTION(false, "Unsupported SourceSurface format");
+ return NS_ERROR_NOT_IMPLEMENTED;
+ }
+
+ if (rv != 0) {
+ VP8LOG("%s to I420 conversion failed\n",
+ Stringify(surf->GetFormat()).c_str());
+ return NS_ERROR_FAILURE;
+ }
+
+ VP8LOG("Converted a %s frame to I420\n",
+ Stringify(surf->GetFormat()).c_str());
+ }
+
+ mVPXImageWrapper->planes[VPX_PLANE_Y] = y;
+ mVPXImageWrapper->planes[VPX_PLANE_U] = cb;
+ mVPXImageWrapper->planes[VPX_PLANE_V] = cr;
+ mVPXImageWrapper->stride[VPX_PLANE_Y] = mFrameWidth;
+ mVPXImageWrapper->stride[VPX_PLANE_U] = halfWidth;
+ mVPXImageWrapper->stride[VPX_PLANE_V] = halfWidth;
+
+ return NS_OK;
+}
+
+void
+VP8TrackEncoder::ReplyGetSourceSurface(already_AddRefed<gfx::SourceSurface> aSurf)
+{
+ mSourceSurface = aSurf;
+}
+
+already_AddRefed<gfx::SourceSurface>
+VP8TrackEncoder::GetSourceSurface(already_AddRefed<Image> aImg)
+{
+ RefPtr<Image> img = aImg;
+ mSourceSurface = nullptr;
+ if (img) {
+ if (img->AsGLImage() && !NS_IsMainThread()) {
+ // GLImage::GetAsSourceSurface() only support main thread
+ RefPtr<Runnable> getsourcesurface_runnable =
+ media::NewRunnableFrom([this, img]() -> nsresult {
+ // Due to the parameter DISPATCH_SYNC, the encoder thread will be stuck at
+ // MediaRecorder::Session::Extract(bool). There is no chance
+ // that TrackEncoder will be destroyed during this period. So
+ // there is no need to use RefPtr to hold TrackEncoder.
+ ReplyGetSourceSurface(img->GetAsSourceSurface());
+ return NS_OK;
+ });
+ NS_DispatchToMainThread(getsourcesurface_runnable, NS_DISPATCH_SYNC);
+ } else {
+ mSourceSurface = img->GetAsSourceSurface();
+ }
+ }
+ return mSourceSurface.forget();
+}
+
+// These two define value used in GetNextEncodeOperation to determine the
+// EncodeOperation for next target frame.
+#define I_FRAME_RATIO (0.5)
+#define SKIP_FRAME_RATIO (0.75)
+
+/**
+ * Compares the elapsed time from the beginning of GetEncodedTrack and
+ * the processed frame duration in mSourceSegment
+ * in order to set the nextEncodeOperation for next target frame.
+ */
+VP8TrackEncoder::EncodeOperation
+VP8TrackEncoder::GetNextEncodeOperation(TimeDuration aTimeElapsed,
+ StreamTime aProcessedDuration)
+{
+ int64_t durationInUsec =
+ FramesToUsecs(aProcessedDuration + mEncodedFrameDuration,
+ mTrackRate).value();
+ if (aTimeElapsed.ToMicroseconds() > (durationInUsec * SKIP_FRAME_RATIO)) {
+ // The encoder is too slow.
+ // We should skip next frame to consume the mSourceSegment.
+ return SKIP_FRAME;
+ } else if (aTimeElapsed.ToMicroseconds() > (durationInUsec * I_FRAME_RATIO)) {
+ // The encoder is a little slow.
+ // We force the encoder to encode an I-frame to accelerate.
+ return ENCODE_I_FRAME;
+ } else {
+ return ENCODE_NORMAL_FRAME;
+ }
+}
+
+StreamTime
+VP8TrackEncoder::CalculateRemainingTicks(StreamTime aDurationCopied,
+ StreamTime aEncodedDuration)
+{
+ return mRemainingTicks + aEncodedDuration - aDurationCopied;
+}
+
+// Try to extend the encodedDuration as long as possible if the target frame
+// has a long duration.
+StreamTime
+VP8TrackEncoder::CalculateEncodedDuration(StreamTime aDurationCopied)
+{
+ StreamTime temp64 = aDurationCopied;
+ StreamTime encodedDuration = mEncodedFrameDuration;
+ temp64 -= mRemainingTicks;
+ while (temp64 > mEncodedFrameDuration) {
+ temp64 -= mEncodedFrameDuration;
+ encodedDuration += mEncodedFrameDuration;
+ }
+ return encodedDuration;
+}
+
+/**
+ * Encoding flow in GetEncodedTrack():
+ * 1: Check the mInitialized state and the packet duration.
+ * 2: Move the data from mRawSegment to mSourceSegment.
+ * 3: Encode the video chunks in mSourceSegment in a for-loop.
+ * 3.1: Pick the video chunk by mRemainingTicks.
+ * 3.2: Calculate the encoding duration for the parameter of vpx_codec_encode().
+ * The encoding duration is a multiple of mEncodedFrameDuration.
+ * 3.3: Setup the video chunk to mVPXImageWrapper by PrepareRawFrame().
+ * 3.4: Send frame into vp8 encoder by vpx_codec_encode().
+ * 3.5: Get the output frame from encoder by calling GetEncodedPartitions().
+ * 3.6: Calculate the mRemainingTicks for next target frame.
+ * 3.7: Set the nextEncodeOperation for the next target frame.
+ * There is a heuristic: If the frame duration we have processed in
+ * mSourceSegment is 100ms, means that we can't spend more than 100ms to
+ * encode it.
+ * 4. Remove the encoded chunks in mSourceSegment after for-loop.
+ *
+ * Ex1: Input frame rate is 100 => input frame duration is 10ms for each.
+ * mEncodedFrameRate is 30 => output frame duration is 33ms.
+ * In this case, the frame duration in mSourceSegment will be:
+ * 1st : 0~10ms
+ * 2nd : 10~20ms
+ * 3rd : 20~30ms
+ * 4th : 30~40ms
+ * ...
+ * The VP8 encoder will take the 1st and 4th frames to encode. At beginning
+ * mRemainingTicks is 0 for 1st frame, then the mRemainingTicks is set
+ * to 23 to pick the 4th frame. (mEncodedFrameDuration - 1st frame duration)
+ *
+ * Ex2: Input frame rate is 25 => frame duration is 40ms for each.
+ * mEncodedFrameRate is 30 => output frame duration is 33ms.
+ * In this case, the frame duration in mSourceSegment will be:
+ * 1st : 0~40ms
+ * 2nd : 40~80ms
+ * 3rd : 80~120ms
+ * 4th : 120~160ms
+ * ...
+ * Because the input frame duration is 40ms larger than 33ms, so the first
+ * encoded frame duration will be 66ms by calling CalculateEncodedDuration.
+ * And the mRemainingTicks will be set to 26
+ * (CalculateRemainingTicks 0+66-40) in order to pick the next frame(2nd)
+ * in mSourceSegment.
+ */
+nsresult
+VP8TrackEncoder::GetEncodedTrack(EncodedFrameContainer& aData)
+{
+ PROFILER_LABEL("VP8TrackEncoder", "GetEncodedTrack",
+ js::ProfileEntry::Category::OTHER);
+ bool EOS;
+ {
+ // Move all the samples from mRawSegment to mSourceSegment. We only hold
+ // the monitor in this block.
+ ReentrantMonitorAutoEnter mon(mReentrantMonitor);
+ // Wait if mEncoder is not initialized, or when not enough raw data, but is
+ // not the end of stream nor is being canceled.
+ while (!mCanceled && (!mInitialized ||
+ (mRawSegment.GetDuration() + mSourceSegment.GetDuration() <
+ mEncodedFrameDuration && !mEndOfStream))) {
+ mon.Wait();
+ }
+ if (mCanceled || mEncodingComplete) {
+ return NS_ERROR_FAILURE;
+ }
+ mSourceSegment.AppendFrom(&mRawSegment);
+ EOS = mEndOfStream;
+ }
+
+ VideoSegment::ChunkIterator iter(mSourceSegment);
+ StreamTime durationCopied = 0;
+ StreamTime totalProcessedDuration = 0;
+ TimeStamp timebase = TimeStamp::Now();
+ EncodeOperation nextEncodeOperation = ENCODE_NORMAL_FRAME;
+
+ for (; !iter.IsEnded(); iter.Next()) {
+ VideoChunk &chunk = *iter;
+ // Accumulate chunk's duration to durationCopied until it reaches
+ // mRemainingTicks.
+ durationCopied += chunk.GetDuration();
+ MOZ_ASSERT(mRemainingTicks <= mEncodedFrameDuration);
+ VP8LOG("durationCopied %lld mRemainingTicks %lld\n",
+ durationCopied, mRemainingTicks);
+ if (durationCopied >= mRemainingTicks) {
+ VP8LOG("nextEncodeOperation is %d\n",nextEncodeOperation);
+ // Calculate encodedDuration for this target frame.
+ StreamTime encodedDuration = CalculateEncodedDuration(durationCopied);
+
+ // Encode frame.
+ if (nextEncodeOperation != SKIP_FRAME) {
+ nsresult rv = PrepareRawFrame(chunk);
+ NS_ENSURE_SUCCESS(rv, NS_ERROR_FAILURE);
+
+ // Encode the data with VP8 encoder
+ int flags = (nextEncodeOperation == ENCODE_NORMAL_FRAME) ?
+ 0 : VPX_EFLAG_FORCE_KF;
+ if (vpx_codec_encode(mVPXContext, mVPXImageWrapper, mEncodedTimestamp,
+ (unsigned long)encodedDuration, flags,
+ VPX_DL_REALTIME)) {
+ return NS_ERROR_FAILURE;
+ }
+ // Get the encoded data from VP8 encoder.
+ GetEncodedPartitions(aData);
+ } else {
+ // SKIP_FRAME
+ // Extend the duration of the last encoded data in aData
+ // because this frame will be skipped.
+ RefPtr<EncodedFrame> last = aData.GetEncodedFrames().LastElement();
+ if (last) {
+ CheckedInt64 skippedDuration = FramesToUsecs(chunk.mDuration, mTrackRate);
+ if (skippedDuration.isValid() && skippedDuration.value() > 0) {
+ last->SetDuration(last->GetDuration() +
+ (static_cast<uint64_t>(skippedDuration.value())));
+ }
+ }
+ }
+ // Move forward the mEncodedTimestamp.
+ mEncodedTimestamp += encodedDuration;
+ totalProcessedDuration += durationCopied;
+ // Calculate mRemainingTicks for next target frame.
+ mRemainingTicks = CalculateRemainingTicks(durationCopied,
+ encodedDuration);
+
+ // Check the remain data is enough for next target frame.
+ if (mSourceSegment.GetDuration() - totalProcessedDuration
+ >= mEncodedFrameDuration) {
+ TimeDuration elapsedTime = TimeStamp::Now() - timebase;
+ nextEncodeOperation = GetNextEncodeOperation(elapsedTime,
+ totalProcessedDuration);
+ // Reset durationCopied for next iteration.
+ durationCopied = 0;
+ } else {
+ // Process done, there is no enough data left for next iteration,
+ // break the for-loop.
+ break;
+ }
+ }
+ }
+ // Remove the chunks we have processed.
+ mSourceSegment.RemoveLeading(totalProcessedDuration);
+ VP8LOG("RemoveLeading %lld\n",totalProcessedDuration);
+
+ // End of stream, pull the rest frames in encoder.
+ if (EOS) {
+ VP8LOG("mEndOfStream is true\n");
+ mEncodingComplete = true;
+ // Bug 1243611, keep calling vpx_codec_encode and vpx_codec_get_cx_data
+ // until vpx_codec_get_cx_data return null.
+
+ do {
+ if (vpx_codec_encode(mVPXContext, nullptr, mEncodedTimestamp,
+ mEncodedFrameDuration, 0, VPX_DL_REALTIME)) {
+ return NS_ERROR_FAILURE;
+ }
+ } while(GetEncodedPartitions(aData));
+ }
+
+ return NS_OK ;
+}
+
+} // namespace mozilla
diff --git a/dom/media/encoder/VP8TrackEncoder.h b/dom/media/encoder/VP8TrackEncoder.h
new file mode 100644
index 000000000..5a046ee5d
--- /dev/null
+++ b/dom/media/encoder/VP8TrackEncoder.h
@@ -0,0 +1,99 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef VP8TrackEncoder_h_
+#define VP8TrackEncoder_h_
+
+#include "TrackEncoder.h"
+#include "vpx/vpx_codec.h"
+
+namespace mozilla {
+
+typedef struct vpx_codec_ctx vpx_codec_ctx_t;
+typedef struct vpx_codec_enc_cfg vpx_codec_enc_cfg_t;
+typedef struct vpx_image vpx_image_t;
+
+/**
+ * VP8TrackEncoder implements VideoTrackEncoder by using libvpx library.
+ * We implement a realtime and fixed FPS encoder. In order to achieve that,
+ * there is a pick target frame and drop frame encoding policy implemented in
+ * GetEncodedTrack.
+ */
+class VP8TrackEncoder : public VideoTrackEncoder
+{
+ enum EncodeOperation {
+ ENCODE_NORMAL_FRAME, // VP8 track encoder works normally.
+ ENCODE_I_FRAME, // The next frame will be encoded as I-Frame.
+ SKIP_FRAME, // Skip the next frame.
+ };
+public:
+ explicit VP8TrackEncoder(TrackRate aTrackRate);
+ virtual ~VP8TrackEncoder();
+
+ already_AddRefed<TrackMetadataBase> GetMetadata() final override;
+
+ nsresult GetEncodedTrack(EncodedFrameContainer& aData) final override;
+
+ void ReplyGetSourceSurface(already_AddRefed<gfx::SourceSurface> aSurf);
+protected:
+ nsresult Init(int32_t aWidth, int32_t aHeight,
+ int32_t aDisplayWidth, int32_t aDisplayHeight) final override;
+
+private:
+ // Calculate the target frame's encoded duration.
+ StreamTime CalculateEncodedDuration(StreamTime aDurationCopied);
+
+ // Calculate the mRemainingTicks for next target frame.
+ StreamTime CalculateRemainingTicks(StreamTime aDurationCopied,
+ StreamTime aEncodedDuration);
+
+ // Get the EncodeOperation for next target frame.
+ EncodeOperation GetNextEncodeOperation(TimeDuration aTimeElapsed,
+ StreamTime aProcessedDuration);
+
+ // Get the encoded data from encoder to aData.
+ // Return value: false if the vpx_codec_get_cx_data returns null
+ // for EOS detection.
+ bool GetEncodedPartitions(EncodedFrameContainer& aData);
+
+ // Prepare the input data to the mVPXImageWrapper for encoding.
+ nsresult PrepareRawFrame(VideoChunk &aChunk);
+
+ already_AddRefed<gfx::SourceSurface> GetSourceSurface(already_AddRefed<layers::Image> aImg);
+
+ // Output frame rate.
+ uint32_t mEncodedFrameRate;
+ // Duration for the output frame, reciprocal to mEncodedFrameRate.
+ StreamTime mEncodedFrameDuration;
+ // Encoded timestamp.
+ StreamTime mEncodedTimestamp;
+ // Duration to the next encode frame.
+ StreamTime mRemainingTicks;
+
+ // Muted frame, we only create it once.
+ RefPtr<layers::Image> mMuteFrame;
+
+ // I420 frame, for converting to I420.
+ nsTArray<uint8_t> mI420Frame;
+
+ /**
+ * A local segment queue which takes the raw data out from mRawSegment in the
+ * call of GetEncodedTrack(). Since we implement the fixed FPS encoding
+ * policy, it needs to be global in order to store the leftover segments
+ * taken from mRawSegment.
+ */
+ VideoSegment mSourceSegment;
+
+ // VP8 relative members.
+ // Codec context structure.
+ nsAutoPtr<vpx_codec_ctx_t> mVPXContext;
+ // Image Descriptor.
+ nsAutoPtr<vpx_image_t> mVPXImageWrapper;
+ RefPtr<gfx::SourceSurface> mSourceSurface;
+};
+
+} // namespace mozilla
+
+#endif
diff --git a/dom/media/encoder/fmp4_muxer/AMRBox.cpp b/dom/media/encoder/fmp4_muxer/AMRBox.cpp
new file mode 100644
index 000000000..cd1a34fae
--- /dev/null
+++ b/dom/media/encoder/fmp4_muxer/AMRBox.cpp
@@ -0,0 +1,84 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ISOControl.h"
+#include "ISOMediaBoxes.h"
+#include "AMRBox.h"
+#include "ISOTrackMetadata.h"
+
+namespace mozilla {
+
+nsresult
+AMRSampleEntry::Generate(uint32_t* aBoxSize)
+{
+ uint32_t box_size;
+ nsresult rv = amr_special_box->Generate(&box_size);
+ NS_ENSURE_SUCCESS(rv, rv);
+ size += box_size;
+
+ *aBoxSize = size;
+ return NS_OK;
+}
+
+nsresult
+AMRSampleEntry::Write()
+{
+ BoxSizeChecker checker(mControl, size);
+ nsresult rv;
+ rv = AudioSampleEntry::Write();
+ NS_ENSURE_SUCCESS(rv, rv);
+ rv = amr_special_box->Write();
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ return NS_OK;
+}
+
+AMRSampleEntry::AMRSampleEntry(ISOControl* aControl)
+ : AudioSampleEntry(NS_LITERAL_CSTRING("samr"), aControl)
+{
+ amr_special_box = new AMRSpecificBox(aControl);
+ MOZ_COUNT_CTOR(AMRSampleEntry);
+}
+
+AMRSampleEntry::~AMRSampleEntry()
+{
+ MOZ_COUNT_DTOR(AMRSampleEntry);
+}
+
+nsresult
+AMRSpecificBox::Generate(uint32_t* aBoxSize)
+{
+ nsresult rv;
+ FragmentBuffer* frag = mControl->GetFragment(Audio_Track);
+ rv = frag->GetCSD(amrDecSpecInfo);
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ size += amrDecSpecInfo.Length();
+ *aBoxSize = size;
+
+ return NS_OK;
+}
+
+nsresult
+AMRSpecificBox::Write()
+{
+ BoxSizeChecker checker(mControl, size);
+ Box::Write();
+ mControl->Write(amrDecSpecInfo.Elements(), amrDecSpecInfo.Length());
+ return NS_OK;
+}
+
+AMRSpecificBox::AMRSpecificBox(ISOControl* aControl)
+ : Box(NS_LITERAL_CSTRING("damr"), aControl)
+{
+ MOZ_COUNT_CTOR(AMRSpecificBox);
+}
+
+AMRSpecificBox::~AMRSpecificBox()
+{
+ MOZ_COUNT_DTOR(AMRSpecificBox);
+}
+
+}
diff --git a/dom/media/encoder/fmp4_muxer/AMRBox.h b/dom/media/encoder/fmp4_muxer/AMRBox.h
new file mode 100644
index 000000000..645d7f89c
--- /dev/null
+++ b/dom/media/encoder/fmp4_muxer/AMRBox.h
@@ -0,0 +1,50 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef AMRBOX_h_
+#define AMRBOX_h_
+
+#include "nsTArray.h"
+#include "MuxerOperation.h"
+
+namespace mozilla {
+
+class ISOControl;
+
+// 3GPP TS 26.244 6.7 'AMRSpecificBox field for AMRSampleEntry box'
+// Box type: 'damr'
+class AMRSpecificBox : public Box {
+public:
+ // 3GPP members
+ nsTArray<uint8_t> amrDecSpecInfo;
+
+ // MuxerOperation methods
+ nsresult Generate(uint32_t* aBoxSize) override;
+ nsresult Write() override;
+
+ // AMRSpecificBox methods
+ AMRSpecificBox(ISOControl* aControl);
+ ~AMRSpecificBox();
+};
+
+// 3GPP TS 26.244 6.5 'AMRSampleEntry box'
+// Box type: 'samr'
+class AMRSampleEntry : public AudioSampleEntry {
+public:
+ // 3GPP members
+ RefPtr<AMRSpecificBox> amr_special_box;
+
+ // MuxerOperation methods
+ nsresult Generate(uint32_t* aBoxSize) override;
+ nsresult Write() override;
+
+ // AMRSampleEntry methods
+ AMRSampleEntry(ISOControl* aControl);
+ ~AMRSampleEntry();
+};
+
+}
+
+#endif // AMRBOX_h_
diff --git a/dom/media/encoder/fmp4_muxer/AVCBox.cpp b/dom/media/encoder/fmp4_muxer/AVCBox.cpp
new file mode 100644
index 000000000..a45cda8b7
--- /dev/null
+++ b/dom/media/encoder/fmp4_muxer/AVCBox.cpp
@@ -0,0 +1,87 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <climits>
+#include "ISOControl.h"
+#include "ISOMediaBoxes.h"
+#include "AVCBox.h"
+
+namespace mozilla {
+
+nsresult
+AVCSampleEntry::Generate(uint32_t* aBoxSize)
+{
+ uint32_t avc_box_size = 0;
+ nsresult rv;
+ rv = avcConfigBox->Generate(&avc_box_size);
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ size += avc_box_size;
+
+ *aBoxSize = size;
+
+ return NS_OK;
+}
+
+nsresult
+AVCSampleEntry::Write()
+{
+ BoxSizeChecker checker(mControl, size);
+ nsresult rv;
+ rv = VisualSampleEntry::Write();
+ NS_ENSURE_SUCCESS(rv, rv);
+ rv = avcConfigBox->Write();
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ return NS_OK;
+}
+
+AVCSampleEntry::AVCSampleEntry(ISOControl* aControl)
+ : VisualSampleEntry(NS_LITERAL_CSTRING("avc1"), aControl)
+{
+ avcConfigBox = new AVCConfigurationBox(aControl);
+ MOZ_COUNT_CTOR(AVCSampleEntry);
+}
+
+AVCSampleEntry::~AVCSampleEntry()
+{
+ MOZ_COUNT_DTOR(AVCSampleEntry);
+}
+
+AVCConfigurationBox::AVCConfigurationBox(ISOControl* aControl)
+ : Box(NS_LITERAL_CSTRING("avcC"), aControl)
+{
+ MOZ_COUNT_CTOR(AVCConfigurationBox);
+}
+
+AVCConfigurationBox::~AVCConfigurationBox()
+{
+ MOZ_COUNT_DTOR(AVCConfigurationBox);
+}
+
+nsresult
+AVCConfigurationBox::Generate(uint32_t* aBoxSize)
+{
+ nsresult rv;
+ FragmentBuffer* frag = mControl->GetFragment(Video_Track);
+ rv = frag->GetCSD(avcConfig);
+ NS_ENSURE_SUCCESS(rv, rv);
+ size += avcConfig.Length();
+ *aBoxSize = size;
+ return NS_OK;
+}
+
+nsresult
+AVCConfigurationBox::Write()
+{
+ BoxSizeChecker checker(mControl, size);
+ Box::Write();
+
+ mControl->Write(avcConfig.Elements(), avcConfig.Length());
+
+ return NS_OK;
+}
+
+}
diff --git a/dom/media/encoder/fmp4_muxer/AVCBox.h b/dom/media/encoder/fmp4_muxer/AVCBox.h
new file mode 100644
index 000000000..9640d9e8f
--- /dev/null
+++ b/dom/media/encoder/fmp4_muxer/AVCBox.h
@@ -0,0 +1,59 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef AVCBox_h_
+#define AVCBox_h_
+
+#include "nsTArray.h"
+#include "ISOMediaBoxes.h"
+
+namespace mozilla {
+
+class ISOControl;
+
+// 14496-12 8.5.2.2
+#define resolution_72_dpi 0x00480000
+#define video_depth 0x0018
+
+// 14496-15 5.3.4.1 'Sample description name and format'
+// Box type: 'avcC'
+class AVCConfigurationBox : public Box {
+public:
+ // ISO BMFF members
+
+ // avcConfig is CodecSpecificData from 14496-15 '5.3.4.1 Sample description
+ // name and format.
+ // These data are generated by encoder and we encapsulated the generated
+ // bitstream into box directly.
+ nsTArray<uint8_t> avcConfig;
+
+ // MuxerOperation methods
+ nsresult Generate(uint32_t* aBoxSize) override;
+ nsresult Write() override;
+
+ // AVCConfigurationBox methods
+ AVCConfigurationBox(ISOControl* aControl);
+ ~AVCConfigurationBox();
+};
+
+// 14496-15 5.3.4.1 'Sample description name and format'
+// Box type: 'avc1'
+class AVCSampleEntry : public VisualSampleEntry {
+public:
+ // ISO BMFF members
+ RefPtr<AVCConfigurationBox> avcConfigBox;
+
+ // MuxerOperation methods
+ nsresult Generate(uint32_t* aBoxSize) override;
+ nsresult Write() override;
+
+ // VisualSampleEntry methods
+ AVCSampleEntry(ISOControl* aControl);
+ ~AVCSampleEntry();
+};
+
+}
+
+#endif // AVCBox_h_
diff --git a/dom/media/encoder/fmp4_muxer/EVRCBox.cpp b/dom/media/encoder/fmp4_muxer/EVRCBox.cpp
new file mode 100644
index 000000000..096e4013d
--- /dev/null
+++ b/dom/media/encoder/fmp4_muxer/EVRCBox.cpp
@@ -0,0 +1,84 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ISOControl.h"
+#include "ISOMediaBoxes.h"
+#include "EVRCBox.h"
+#include "ISOTrackMetadata.h"
+
+namespace mozilla {
+
+nsresult
+EVRCSampleEntry::Generate(uint32_t* aBoxSize)
+{
+ uint32_t box_size;
+ nsresult rv = evrc_special_box->Generate(&box_size);
+ NS_ENSURE_SUCCESS(rv, rv);
+ size += box_size;
+
+ *aBoxSize = size;
+ return NS_OK;
+}
+
+nsresult
+EVRCSampleEntry::Write()
+{
+ BoxSizeChecker checker(mControl, size);
+ nsresult rv;
+ rv = AudioSampleEntry::Write();
+ NS_ENSURE_SUCCESS(rv, rv);
+ rv = evrc_special_box->Write();
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ return NS_OK;
+}
+
+EVRCSampleEntry::EVRCSampleEntry(ISOControl* aControl)
+ : AudioSampleEntry(NS_LITERAL_CSTRING("sevc"), aControl)
+{
+ evrc_special_box = new EVRCSpecificBox(aControl);
+ MOZ_COUNT_CTOR(EVRCSampleEntry);
+}
+
+EVRCSampleEntry::~EVRCSampleEntry()
+{
+ MOZ_COUNT_DTOR(EVRCSampleEntry);
+}
+
+nsresult
+EVRCSpecificBox::Generate(uint32_t* aBoxSize)
+{
+ nsresult rv;
+ FragmentBuffer* frag = mControl->GetFragment(Audio_Track);
+ rv = frag->GetCSD(evrcDecSpecInfo);
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ size += evrcDecSpecInfo.Length();
+ *aBoxSize = size;
+
+ return NS_OK;
+}
+
+nsresult
+EVRCSpecificBox::Write()
+{
+ BoxSizeChecker checker(mControl, size);
+ Box::Write();
+ mControl->Write(evrcDecSpecInfo.Elements(), evrcDecSpecInfo.Length());
+ return NS_OK;
+}
+
+EVRCSpecificBox::EVRCSpecificBox(ISOControl* aControl)
+ : Box(NS_LITERAL_CSTRING("devc"), aControl)
+{
+ MOZ_COUNT_CTOR(EVRCSpecificBox);
+}
+
+EVRCSpecificBox::~EVRCSpecificBox()
+{
+ MOZ_COUNT_DTOR(EVRCSpecificBox);
+}
+
+}
diff --git a/dom/media/encoder/fmp4_muxer/EVRCBox.h b/dom/media/encoder/fmp4_muxer/EVRCBox.h
new file mode 100644
index 000000000..31355849a
--- /dev/null
+++ b/dom/media/encoder/fmp4_muxer/EVRCBox.h
@@ -0,0 +1,50 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef EVRCBOX_h_
+#define EVRCBOX_h_
+
+#include "nsTArray.h"
+#include "MuxerOperation.h"
+
+namespace mozilla {
+
+class ISOControl;
+
+// 3GPP TS 26.244 6.7 'EVRCSpecificBox field for EVRCSampleEntry box'
+// Box type: 'devc'
+class EVRCSpecificBox : public Box {
+public:
+ // 3GPP members
+ nsTArray<uint8_t> evrcDecSpecInfo;
+
+ // MuxerOperation methods
+ nsresult Generate(uint32_t* aBoxSize) override;
+ nsresult Write() override;
+
+ // EVRCSpecificBox methods
+ EVRCSpecificBox(ISOControl* aControl);
+ ~EVRCSpecificBox();
+};
+
+// 3GPP TS 26.244 6.5 'EVRCSampleEntry box'
+// Box type: 'sevc'
+class EVRCSampleEntry : public AudioSampleEntry {
+public:
+ // 3GPP members
+ RefPtr<EVRCSpecificBox> evrc_special_box;
+
+ // MuxerOperation methods
+ nsresult Generate(uint32_t* aBoxSize) override;
+ nsresult Write() override;
+
+ // EVRCSampleEntry methods
+ EVRCSampleEntry(ISOControl* aControl);
+ ~EVRCSampleEntry();
+};
+
+}
+
+#endif // EVRCBOX_h_
diff --git a/dom/media/encoder/fmp4_muxer/ISOControl.cpp b/dom/media/encoder/fmp4_muxer/ISOControl.cpp
new file mode 100644
index 000000000..6addaeb30
--- /dev/null
+++ b/dom/media/encoder/fmp4_muxer/ISOControl.cpp
@@ -0,0 +1,415 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <time.h>
+#include "nsAutoPtr.h"
+#include "ISOControl.h"
+#include "ISOMediaBoxes.h"
+#include "EncodedFrameContainer.h"
+
+namespace mozilla {
+
+// For MP4 creation_time and modification_time offset from January 1, 1904 to
+// January 1, 1970.
+#define iso_time_offset 2082844800
+
+FragmentBuffer::FragmentBuffer(uint32_t aTrackType, uint32_t aFragDuration)
+ : mTrackType(aTrackType)
+ , mFragDuration(aFragDuration)
+ , mMediaStartTime(0)
+ , mFragmentNumber(0)
+ , mLastFrameTimeOfLastFragment(0)
+ , mEOS(false)
+{
+ mFragArray.AppendElement();
+ MOZ_COUNT_CTOR(FragmentBuffer);
+}
+
+FragmentBuffer::~FragmentBuffer()
+{
+ MOZ_COUNT_DTOR(FragmentBuffer);
+}
+
+bool
+FragmentBuffer::HasEnoughData()
+{
+ // Audio or video frame is enough to form a moof.
+ return (mFragArray.Length() > 1);
+}
+
+nsresult
+FragmentBuffer::GetCSD(nsTArray<uint8_t>& aCSD)
+{
+ if (!mCSDFrame) {
+ return NS_ERROR_FAILURE;
+ }
+ aCSD.AppendElements(mCSDFrame->GetFrameData().Elements(),
+ mCSDFrame->GetFrameData().Length());
+
+ return NS_OK;
+}
+
+nsresult
+FragmentBuffer::AddFrame(EncodedFrame* aFrame)
+{
+ // already EOS, it rejects all new data.
+ if (mEOS) {
+ MOZ_ASSERT(0);
+ return NS_OK;
+ }
+
+ EncodedFrame::FrameType type = aFrame->GetFrameType();
+ if (type == EncodedFrame::AAC_CSD || type == EncodedFrame::AVC_CSD ||
+ type == EncodedFrame::AMR_AUDIO_CSD || type == EncodedFrame::EVRC_AUDIO_CSD) {
+ mCSDFrame = aFrame;
+ // Use CSD's timestamp as the start time. Encoder should send CSD frame first
+ // and then data frames.
+ mMediaStartTime = aFrame->GetTimeStamp();
+ mFragmentNumber = 1;
+ return NS_OK;
+ }
+
+ // if the timestamp is incorrect, abort it.
+ if (aFrame->GetTimeStamp() < mMediaStartTime) {
+ MOZ_ASSERT(false);
+ return NS_ERROR_FAILURE;
+ }
+
+ mFragArray.LastElement().AppendElement(aFrame);
+
+ // Check if the current fragment has reached the fragment duration.
+ if ((aFrame->GetTimeStamp() - mMediaStartTime) >= (mFragDuration * mFragmentNumber)) {
+ mFragArray.AppendElement();
+ mFragmentNumber++;
+ }
+
+ return NS_OK;
+}
+
+nsresult
+FragmentBuffer::GetFirstFragment(nsTArray<RefPtr<EncodedFrame>>& aFragment,
+ bool aFlush)
+{
+ // It should be called only if there is a complete fragment in mFragArray.
+ if (mFragArray.Length() <= 1 && !mEOS) {
+ MOZ_ASSERT(false);
+ return NS_ERROR_FAILURE;
+ }
+
+ if (aFlush) {
+ aFragment.SwapElements(mFragArray.ElementAt(0));
+ mFragArray.RemoveElementAt(0);
+ } else {
+ aFragment.AppendElements(mFragArray.ElementAt(0));
+ }
+ return NS_OK;
+}
+
+uint32_t
+FragmentBuffer::GetFirstFragmentSampleNumber()
+{
+ return mFragArray.ElementAt(0).Length();
+}
+
+uint32_t
+FragmentBuffer::GetFirstFragmentSampleSize()
+{
+ uint32_t size = 0;
+ uint32_t len = mFragArray.ElementAt(0).Length();
+ for (uint32_t i = 0; i < len; i++) {
+ size += mFragArray.ElementAt(0).ElementAt(i)->GetFrameData().Length();
+ }
+ return size;
+}
+
+ISOControl::ISOControl(uint32_t aMuxingType)
+ : mMuxingType(aMuxingType)
+ , mAudioFragmentBuffer(nullptr)
+ , mVideoFragmentBuffer(nullptr)
+ , mFragNum(0)
+ , mOutputSize(0)
+ , mBitCount(0)
+ , mBit(0)
+{
+ // Create a data array for first mp4 Box, ftyp.
+ mOutBuffers.SetLength(1);
+ MOZ_COUNT_CTOR(ISOControl);
+}
+
+ISOControl::~ISOControl()
+{
+ MOZ_COUNT_DTOR(ISOControl);
+}
+
+uint32_t
+ISOControl::GetNextTrackID()
+{
+ return (mMetaArray.Length() + 1);
+}
+
+uint32_t
+ISOControl::GetTrackID(TrackMetadataBase::MetadataKind aKind)
+{
+ for (uint32_t i = 0; i < mMetaArray.Length(); i++) {
+ if (mMetaArray[i]->GetKind() == aKind) {
+ return (i + 1);
+ }
+ }
+
+ // Track ID shouldn't be 0. It must be something wrong here.
+ MOZ_ASSERT(0);
+ return 0;
+}
+
+nsresult
+ISOControl::SetMetadata(TrackMetadataBase* aTrackMeta)
+{
+ if (aTrackMeta->GetKind() == TrackMetadataBase::METADATA_AAC ||
+ aTrackMeta->GetKind() == TrackMetadataBase::METADATA_AMR ||
+ aTrackMeta->GetKind() == TrackMetadataBase::METADATA_AVC ||
+ aTrackMeta->GetKind() == TrackMetadataBase::METADATA_EVRC) {
+ mMetaArray.AppendElement(aTrackMeta);
+ return NS_OK;
+ }
+ return NS_ERROR_FAILURE;
+}
+
+nsresult
+ISOControl::GetAudioMetadata(RefPtr<AudioTrackMetadata>& aAudMeta)
+{
+ for (uint32_t i = 0; i < mMetaArray.Length() ; i++) {
+ if (mMetaArray[i]->GetKind() == TrackMetadataBase::METADATA_AAC ||
+ mMetaArray[i]->GetKind() == TrackMetadataBase::METADATA_AMR ||
+ mMetaArray[i]->GetKind() == TrackMetadataBase::METADATA_EVRC) {
+ aAudMeta = static_cast<AudioTrackMetadata*>(mMetaArray[i].get());
+ return NS_OK;
+ }
+ }
+ return NS_ERROR_FAILURE;
+}
+
+nsresult
+ISOControl::GetVideoMetadata(RefPtr<VideoTrackMetadata>& aVidMeta)
+{
+ for (uint32_t i = 0; i < mMetaArray.Length() ; i++) {
+ if (mMetaArray[i]->GetKind() == TrackMetadataBase::METADATA_AVC) {
+ aVidMeta = static_cast<VideoTrackMetadata*>(mMetaArray[i].get());
+ return NS_OK;
+ }
+ }
+ return NS_ERROR_FAILURE;
+}
+
+bool
+ISOControl::HasAudioTrack()
+{
+ RefPtr<AudioTrackMetadata> audMeta;
+ GetAudioMetadata(audMeta);
+ return audMeta;
+}
+
+bool
+ISOControl::HasVideoTrack()
+{
+ RefPtr<VideoTrackMetadata> vidMeta;
+ GetVideoMetadata(vidMeta);
+ return vidMeta;
+}
+
+nsresult
+ISOControl::SetFragment(FragmentBuffer* aFragment)
+{
+ if (aFragment->GetType() == Audio_Track) {
+ mAudioFragmentBuffer = aFragment;
+ } else {
+ mVideoFragmentBuffer = aFragment;
+ }
+ return NS_OK;
+}
+
+FragmentBuffer*
+ISOControl::GetFragment(uint32_t aType)
+{
+ if (aType == Audio_Track) {
+ return mAudioFragmentBuffer;
+ } else if (aType == Video_Track){
+ return mVideoFragmentBuffer;
+ }
+ MOZ_ASSERT(0);
+ return nullptr;
+}
+
+nsresult
+ISOControl::GetBufs(nsTArray<nsTArray<uint8_t>>* aOutputBufs)
+{
+ uint32_t len = mOutBuffers.Length();
+ for (uint32_t i = 0; i < len; i++) {
+ mOutBuffers[i].SwapElements(*aOutputBufs->AppendElement());
+ }
+ return FlushBuf();
+}
+
+nsresult
+ISOControl::FlushBuf()
+{
+ mOutBuffers.SetLength(1);
+ return NS_OK;
+}
+
+uint32_t
+ISOControl::WriteAVData(nsTArray<uint8_t>& aArray)
+{
+ MOZ_ASSERT(!mBitCount);
+
+ uint32_t len = aArray.Length();
+ if (!len) {
+ return 0;
+ }
+
+ mOutputSize += len;
+
+ // The last element already has data; allocate a new element for pointer
+ // swapping.
+ if (mOutBuffers.LastElement().Length()) {
+ mOutBuffers.AppendElement();
+ }
+ // Swap the video/audio data pointer.
+ mOutBuffers.LastElement().SwapElements(aArray);
+ // Following data could be boxes, so appending a new uint8_t array here.
+ mOutBuffers.AppendElement();
+
+ return len;
+}
+
+uint32_t
+ISOControl::WriteBits(uint64_t aBits, size_t aNumBits)
+{
+ uint8_t output_byte = 0;
+
+ MOZ_ASSERT(aNumBits <= 64);
+ // TODO: rewritten following with bitset?
+ for (size_t i = aNumBits; i > 0; i--) {
+ mBit |= (((aBits >> (i - 1)) & 1) << (8 - ++mBitCount));
+ if (mBitCount == 8) {
+ Write(&mBit, sizeof(uint8_t));
+ mBit = 0;
+ mBitCount = 0;
+ output_byte++;
+ }
+ }
+ return output_byte;
+}
+
+uint32_t
+ISOControl::Write(uint8_t* aBuf, uint32_t aSize)
+{
+ mOutBuffers.LastElement().AppendElements(aBuf, aSize);
+ mOutputSize += aSize;
+ return aSize;
+}
+
+uint32_t
+ISOControl::Write(uint8_t aData)
+{
+ MOZ_ASSERT(!mBitCount);
+ Write((uint8_t*)&aData, sizeof(uint8_t));
+ return sizeof(uint8_t);
+}
+
+uint32_t
+ISOControl::GetBufPos()
+{
+ uint32_t len = mOutBuffers.Length();
+ uint32_t pos = 0;
+ for (uint32_t i = 0; i < len; i++) {
+ pos += mOutBuffers.ElementAt(i).Length();
+ }
+ return pos;
+}
+
+uint32_t
+ISOControl::WriteFourCC(const char* aType)
+{
+ // Bit operation should be aligned to byte before writing any byte data.
+ MOZ_ASSERT(!mBitCount);
+
+ uint32_t size = strlen(aType);
+ if (size == 4) {
+ return Write((uint8_t*)aType, size);
+ }
+
+ return 0;
+}
+
+nsresult
+ISOControl::GenerateFtyp()
+{
+ nsresult rv;
+ uint32_t size;
+ nsAutoPtr<FileTypeBox> type_box(new FileTypeBox(this));
+ rv = type_box->Generate(&size);
+ NS_ENSURE_SUCCESS(rv, rv);
+ rv = type_box->Write();
+ NS_ENSURE_SUCCESS(rv, rv);
+ return NS_OK;
+}
+
+nsresult
+ISOControl::GenerateMoov()
+{
+ nsresult rv;
+ uint32_t size;
+ nsAutoPtr<MovieBox> moov_box(new MovieBox(this));
+ rv = moov_box->Generate(&size);
+ NS_ENSURE_SUCCESS(rv, rv);
+ rv = moov_box->Write();
+ NS_ENSURE_SUCCESS(rv, rv);
+ return NS_OK;
+}
+
+nsresult
+ISOControl::GenerateMoof(uint32_t aTrackType)
+{
+ mFragNum++;
+
+ nsresult rv;
+ uint32_t size;
+ uint64_t first_sample_offset = mOutputSize;
+ nsAutoPtr<MovieFragmentBox> moof_box(new MovieFragmentBox(aTrackType, this));
+ nsAutoPtr<MediaDataBox> mdat_box(new MediaDataBox(aTrackType, this));
+
+ rv = moof_box->Generate(&size);
+ NS_ENSURE_SUCCESS(rv, rv);
+ first_sample_offset += size;
+ rv = mdat_box->Generate(&size);
+ NS_ENSURE_SUCCESS(rv, rv);
+ first_sample_offset += mdat_box->FirstSampleOffsetInMediaDataBox();
+
+ // correct offset info
+ nsTArray<RefPtr<MuxerOperation>> tfhds;
+ rv = moof_box->Find(NS_LITERAL_CSTRING("tfhd"), tfhds);
+ NS_ENSURE_SUCCESS(rv, rv);
+ uint32_t len = tfhds.Length();
+ for (uint32_t i = 0; i < len; i++) {
+ TrackFragmentHeaderBox* tfhd = (TrackFragmentHeaderBox*) tfhds.ElementAt(i).get();
+ rv = tfhd->UpdateBaseDataOffset(first_sample_offset);
+ NS_ENSURE_SUCCESS(rv, rv);
+ }
+
+ rv = moof_box->Write();
+ NS_ENSURE_SUCCESS(rv, rv);
+ rv = mdat_box->Write();
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ return NS_OK;
+}
+
+uint32_t
+ISOControl::GetTime()
+{
+ return (uint64_t)time(nullptr) + iso_time_offset;
+}
+
+}
diff --git a/dom/media/encoder/fmp4_muxer/ISOControl.h b/dom/media/encoder/fmp4_muxer/ISOControl.h
new file mode 100644
index 000000000..3c445caee
--- /dev/null
+++ b/dom/media/encoder/fmp4_muxer/ISOControl.h
@@ -0,0 +1,250 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ISOCOMPOSITOR_H_
+#define ISOCOMPOSITOR_H_
+
+#include "mozilla/EndianUtils.h"
+#include "nsTArray.h"
+#include "ISOTrackMetadata.h"
+#include "EncodedFrameContainer.h"
+
+namespace mozilla {
+
+class Box;
+class ISOControl;
+
+/**
+ * This class collects elementary stream data to form a fragment.
+ * ISOMediaWriter will check if the data is enough; if yes, the corresponding
+ * moof will be created and write to ISOControl.
+ * Each audio and video has its own fragment and only one during the whole
+ * life cycle, when a fragment is formed in ISOControl, Flush() needs to
+ * be called to reset it.
+ */
+class FragmentBuffer {
+public:
+ // aTrackType: it could be Audio_Track or Video_Track.
+ // aFragDuration: it is the fragment duration. (microsecond per unit)
+ // Audio and video have the same fragment duration.
+ FragmentBuffer(uint32_t aTrackType, uint32_t aFragDuration);
+ ~FragmentBuffer();
+
+ // Get samples of the first fragment; this will swap out all the elements in
+ // mFragArray[0] when aFlush = true, and the caller is responsible for
+ // dropping the EncodedFrame reference counts.
+ nsresult GetFirstFragment(nsTArray<RefPtr<EncodedFrame>>& aFragment,
+ bool aFlush = false);
+
+ // Add sample frame to the last element fragment of mFragArray. If sample
+ // number is enough, it will append a new fragment element. And the new
+ // sample will be added to the new fragment element of mFragArray.
+ nsresult AddFrame(EncodedFrame* aFrame);
+
+ // Get total sample size of first complete fragment size.
+ uint32_t GetFirstFragmentSampleSize();
+
+ // Get sample number of first complete fragment.
+ uint32_t GetFirstFragmentSampleNumber();
+
+ // Check if it accumulates enough frame data.
+ // It returns true when data is enough to form a fragment.
+ bool HasEnoughData();
+
+ // Called by ISOMediaWriter when TrackEncoder has sent the last frame. The
+ // remaining frame data will form the last moof, and the state machine in
+ // ISOMediaWriter will move to the last phase.
+ nsresult SetEndOfStream() {
+ mEOS = true;
+ return NS_OK;
+ }
+ bool EOS() { return mEOS; }
+
+ // CSD (codec specific data), it is generated by encoder and the data depends
+ // on codec type. This data will be sent as a special frame from encoder to
+ // ISOMediaWriter and pass to this class via AddFrame().
+ nsresult GetCSD(nsTArray<uint8_t>& aCSD);
+
+ bool HasCSD() { return mCSDFrame; }
+
+ uint32_t GetType() { return mTrackType; }
+
+ void SetLastFragmentLastFrameTime(uint32_t aTime) {
+ mLastFrameTimeOfLastFragment = aTime;
+ }
+
+ uint32_t GetLastFragmentLastFrameTime() {
+ return mLastFrameTimeOfLastFragment;
+ }
+
+private:
+ uint32_t mTrackType;
+
+ // Fragment duration, microsecond per unit.
+ uint32_t mFragDuration;
+
+ // Media start time, microsecond per unit.
+ // Together with mFragDuration, mFragmentNumber and EncodedFrame->GetTimeStamp(),
+ // when the difference between the current frame time and mMediaStartTime
+ // exceeds the current fragment's ceiling timeframe, the current fragment has
+ // enough data and a new element in mFragArray will be added.
+ uint64_t mMediaStartTime;
+
+ // Current fragment number. It will be increase when a new element of
+ // mFragArray is created.
+ // Note:
+ // It only means the fragment number of current accumulated frames, not
+ // the current 'creating' fragment mFragNum in ISOControl.
+ uint32_t mFragmentNumber;
+
+ // The last frame time stamp of last fragment. It is for calculating the
+ // play duration of first frame in current fragment. The frame duration is
+ // defined as "current frame timestamp - last frame timestamp" here. So it
+ // needs to keep the last timestamp of last fragment.
+ uint32_t mLastFrameTimeOfLastFragment;
+
+ // Array of fragments, each element has enough samples to form a
+ // complete fragment.
+ nsTArray<nsTArray<RefPtr<EncodedFrame>>> mFragArray;
+
+ // Codec specific data frame, it will be generated by encoder and send to
+ // ISOMediaWriter through WriteEncodedTrack(). The data will vary depending
+ // on the codec type.
+ RefPtr<EncodedFrame> mCSDFrame;
+
+ // END_OF_STREAM from ContainerWriter
+ bool mEOS;
+};
+
+/**
+ * ISOControl will be carried to each box when box is created. It is the main
+ * bridge for box to output stream to ContainerWriter and retrieve information.
+ * ISOControl acts 3 different roles:
+ * 1. Holds the pointer of audio metadata, video metadata, fragment and
+ * pass them to boxes.
+ * 2. Provide the functions to generate the base structure of MP4; they are
+ * GenerateFtyp, GenerateMoov, GenerateMoof, and GenerateMfra.
+ * 3. The actually writer used by MuxOperation::Write() in each box. It provides
+ * writing methods for different kind of data; they are Write, WriteArray,
+ * WriteBits...etc.
+ */
+class ISOControl {
+
+friend class Box;
+
+public:
+ ISOControl(uint32_t aMuxingType);
+ ~ISOControl();
+
+ nsresult GenerateFtyp();
+ nsresult GenerateMoov();
+ nsresult GenerateMoof(uint32_t aTrackType);
+
+ // Swap elementary stream pointer to output buffers.
+ uint32_t WriteAVData(nsTArray<uint8_t>& aArray);
+
+ uint32_t Write(uint8_t* aBuf, uint32_t aSize);
+
+ uint32_t Write(uint8_t aData);
+
+ template <typename T>
+ uint32_t Write(T aData) {
+ MOZ_ASSERT(!mBitCount);
+
+ aData = NativeEndian::swapToNetworkOrder(aData);
+ Write((uint8_t*)&aData, sizeof(T));
+ return sizeof(T);
+ }
+
+ template <typename T>
+ uint32_t WriteArray(const T &aArray, uint32_t aSize) {
+ MOZ_ASSERT(!mBitCount);
+
+ uint32_t size = 0;
+ for (uint32_t i = 0; i < aSize; i++) {
+ size += Write(aArray[i]);
+ }
+ return size;
+ }
+
+ uint32_t WriteFourCC(const char* aType);
+
+ // Bit writing. Note: it needs to be byte-boundary before using
+ // others non-bit writing function.
+ uint32_t WriteBits(uint64_t aBits, size_t aNumBits);
+
+ // This is called by GetContainerData and swap all the buffers to aOutputBuffers.
+ nsresult GetBufs(nsTArray<nsTArray<uint8_t>>* aOutputBufs);
+
+ // Presentation time in seconds since midnight, Jan. 1, 1904, in UTC time.
+ uint32_t GetTime();
+
+ // current fragment number
+ uint32_t GetCurFragmentNumber() { return mFragNum; }
+
+ nsresult SetFragment(FragmentBuffer* aFragment);
+ FragmentBuffer* GetFragment(uint32_t aType);
+
+ uint32_t GetMuxingType() { return mMuxingType; }
+
+ nsresult SetMetadata(TrackMetadataBase* aTrackMeta);
+ nsresult GetAudioMetadata(RefPtr<AudioTrackMetadata>& aAudMeta);
+ nsresult GetVideoMetadata(RefPtr<VideoTrackMetadata>& aVidMeta);
+
+ // Track ID is the Metadata index in mMetaArray. It allows only 1 audio
+ // track and 1 video track in this muxer. In this muxer, it is prohibited to
+ // have multiple audio tracks or video tracks in the same file.
+ uint32_t GetTrackID(TrackMetadataBase::MetadataKind aKind);
+ uint32_t GetNextTrackID();
+
+ bool HasAudioTrack();
+ bool HasVideoTrack();
+
+private:
+ uint32_t GetBufPos();
+ nsresult FlushBuf();
+
+ // One of value in TYPE_XXX, defined in ISOMediaWriter.
+ uint32_t mMuxingType;
+
+ // Audio and video fragments are owned by ISOMediaWriter.
+ // They don't need to worry about pointer going stale because ISOMediaWriter's
+ // lifetime is longer than ISOControl.
+ FragmentBuffer* mAudioFragmentBuffer;
+ FragmentBuffer* mVideoFragmentBuffer;
+
+ // Generated fragment number
+ uint32_t mFragNum;
+
+ // The (index + 1) will be the track ID.
+ nsTArray<RefPtr<TrackMetadataBase>> mMetaArray;
+
+ // Array of output buffers.
+ // To save memory usage, audio/video sample will be swapped into a new element
+ // of this array.
+ //
+ // For example,
+ // mOutBuffers[0] --> boxes (allocated by muxer)
+ // mOutBuffers[1] --> video raw data (allocated by encoder)
+ // mOutBuffers[2] --> video raw data (allocated by encoder)
+ // mOutBuffers[3] --> video raw data (allocated by encoder)
+ // mOutBuffers[4] --> boxes (allocated by muxer)
+ // mOutBuffers[5] --> audio raw data (allocated by encoder)
+ // ...etc.
+ //
+ nsTArray<nsTArray<uint8_t>> mOutBuffers;
+
+ // Accumulate output size from Write().
+ uint64_t mOutputSize;
+
+ // Bit writing operation. Note: the mBitCount should be 0 before any
+ // byte-boundary writing method be called (Write(uint32_t), Write(uint16_t)...etc);
+ // otherwise, there will be assertion on these functions.
+ uint8_t mBitCount;
+ uint8_t mBit;
+};
+
+}
+#endif
diff --git a/dom/media/encoder/fmp4_muxer/ISOMediaBoxes.cpp b/dom/media/encoder/fmp4_muxer/ISOMediaBoxes.cpp
new file mode 100644
index 000000000..32a0c577b
--- /dev/null
+++ b/dom/media/encoder/fmp4_muxer/ISOMediaBoxes.cpp
@@ -0,0 +1,1550 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <climits>
+#include "TrackMetadataBase.h"
+#include "ISOMediaBoxes.h"
+#include "ISOControl.h"
+#include "ISOMediaWriter.h"
+#include "EncodedFrameContainer.h"
+#include "ISOTrackMetadata.h"
+#include "MP4ESDS.h"
+#include "AMRBox.h"
+#include "AVCBox.h"
+#include "EVRCBox.h"
+#include "VideoUtils.h"
+
+namespace mozilla {
+
+// 14496-12 6.2.2 'Data Types and fields'
+// Unity transformation matrix used by 'mvhd'/'tkhd': 16.16 fixed-point 1.0
+// on the first two diagonal entries and 2.30 fixed-point 1.0 (0x40000000)
+// as the last element, per the spec's identity matrix.
+const uint32_t iso_matrix[] = { 0x00010000, 0, 0,
+                                0, 0x00010000, 0,
+                                0, 0, 0x40000000 };
+
// Build a 14496-12 8.8.3 sample_flags word. Bit 16
// (sample_is_non_sync_sample) is set when the sample is NOT a sync sample;
// every other field stays zero.
uint32_t
set_sample_flags(bool aSync)
{
  uint32_t sampleFlags = 0;
  if (!aSync) {
    sampleFlags |= uint32_t(1) << 16;
  }
  return sampleFlags;
}
+
+// Debug helper: snapshots the output-buffer position at construction and, in
+// the destructor, asserts that exactly box_size bytes were written in between.
+Box::BoxSizeChecker::BoxSizeChecker(ISOControl* aControl, uint32_t aSize)
+{
+  mControl = aControl;
+  ori_size = mControl->GetBufPos();
+  box_size = aSize;
+  MOZ_COUNT_CTOR(BoxSizeChecker);
+}
+
+Box::BoxSizeChecker::~BoxSizeChecker()
+{
+  // A mismatch means a box's Write() emitted a different number of bytes
+  // than its Generate() accounted for.
+  uint32_t cur_size = mControl->GetBufPos();
+  if ((cur_size - ori_size) != box_size) {
+    MOZ_ASSERT(false);
+  }
+
+  MOZ_COUNT_DTOR(BoxSizeChecker);
+}
+
+// Compute the total 'mdat' size: the box header (size starts at the header
+// size) plus the payload of the first queued fragment of each covered track.
+nsresult
+MediaDataBox::Generate(uint32_t* aBoxSize)
+{
+  // Samples begin immediately after the box header, so the pre-payload size
+  // is the offset of the first sample within this box.
+  mFirstSampleOffset = size;
+  mAllSampleSize = 0;
+
+  if (mTrackType & Audio_Track) {
+    FragmentBuffer* frag = mControl->GetFragment(Audio_Track);
+    mAllSampleSize += frag->GetFirstFragmentSampleSize();
+  }
+  if (mTrackType & Video_Track) {
+    FragmentBuffer* frag = mControl->GetFragment(Video_Track);
+    mAllSampleSize += frag->GetFirstFragmentSampleSize();
+  }
+
+  size += mAllSampleSize;
+  *aBoxSize = size;
+  return NS_OK;
+}
+
+// Write the box header followed by the raw encoded frames of the first
+// fragment of each covered track — audio first, then video. The fragment is
+// flushed out of its FragmentBuffer here; frame payloads are swapped (not
+// copied) into the control's output buffers.
+nsresult
+MediaDataBox::Write()
+{
+  nsresult rv;
+  BoxSizeChecker checker(mControl, size);
+  Box::Write();
+  nsTArray<uint32_t> types;
+  types.AppendElement(Audio_Track);
+  types.AppendElement(Video_Track);
+
+  for (uint32_t l = 0; l < types.Length(); l++) {
+    if (mTrackType & types[l]) {
+      FragmentBuffer* frag = mControl->GetFragment(types[l]);
+      nsTArray<RefPtr<EncodedFrame>> frames;
+
+      // Here is the last time we get fragment frames, flush it!
+      rv = frag->GetFirstFragment(frames, true);
+      NS_ENSURE_SUCCESS(rv, rv);
+
+      uint32_t len = frames.Length();
+      for (uint32_t i = 0; i < len; i++) {
+        nsTArray<uint8_t> frame_buffer;
+        frames.ElementAt(i)->SwapOutFrameData(frame_buffer);
+        mControl->WriteAVData(frame_buffer);
+      }
+    }
+  }
+
+  return NS_OK;
+}
+
+MediaDataBox::MediaDataBox(uint32_t aTrackType, ISOControl* aControl)
+  : Box(NS_LITERAL_CSTRING("mdat"), aControl)
+  , mAllSampleSize(0)
+  , mFirstSampleOffset(0)
+  , mTrackType(aTrackType)
+{
+  MOZ_COUNT_CTOR(MediaDataBox);
+}
+
+MediaDataBox::~MediaDataBox()
+{
+  MOZ_COUNT_DTOR(MediaDataBox);
+}
+
+// Fill the per-sample table for the first fragment of this track, following
+// 14496-12 8.8.8.2. Returns the number of table bytes that Write() will
+// emit, and accumulates the fragment payload size into mAllSampleSize.
+uint32_t
+TrackRunBox::fillSampleTable()
+{
+  uint32_t table_size = 0;
+  nsresult rv;
+  nsTArray<RefPtr<EncodedFrame>> frames;
+  FragmentBuffer* frag = mControl->GetFragment(mTrackType);
+
+  rv = frag->GetFirstFragment(frames);
+  if (NS_FAILED(rv)) {
+    return 0;
+  }
+  uint32_t len = frames.Length();
+  sample_info_table = MakeUnique<tbl[]>(len);
+  // Create sample table according to 14496-12 8.8.8.2.
+  for (uint32_t i = 0; i < len; i++) {
+    // Sample size.
+    sample_info_table[i].sample_size = 0;
+    if (flags.to_ulong() & flags_sample_size_present) {
+      sample_info_table[i].sample_size = frames.ElementAt(i)->GetFrameData().Length();
+      mAllSampleSize += sample_info_table[i].sample_size;
+      table_size += sizeof(uint32_t);
+    }
+
+    // Sample flags: AVC I-frames are sync samples, everything else is not.
+    sample_info_table[i].sample_flags = 0;
+    if (flags.to_ulong() & flags_sample_flags_present) {
+      sample_info_table[i].sample_flags =
+        set_sample_flags(
+          (frames.ElementAt(i)->GetFrameType() == EncodedFrame::AVC_I_FRAME));
+      table_size += sizeof(uint32_t);
+    }
+
+    // Sample duration.
+    sample_info_table[i].sample_duration = 0;
+    if (flags.to_ulong() & flags_sample_duration_present) {
+      // Calculate each frame's duration, it is decided by "current frame
+      // timestamp - last frame timestamp". The first frame of this fragment
+      // is measured against the last frame of the previous fragment.
+      uint64_t frame_time = 0;
+      if (i == 0) {
+        frame_time = frames.ElementAt(i)->GetTimeStamp() -
+                     frag->GetLastFragmentLastFrameTime();
+      } else {
+        frame_time = frames.ElementAt(i)->GetTimeStamp() -
+                     frames.ElementAt(i - 1)->GetTimeStamp();
+      }
+      // Keep the last frame time of the current fragment; it is used to
+      // calculate the first frame duration of the next fragment. This must
+      // run for every fragment, including single-frame fragments (len == 1):
+      // the previous code only updated it in the i != 0 branch, so a
+      // one-frame fragment left a stale timestamp behind and the next
+      // fragment's first sample duration was computed from the wrong base.
+      if ((len - 1) == i) {
+        frag->SetLastFragmentLastFrameTime(frames.ElementAt(i)->GetTimeStamp());
+      }
+
+      // In TrackRunBox, there should be exactly one type, either audio or video.
+      MOZ_ASSERT((mTrackType & Video_Track) ^ (mTrackType & Audio_Track));
+      sample_info_table[i].sample_duration = (mTrackType & Video_Track ?
+        frame_time * mVideoMeta->GetVideoClockRate() / USECS_PER_S :
+        frame_time * mAudioMeta->GetAudioSampleRate() / USECS_PER_S);
+
+      table_size += sizeof(uint32_t);
+    }
+
+    sample_info_table[i].sample_composition_time_offset = 0;
+  }
+  return table_size;
+}
+
+// Size the 'trun' payload: sample_count, optional data_offset, and the
+// per-sample table. data_offset is patched later by the enclosing 'moof'
+// (MovieFragmentBox::Generate) once all runs have been sized.
+nsresult
+TrackRunBox::Generate(uint32_t* aBoxSize)
+{
+  FragmentBuffer* frag = mControl->GetFragment(mTrackType);
+  sample_count = frag->GetFirstFragmentSampleNumber();
+  size += sizeof(sample_count);
+
+  // data_offset needs to be updated if there is other
+  // TrackRunBox before this one.
+  if (flags.to_ulong() & flags_data_offset_present) {
+    data_offset = 0;
+    size += sizeof(data_offset);
+  }
+  size += fillSampleTable();
+
+  *aBoxSize = size;
+
+  return NS_OK;
+}
+
+// Called by MovieFragmentBox::Generate to record where this run's samples
+// start relative to the preceding runs' payloads.
+nsresult
+TrackRunBox::SetDataOffset(uint32_t aOffset)
+{
+  data_offset = aOffset;
+  return NS_OK;
+}
+
+// Serialize the full-box header, sample_count, optional data_offset, and
+// only those per-sample fields that the box flags declare present.
+nsresult
+TrackRunBox::Write()
+{
+  WRITE_FULLBOX(mControl, size)
+  mControl->Write(sample_count);
+  if (flags.to_ulong() & flags_data_offset_present) {
+    mControl->Write(data_offset);
+  }
+  for (uint32_t i = 0; i < sample_count; i++) {
+    if (flags.to_ulong() & flags_sample_duration_present) {
+      mControl->Write(sample_info_table[i].sample_duration);
+    }
+    if (flags.to_ulong() & flags_sample_size_present) {
+      mControl->Write(sample_info_table[i].sample_size);
+    }
+    if (flags.to_ulong() & flags_sample_flags_present) {
+      mControl->Write(sample_info_table[i].sample_flags);
+    }
+  }
+
+  return NS_OK;
+}
+
+TrackRunBox::TrackRunBox(uint32_t aType, uint32_t aFlags, ISOControl* aControl)
+  : FullBox(NS_LITERAL_CSTRING("trun"), 0, aFlags, aControl)
+  , sample_count(0)
+  , data_offset(0)
+  , first_sample_flags(0)
+  , mAllSampleSize(0)
+  , mTrackType(aType)
+{
+  MOZ_COUNT_CTOR(TrackRunBox);
+}
+
+TrackRunBox::~TrackRunBox()
+{
+  MOZ_COUNT_DTOR(TrackRunBox);
+}
+
+// Called once the final layout is known, so base_data_offset can point at
+// the absolute position of this fragment's data.
+nsresult
+TrackFragmentHeaderBox::UpdateBaseDataOffset(uint64_t aOffset)
+{
+  base_data_offset = aOffset;
+  return NS_OK;
+}
+
+// Size the 'tfhd' payload: track_ID plus whichever optional fields the box
+// flags declare (base_data_offset, default_sample_duration).
+nsresult
+TrackFragmentHeaderBox::Generate(uint32_t* aBoxSize)
+{
+  track_ID = (mTrackType == Audio_Track ?
+                mControl->GetTrackID(mAudioMeta->GetKind()) :
+                mControl->GetTrackID(mVideoMeta->GetKind()));
+  size += sizeof(track_ID);
+
+  if (flags.to_ulong() & base_data_offset_present) {
+    // base_data_offset needs to add size of 'trun', 'tfhd' and
+    // header of 'mdat' later.
+    base_data_offset = 0;
+    size += sizeof(base_data_offset);
+  }
+  if (flags.to_ulong() & default_sample_duration_present) {
+    if (mTrackType == Video_Track) {
+      if (!mVideoMeta->GetVideoFrameRate()) {
+        // 0 means frame rate is variant, so it is wrong to write
+        // default_sample_duration.
+        MOZ_ASSERT(0);
+        default_sample_duration = 0;
+      } else {
+        default_sample_duration = mVideoMeta->GetVideoClockRate() / mVideoMeta->GetVideoFrameRate();
+      }
+    } else if (mTrackType == Audio_Track) {
+      default_sample_duration = mAudioMeta->GetAudioFrameDuration();
+    } else {
+      MOZ_ASSERT(0);
+      return NS_ERROR_FAILURE;
+    }
+    size += sizeof(default_sample_duration);
+  }
+  *aBoxSize = size;
+  return NS_OK;
+}
+
+nsresult
+TrackFragmentHeaderBox::Write()
+{
+  WRITE_FULLBOX(mControl, size)
+  mControl->Write(track_ID);
+  if (flags.to_ulong() & base_data_offset_present) {
+    mControl->Write(base_data_offset);
+  }
+  if (flags.to_ulong() & default_sample_duration_present) {
+    mControl->Write(default_sample_duration);
+  }
+  return NS_OK;
+}
+
+TrackFragmentHeaderBox::TrackFragmentHeaderBox(uint32_t aType,
+                                               uint32_t aFlags,
+                                               ISOControl* aControl)
+  : FullBox(NS_LITERAL_CSTRING("tfhd"), 0, aFlags, aControl)
+  , track_ID(0)
+  , base_data_offset(0)
+  , default_sample_duration(0)
+{
+  mTrackType = aType;
+  MOZ_COUNT_CTOR(TrackFragmentHeaderBox);
+}
+
+TrackFragmentHeaderBox::~TrackFragmentHeaderBox()
+{
+  MOZ_COUNT_DTOR(TrackFragmentHeaderBox);
+}
+
+// 'traf' container: one per track per fragment, holding a 'tfhd' and a
+// 'trun' whose flags are derived from the track's metadata.
+TrackFragmentBox::TrackFragmentBox(uint32_t aType, ISOControl* aControl)
+  : DefaultContainerImpl(NS_LITERAL_CSTRING("traf"), aControl)
+  , mTrackType(aType)
+{
+  // Flags in TrackFragmentHeaderBox.
+  uint32_t tf_flags = base_data_offset_present;
+
+  // Ideally, audio encoder generates audio frame in const rate. However, some
+  // audio encoders don't do it so the audio frame duration needs to be checked
+  // here.
+  if ((mTrackType & Audio_Track) && mAudioMeta->GetAudioFrameDuration()) {
+    tf_flags |= default_sample_duration_present;
+  }
+
+  boxes.AppendElement(new TrackFragmentHeaderBox(aType, tf_flags, aControl));
+
+  // Always adds flags_data_offset_present in each TrackRunBox, Android
+  // parser requires this flag to calculate the correct bitstream offset.
+  uint32_t tr_flags = flags_sample_size_present | flags_data_offset_present;
+
+  // Flags in TrackRunBox.
+  // If there is no default sample duration exists, each frame duration needs to
+  // be recored in the TrackRunBox.
+  tr_flags |= (tf_flags & default_sample_duration_present ? 0 : flags_sample_duration_present);
+
+  // For video, add sample_flags to record I frame.
+  tr_flags |= (mTrackType & Video_Track ? flags_sample_flags_present : 0);
+
+  boxes.AppendElement(new TrackRunBox(mTrackType, tr_flags, aControl));
+  MOZ_COUNT_CTOR(TrackFragmentBox);
+}
+
+TrackFragmentBox::~TrackFragmentBox()
+{
+  MOZ_COUNT_DTOR(TrackFragmentBox);
+}
+
+// 'mfhd': carries the 1-based, monotonically increasing fragment sequence
+// number supplied by the controller.
+nsresult
+MovieFragmentHeaderBox::Generate(uint32_t* aBoxSize)
+{
+  sequence_number = mControl->GetCurFragmentNumber();
+  size += sizeof(sequence_number);
+  *aBoxSize = size;
+  return NS_OK;
+}
+
+nsresult
+MovieFragmentHeaderBox::Write()
+{
+  WRITE_FULLBOX(mControl, size)
+  mControl->Write(sequence_number);
+  return NS_OK;
+}
+
+MovieFragmentHeaderBox::MovieFragmentHeaderBox(uint32_t aTrackType,
+                                               ISOControl* aControl)
+  : FullBox(NS_LITERAL_CSTRING("mfhd"), 0, 0, aControl)
+  , sequence_number(0)
+  , mTrackType(aTrackType)
+{
+  MOZ_COUNT_CTOR(MovieFragmentHeaderBox);
+}
+
+MovieFragmentHeaderBox::~MovieFragmentHeaderBox()
+{
+  MOZ_COUNT_DTOR(MovieFragmentHeaderBox);
+}
+
+// 'moof' container: an 'mfhd' plus one 'traf' per covered track.
+MovieFragmentBox::MovieFragmentBox(uint32_t aType, ISOControl* aControl)
+  : DefaultContainerImpl(NS_LITERAL_CSTRING("moof"), aControl)
+  , mTrackType(aType)
+{
+  boxes.AppendElement(new MovieFragmentHeaderBox(mTrackType, aControl));
+
+  if (mTrackType & Audio_Track) {
+    boxes.AppendElement(
+      new TrackFragmentBox(Audio_Track, aControl));
+  }
+  if (mTrackType & Video_Track) {
+    boxes.AppendElement(
+      new TrackFragmentBox(Video_Track, aControl));
+  }
+  MOZ_COUNT_CTOR(MovieFragmentBox);
+}
+
+MovieFragmentBox::~MovieFragmentBox()
+{
+  MOZ_COUNT_DTOR(MovieFragmentBox);
+}
+
+// After sizing all children, walk every contained 'trun' and set its
+// data_offset to the cumulative payload size of the runs before it.
+nsresult
+MovieFragmentBox::Generate(uint32_t* aBoxSize)
+{
+  nsresult rv = DefaultContainerImpl::Generate(aBoxSize);
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  // Correct data_offset if there are both audio and video track in
+  // this fragment. This offset means the offset in the MediaDataBox.
+  if (mTrackType & (Audio_Track | Video_Track)) {
+    nsTArray<RefPtr<MuxerOperation>> truns;
+    rv = Find(NS_LITERAL_CSTRING("trun"), truns);
+    NS_ENSURE_SUCCESS(rv, rv);
+    uint32_t len = truns.Length();
+    uint32_t data_offset = 0;
+    for (uint32_t i = 0; i < len; i++) {
+      TrackRunBox* trun = (TrackRunBox*) truns.ElementAt(i).get();
+      rv = trun->SetDataOffset(data_offset);
+      NS_ENSURE_SUCCESS(rv, rv);
+      data_offset += trun->GetAllSampleSize();
+    }
+  }
+
+  return NS_OK;
+}
+
+// 'trex': per-track defaults used by fragments. Audio gets its default
+// duration/size from the audio metadata; video only gets a default duration
+// when the metadata reports a fixed frame rate.
+nsresult
+TrackExtendsBox::Generate(uint32_t* aBoxSize)
+{
+  track_ID = (mTrackType == Audio_Track ?
+    mControl->GetTrackID(mAudioMeta->GetKind()) :
+    mControl->GetTrackID(mVideoMeta->GetKind()));
+
+  if (mTrackType == Audio_Track) {
+    // Sample description indexes are 1-based; this muxer emits exactly one
+    // sample entry per track.
+    default_sample_description_index = 1;
+    default_sample_duration = mAudioMeta->GetAudioFrameDuration();
+    default_sample_size = mAudioMeta->GetAudioFrameSize();
+    default_sample_flags = set_sample_flags(1);
+  } else if (mTrackType == Video_Track) {
+    default_sample_description_index = 1;
+    // Video meta data has assigned framerate, it implies that this video's
+    // frame rate should be fixed.
+    if (mVideoMeta->GetVideoFrameRate()) {
+      default_sample_duration =
+        mVideoMeta->GetVideoClockRate() / mVideoMeta->GetVideoFrameRate();
+    }
+    default_sample_size = 0;
+    default_sample_flags = set_sample_flags(0);
+  } else {
+    MOZ_ASSERT(0);
+    return NS_ERROR_FAILURE;
+  }
+
+  size += sizeof(track_ID) +
+          sizeof(default_sample_description_index) +
+          sizeof(default_sample_duration) +
+          sizeof(default_sample_size) +
+          sizeof(default_sample_flags);
+
+  *aBoxSize = size;
+
+  return NS_OK;
+}
+
+nsresult
+TrackExtendsBox::Write()
+{
+  WRITE_FULLBOX(mControl, size)
+  mControl->Write(track_ID);
+  mControl->Write(default_sample_description_index);
+  mControl->Write(default_sample_duration);
+  mControl->Write(default_sample_size);
+  mControl->Write(default_sample_flags);
+
+  return NS_OK;
+}
+
+TrackExtendsBox::TrackExtendsBox(uint32_t aType, ISOControl* aControl)
+  : FullBox(NS_LITERAL_CSTRING("trex"), 0, 0, aControl)
+  , track_ID(0)
+  , default_sample_description_index(0)
+  , default_sample_duration(0)
+  , default_sample_size(0)
+  , default_sample_flags(0)
+  , mTrackType(aType)
+{
+  MOZ_COUNT_CTOR(TrackExtendsBox);
+}
+
+TrackExtendsBox::~TrackExtendsBox()
+{
+  MOZ_COUNT_DTOR(TrackExtendsBox);
+}
+
+// 'mvex' container: one 'trex' per present track, signalling that the file
+// uses movie fragments.
+MovieExtendsBox::MovieExtendsBox(ISOControl* aControl)
+  : DefaultContainerImpl(NS_LITERAL_CSTRING("mvex"), aControl)
+{
+  if (mAudioMeta) {
+    boxes.AppendElement(new TrackExtendsBox(Audio_Track, aControl));
+  }
+  if (mVideoMeta) {
+    boxes.AppendElement(new TrackExtendsBox(Video_Track, aControl));
+  }
+  MOZ_COUNT_CTOR(MovieExtendsBox);
+}
+
+MovieExtendsBox::~MovieExtendsBox()
+{
+  MOZ_COUNT_DTOR(MovieExtendsBox);
+}
+
+// 'stco': in fragmented mp4 sample data lives in 'moof'/'mdat' pairs, so the
+// chunk-offset table in 'moov' is intentionally empty.
+nsresult
+ChunkOffsetBox::Generate(uint32_t* aBoxSize)
+{
+  // We don't need a chunk offset table in fragmented mp4.
+  entry_count = 0;
+  size += sizeof(entry_count);
+  *aBoxSize = size;
+  return NS_OK;
+}
+
+nsresult
+ChunkOffsetBox::Write()
+{
+  WRITE_FULLBOX(mControl, size)
+  mControl->Write(entry_count);
+  return NS_OK;
+}
+
+ChunkOffsetBox::ChunkOffsetBox(uint32_t aType, ISOControl* aControl)
+  : FullBox(NS_LITERAL_CSTRING("stco"), 0, 0, aControl)
+  , entry_count(0)
+{
+  MOZ_COUNT_CTOR(ChunkOffsetBox);
+}
+
+ChunkOffsetBox::~ChunkOffsetBox()
+{
+  MOZ_COUNT_DTOR(ChunkOffsetBox);
+}
+
+// 'stsc': likewise empty in fragmented mp4.
+nsresult
+SampleToChunkBox::Generate(uint32_t* aBoxSize)
+{
+  // We don't need a sample-to-chunk table in fragmented mp4.
+  entry_count = 0;
+  size += sizeof(entry_count);
+  *aBoxSize = size;
+  return NS_OK;
+}
+
+nsresult
+SampleToChunkBox::Write()
+{
+  WRITE_FULLBOX(mControl, size)
+  mControl->Write(entry_count);
+  return NS_OK;
+}
+
+SampleToChunkBox::SampleToChunkBox(uint32_t aType, ISOControl* aControl)
+  : FullBox(NS_LITERAL_CSTRING("stsc"), 0, 0, aControl)
+  , entry_count(0)
+{
+  MOZ_COUNT_CTOR(SampleToChunkBox);
+}
+
+SampleToChunkBox::~SampleToChunkBox()
+{
+  MOZ_COUNT_DTOR(SampleToChunkBox);
+}
+
+// 'stts': per-fragment durations are carried in each 'trun' instead, so this
+// table is also empty.
+nsresult
+TimeToSampleBox::Generate(uint32_t* aBoxSize)
+{
+  // We don't need time to sample table in fragmented mp4.
+  entry_count = 0;
+  size += sizeof(entry_count);
+  *aBoxSize = size;
+  return NS_OK;
+}
+
+nsresult
+TimeToSampleBox::Write()
+{
+  WRITE_FULLBOX(mControl, size)
+  mControl->Write(entry_count);
+  return NS_OK;
+}
+
+TimeToSampleBox::TimeToSampleBox(uint32_t aType, ISOControl* aControl)
+  : FullBox(NS_LITERAL_CSTRING("stts"), 0, 0, aControl)
+  , entry_count(0)
+{
+  MOZ_COUNT_CTOR(TimeToSampleBox);
+}
+
+TimeToSampleBox::~TimeToSampleBox()
+{
+  MOZ_COUNT_DTOR(TimeToSampleBox);
+}
+
+// 'stsd': exactly one sample entry per track (see entry_count = 1), chosen
+// from the track metadata kind in the constructor.
+nsresult
+SampleDescriptionBox::Generate(uint32_t* aBoxSize)
+{
+  entry_count = 1;
+  size += sizeof(entry_count);
+
+  nsresult rv;
+  uint32_t box_size;
+  rv = sample_entry_box->Generate(&box_size);
+  NS_ENSURE_SUCCESS(rv, rv);
+  size += box_size;
+  *aBoxSize = size;
+
+  return NS_OK;
+}
+
+nsresult
+SampleDescriptionBox::Write()
+{
+  WRITE_FULLBOX(mControl, size)
+  nsresult rv;
+  mControl->Write(entry_count);
+  rv = sample_entry_box->Write();
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  return NS_OK;
+}
+
+SampleDescriptionBox::SampleDescriptionBox(uint32_t aType, ISOControl* aControl)
+  : FullBox(NS_LITERAL_CSTRING("stsd"), 0, 0, aControl)
+  , entry_count(0)
+{
+  mTrackType = aType;
+
+  // NOTE: an unrecognized track type leaves sample_entry_box null; the
+  // assert below only fires in debug builds.
+  switch (mTrackType) {
+  case Audio_Track:
+    {
+      CreateAudioSampleEntry(sample_entry_box);
+    }
+    break;
+  case Video_Track:
+    {
+      CreateVideoSampleEntry(sample_entry_box);
+    }
+    break;
+  }
+  MOZ_ASSERT(sample_entry_box);
+  MOZ_COUNT_CTOR(SampleDescriptionBox);
+}
+
+// Pick the audio sample entry implementation from the metadata kind
+// (AMR / AAC / EVRC). Unknown kinds assert and leave aSampleEntry untouched.
+nsresult
+SampleDescriptionBox::CreateAudioSampleEntry(RefPtr<SampleEntryBox>& aSampleEntry)
+{
+  if (mAudioMeta->GetKind() == TrackMetadataBase::METADATA_AMR) {
+    aSampleEntry = new AMRSampleEntry(mControl);
+  } else if (mAudioMeta->GetKind() == TrackMetadataBase::METADATA_AAC) {
+    aSampleEntry = new MP4AudioSampleEntry(mControl);
+  } else if (mAudioMeta->GetKind() == TrackMetadataBase::METADATA_EVRC) {
+    aSampleEntry = new EVRCSampleEntry(mControl);
+  } else {
+    MOZ_ASSERT(0);
+  }
+  return NS_OK;
+}
+
+// Only AVC video is supported by this muxer.
+nsresult
+SampleDescriptionBox::CreateVideoSampleEntry(RefPtr<SampleEntryBox>& aSampleEntry)
+{
+  if (mVideoMeta->GetKind() == TrackMetadataBase::METADATA_AVC) {
+    aSampleEntry = new AVCSampleEntry(mControl);
+  } else {
+    MOZ_ASSERT(0);
+  }
+  return NS_OK;
+}
+
+SampleDescriptionBox::~SampleDescriptionBox()
+{
+  MOZ_COUNT_DTOR(SampleDescriptionBox);
+}
+
+// 'stsz': both fields stay zero in fragmented mp4; sizes are in each 'trun'.
+nsresult
+SampleSizeBox::Generate(uint32_t* aBoxSize)
+{
+  size += sizeof(sample_size) +
+          sizeof(sample_count);
+  *aBoxSize = size;
+  return NS_OK;
+}
+
+nsresult
+SampleSizeBox::Write()
+{
+  WRITE_FULLBOX(mControl, size)
+  mControl->Write(sample_size);
+  mControl->Write(sample_count);
+  return NS_OK;
+}
+
+SampleSizeBox::SampleSizeBox(ISOControl* aControl)
+  : FullBox(NS_LITERAL_CSTRING("stsz"), 0, 0, aControl)
+  , sample_size(0)
+  , sample_count(0)
+{
+  MOZ_COUNT_CTOR(SampleSizeBox);
+}
+
+SampleSizeBox::~SampleSizeBox()
+{
+  MOZ_COUNT_DTOR(SampleSizeBox);
+}
+
+// 'stbl' container: the mandatory sample tables, all effectively empty here
+// because samples are described per-fragment.
+SampleTableBox::SampleTableBox(uint32_t aType, ISOControl* aControl)
+  : DefaultContainerImpl(NS_LITERAL_CSTRING("stbl"), aControl)
+{
+  boxes.AppendElement(new SampleDescriptionBox(aType, aControl));
+  boxes.AppendElement(new TimeToSampleBox(aType, aControl));
+  boxes.AppendElement(new SampleToChunkBox(aType, aControl));
+  boxes.AppendElement(new SampleSizeBox(aControl));
+  boxes.AppendElement(new ChunkOffsetBox(aType, aControl));
+  MOZ_COUNT_CTOR(SampleTableBox);
+}
+
+SampleTableBox::~SampleTableBox()
+{
+  MOZ_COUNT_DTOR(SampleTableBox);
+}
+
+// 'url ': with flags_media_at_the_same_file set, the location string is
+// empty and the media is understood to live in this same file.
+nsresult
+DataEntryUrlBox::Generate(uint32_t* aBoxSize)
+{
+  // location is null here, do nothing
+  size += location.Length();
+  *aBoxSize = size;
+
+  return NS_OK;
+}
+
+nsresult
+DataEntryUrlBox::Write()
+{
+  WRITE_FULLBOX(mControl, size)
+  return NS_OK;
+}
+
+DataEntryUrlBox::DataEntryUrlBox()
+  : FullBox(NS_LITERAL_CSTRING("url "), 0, 0, (ISOControl*) nullptr)
+{
+  MOZ_COUNT_CTOR(DataEntryUrlBox);
+}
+
+DataEntryUrlBox::DataEntryUrlBox(ISOControl* aControl)
+  : FullBox(NS_LITERAL_CSTRING("url "), 0, flags_media_at_the_same_file, aControl)
+{
+  MOZ_COUNT_CTOR(DataEntryUrlBox);
+}
+
+DataEntryUrlBox::DataEntryUrlBox(const DataEntryUrlBox& aBox)
+  : FullBox(aBox.boxType, aBox.version, aBox.flags.to_ulong(), aBox.mControl)
+{
+  location = aBox.location;
+  MOZ_COUNT_CTOR(DataEntryUrlBox);
+}
+
+DataEntryUrlBox::~DataEntryUrlBox()
+{
+  MOZ_COUNT_DTOR(DataEntryUrlBox);
+}
+
+// 'dref': builds its single 'url ' child during Generate() and sizes the box
+// as entry_count plus the child box.
+nsresult DataReferenceBox::Generate(uint32_t* aBoxSize)
+{
+  entry_count = 1; // only one entry ('url ') is allowed here
+  size += sizeof(uint32_t);
+
+  for (uint32_t i = 0; i < entry_count; i++) {
+    uint32_t box_size = 0;
+    DataEntryUrlBox* url = new DataEntryUrlBox(mControl);
+    url->Generate(&box_size);
+    size += box_size;
+    urls.AppendElement(url);
+  }
+
+  *aBoxSize = size;
+
+  return NS_OK;
+}
+
+nsresult DataReferenceBox::Write()
+{
+  WRITE_FULLBOX(mControl, size)
+  mControl->Write(entry_count);
+
+  for (uint32_t i = 0; i < entry_count; i++) {
+    urls[i]->Write();
+  }
+
+  return NS_OK;
+}
+
+DataReferenceBox::DataReferenceBox(ISOControl* aControl)
+  : FullBox(NS_LITERAL_CSTRING("dref"), 0, 0, aControl)
+  , entry_count(0)
+{
+  MOZ_COUNT_CTOR(DataReferenceBox);
+}
+
+DataReferenceBox::~DataReferenceBox()
+{
+  MOZ_COUNT_DTOR(DataReferenceBox);
+}
+
+// 'dinf' container: holds only the 'dref'.
+DataInformationBox::DataInformationBox(ISOControl* aControl)
+  : DefaultContainerImpl(NS_LITERAL_CSTRING("dinf"), aControl)
+{
+  boxes.AppendElement(new DataReferenceBox(aControl));
+  MOZ_COUNT_CTOR(DataInformationBox);
+}
+
+DataInformationBox::~DataInformationBox()
+{
+  MOZ_COUNT_DTOR(DataInformationBox);
+}
+
+// 'vmhd': graphicsmode 0 (copy) and zeroed opcolor; version/flags come from
+// the constructor (flags = 1 per 14496-12).
+nsresult
+VideoMediaHeaderBox::Generate(uint32_t* aBoxSize)
+{
+  size += sizeof(graphicsmode) +
+          sizeof(opcolor);
+
+  *aBoxSize = size;
+
+  return NS_OK;
+}
+
+nsresult
+VideoMediaHeaderBox::Write()
+{
+  WRITE_FULLBOX(mControl, size)
+  mControl->Write(graphicsmode);
+  mControl->WriteArray(opcolor, 3);
+  return NS_OK;
+}
+
+VideoMediaHeaderBox::VideoMediaHeaderBox(ISOControl* aControl)
+  : FullBox(NS_LITERAL_CSTRING("vmhd"), 0, 1, aControl)
+  , graphicsmode(0)
+{
+  memset(opcolor, 0 , sizeof(opcolor));
+  MOZ_COUNT_CTOR(VideoMediaHeaderBox);
+}
+
+VideoMediaHeaderBox::~VideoMediaHeaderBox()
+{
+  MOZ_COUNT_DTOR(VideoMediaHeaderBox);
+}
+
+// 'smhd': centered balance (0) plus a reserved 16-bit field.
+nsresult
+SoundMediaHeaderBox::Generate(uint32_t* aBoxSize)
+{
+  balance = 0;
+  reserved = 0;
+  size += sizeof(balance) +
+          sizeof(reserved);
+
+  *aBoxSize = size;
+
+  return NS_OK;
+}
+
+nsresult
+SoundMediaHeaderBox::Write()
+{
+  WRITE_FULLBOX(mControl, size)
+  mControl->Write(balance);
+  mControl->Write(reserved);
+
+  return NS_OK;
+}
+
+SoundMediaHeaderBox::SoundMediaHeaderBox(ISOControl* aControl)
+  : FullBox(NS_LITERAL_CSTRING("smhd"), 0, 0, aControl)
+{
+  MOZ_COUNT_CTOR(SoundMediaHeaderBox);
+}
+
+SoundMediaHeaderBox::~SoundMediaHeaderBox()
+{
+  MOZ_COUNT_DTOR(SoundMediaHeaderBox);
+}
+
+// 'minf' container: the track-type-specific media header ('smhd'/'vmhd')
+// followed by 'dinf' and 'stbl'.
+MediaInformationBox::MediaInformationBox(uint32_t aType, ISOControl* aControl)
+  : DefaultContainerImpl(NS_LITERAL_CSTRING("minf"), aControl)
+{
+  mTrackType = aType;
+
+  if (mTrackType == Audio_Track) {
+    boxes.AppendElement(new SoundMediaHeaderBox(aControl));
+  } else if (mTrackType == Video_Track) {
+    boxes.AppendElement(new VideoMediaHeaderBox(aControl));
+  } else {
+    MOZ_ASSERT(0);
+  }
+
+  boxes.AppendElement(new DataInformationBox(aControl));
+  boxes.AppendElement(new SampleTableBox(aType, aControl));
+  MOZ_COUNT_CTOR(MediaInformationBox);
+}
+
+MediaInformationBox::~MediaInformationBox()
+{
+  MOZ_COUNT_DTOR(MediaInformationBox);
+}
+
+// 'hdlr': declares the media handler type — 'soun' for audio tracks, 'vide'
+// for video tracks. The trailing name string is left empty (3 reserved
+// uint32 written as zeros).
+nsresult
+HandlerBox::Generate(uint32_t* aBoxSize)
+{
+  pre_defined = 0;
+  if (mTrackType == Audio_Track) {
+    handler_type = FOURCC('s', 'o', 'u', 'n');
+  } else if (mTrackType == Video_Track) {
+    handler_type = FOURCC('v', 'i', 'd', 'e');
+  }
+
+  size += sizeof(pre_defined) +
+          sizeof(handler_type) +
+          sizeof(reserved);
+
+  *aBoxSize = size;
+
+  return NS_OK;
+}
+
+nsresult
+HandlerBox::Write()
+{
+  WRITE_FULLBOX(mControl, size)
+  mControl->Write(pre_defined);
+  mControl->Write(handler_type);
+  mControl->WriteArray(reserved, 3);
+
+  return NS_OK;
+}
+
+HandlerBox::HandlerBox(uint32_t aType, ISOControl* aControl)
+  : FullBox(NS_LITERAL_CSTRING("hdlr"), 0, 0, aControl)
+  , pre_defined(0)
+  , handler_type(0)
+{
+  mTrackType = aType;
+  memset(reserved, 0 , sizeof(reserved));
+  MOZ_COUNT_CTOR(HandlerBox);
+}
+
+HandlerBox::~HandlerBox()
+{
+  MOZ_COUNT_DTOR(HandlerBox);
+}
+
+MediaHeaderBox::MediaHeaderBox(uint32_t aType, ISOControl* aControl)
+  : FullBox(NS_LITERAL_CSTRING("mdhd"), 0, 0, aControl)
+  , creation_time(0)
+  , modification_time(0)
+  , timescale(0)
+  , duration(0)
+  , pad(0)
+  , lang1(0)
+  , lang2(0)
+  , lang3(0)
+  , pre_defined(0)
+{
+  mTrackType = aType;
+  MOZ_COUNT_CTOR(MediaHeaderBox);
+}
+
+MediaHeaderBox::~MediaHeaderBox()
+{
+  MOZ_COUNT_DTOR(MediaHeaderBox);
+}
+
+// Track timescale: audio uses the sample rate, video uses the clock rate.
+uint32_t
+MediaHeaderBox::GetTimeScale()
+{
+  if (mTrackType == Audio_Track) {
+    return mAudioMeta->GetAudioSampleRate();
+  }
+
+  return mVideoMeta->GetVideoClockRate();
+}
+
+// 'mdhd': times, timescale and the packed 15-bit language code. Duration is
+// 0 because this is fragmented mp4.
+nsresult
+MediaHeaderBox::Generate(uint32_t* aBoxSize)
+{
+  creation_time = mControl->GetTime();
+  modification_time = mControl->GetTime();
+  timescale = GetTimeScale();
+  duration = 0; // fragmented mp4
+
+  // Language is three 5-bit values (each letter minus 0x60) plus a pad bit;
+  // "und" = undetermined language.
+  pad = 0;
+  lang1 = 'u' - 0x60; // "und" underdetermined language
+  lang2 = 'n' - 0x60;
+  lang3 = 'd' - 0x60;
+  size += (pad.size() + lang1.size() + lang2.size() + lang3.size()) / CHAR_BIT;
+
+  pre_defined = 0;
+  size += sizeof(creation_time) +
+          sizeof(modification_time) +
+          sizeof(timescale) +
+          sizeof(duration) +
+          sizeof(pre_defined);
+
+  *aBoxSize = size;
+
+  return NS_OK;
+}
+
+nsresult
+MediaHeaderBox::Write()
+{
+  WRITE_FULLBOX(mControl, size)
+  mControl->Write(creation_time);
+  mControl->Write(modification_time);
+  mControl->Write(timescale);
+  mControl->Write(duration);
+  mControl->WriteBits(pad.to_ulong(), pad.size());
+  mControl->WriteBits(lang1.to_ulong(), lang1.size());
+  mControl->WriteBits(lang2.to_ulong(), lang2.size());
+  mControl->WriteBits(lang3.to_ulong(), lang3.size());
+  mControl->Write(pre_defined);
+
+  return NS_OK;
+}
+
+// 'moov' container: 'mvhd', one 'trak' per present track, then 'mvex'.
+MovieBox::MovieBox(ISOControl* aControl)
+  : DefaultContainerImpl(NS_LITERAL_CSTRING("moov"), aControl)
+{
+  boxes.AppendElement(new MovieHeaderBox(aControl));
+  if (aControl->HasAudioTrack()) {
+    boxes.AppendElement(new TrackBox(Audio_Track, aControl));
+  }
+  if (aControl->HasVideoTrack()) {
+    boxes.AppendElement(new TrackBox(Video_Track, aControl));
+  }
+  boxes.AppendElement(new MovieExtendsBox(aControl));
+  MOZ_COUNT_CTOR(MovieBox);
+}
+
+MovieBox::~MovieBox()
+{
+  MOZ_COUNT_DTOR(MovieBox);
+}
+
+// 'mvhd': presentation-wide header. Duration stays 0 in fragmented mp4 and
+// next_track_ID is taken from the controller.
+nsresult
+MovieHeaderBox::Generate(uint32_t* aBoxSize)
+{
+  creation_time = mControl->GetTime();
+  modification_time = mControl->GetTime();
+  timescale = GetTimeScale();
+  duration = 0; // The duration is always 0 in fragmented mp4.
+  next_track_ID = mControl->GetNextTrackID();
+
+  size += sizeof(next_track_ID) +
+          sizeof(creation_time) +
+          sizeof(modification_time) +
+          sizeof(timescale) +
+          sizeof(duration) +
+          sizeof(rate) +
+          sizeof(volume) +
+          sizeof(reserved16) +
+          sizeof(reserved32) +
+          sizeof(matrix) +
+          sizeof(pre_defined);
+
+  *aBoxSize = size;
+
+  return NS_OK;
+}
+
+nsresult
+MovieHeaderBox::Write()
+{
+  WRITE_FULLBOX(mControl, size)
+  mControl->Write(creation_time);
+  mControl->Write(modification_time);
+  mControl->Write(timescale);
+  mControl->Write(duration);
+  mControl->Write(rate);
+  mControl->Write(volume);
+  mControl->Write(reserved16);
+  mControl->WriteArray(reserved32, 2);
+  mControl->WriteArray(matrix, 9);
+  mControl->WriteArray(pre_defined, 6);
+  mControl->Write(next_track_ID);
+
+  return NS_OK;
+}
+
+// Movie timescale: audio-only files use the audio sample rate, otherwise the
+// video clock rate.
+uint32_t
+MovieHeaderBox::GetTimeScale()
+{
+  // Only audio track in container.
+  if (mAudioMeta && !mVideoMeta) {
+    return mAudioMeta->GetAudioSampleRate();
+  }
+
+  // return video rate
+  return mVideoMeta->GetVideoClockRate();
+}
+
+MovieHeaderBox::~MovieHeaderBox()
+{
+  MOZ_COUNT_DTOR(MovieHeaderBox);
+}
+
+// Defaults per 14496-12: rate 1.0 (16.16), volume 1.0 (8.8), identity
+// matrix; timescale is overwritten in Generate().
+MovieHeaderBox::MovieHeaderBox(ISOControl* aControl)
+  : FullBox(NS_LITERAL_CSTRING("mvhd"), 0, 0, aControl)
+  , creation_time(0)
+  , modification_time(0)
+  , timescale(90000)
+  , duration(0)
+  , rate(0x00010000)
+  , volume(0x0100)
+  , reserved16(0)
+  , next_track_ID(1)
+{
+  memcpy(matrix, iso_matrix, sizeof(matrix));
+  memset(reserved32, 0, sizeof(reserved32));
+  memset(pre_defined, 0, sizeof(pre_defined));
+  MOZ_COUNT_CTOR(MovieHeaderBox);
+}
+
+// 'tkhd': per-track header, flagged enabled/in-movie/in-preview.
+TrackHeaderBox::TrackHeaderBox(uint32_t aType, ISOControl* aControl)
+  : FullBox(NS_LITERAL_CSTRING("tkhd"), 0,
+            flags_track_enabled | flags_track_in_movie | flags_track_in_preview,
+            aControl)
+  , creation_time(0)
+  , modification_time(0)
+  , track_ID(0)
+  , reserved(0)
+  , duration(0)
+  , layer(0)
+  , alternate_group(0)
+  , volume(0)
+  , reserved3(0)
+  , width(0)
+  , height(0)
+{
+  mTrackType = aType;
+  memcpy(matrix, iso_matrix, sizeof(matrix));
+  memset(reserved2, 0, sizeof(reserved2));
+  MOZ_COUNT_CTOR(TrackHeaderBox);
+}
+
+TrackHeaderBox::~TrackHeaderBox()
+{
+  MOZ_COUNT_DTOR(TrackHeaderBox);
+}
+
+// Fill track_ID, volume and (for video) the 16.16 fixed-point display size,
+// then account for every serialized field.
+nsresult
+TrackHeaderBox::Generate(uint32_t* aBoxSize)
+{
+  creation_time = mControl->GetTime();
+  modification_time = mControl->GetTime();
+  track_ID = (mTrackType == Audio_Track ?
+                mControl->GetTrackID(mAudioMeta->GetKind()) :
+                mControl->GetTrackID(mVideoMeta->GetKind()));
+  // fragmented mp4
+  duration = 0;
+
+  // volume, audiotrack is always 0x0100 in 14496-12 8.3.2.2
+  volume = (mTrackType == Audio_Track ? 0x0100 : 0);
+
+  if (mTrackType == Video_Track) {
+    // Width/height are 16.16 fixed point, hence the << 16.
+    width = mVideoMeta->GetVideoDisplayWidth() << 16;
+    height = mVideoMeta->GetVideoDisplayHeight() << 16;
+    // Check display size, using the pixel size if any of them is invalid.
+    if (!width || !height) {
+      width = mVideoMeta->GetVideoWidth() << 16;
+      height = mVideoMeta->GetVideoHeight() << 16;
+    }
+  }
+
+  size += sizeof(creation_time) +
+          sizeof(modification_time) +
+          sizeof(track_ID) +
+          sizeof(reserved) +
+          sizeof(duration) +
+          sizeof(reserved2) +
+          sizeof(layer) +
+          sizeof(alternate_group) +
+          sizeof(volume) +
+          sizeof(reserved3) +
+          sizeof(matrix) +
+          sizeof(width) +
+          sizeof(height);
+
+  *aBoxSize = size;
+
+  return NS_OK;
+}
+
+nsresult
+TrackHeaderBox::Write()
+{
+  WRITE_FULLBOX(mControl, size)
+  mControl->Write(creation_time);
+  mControl->Write(modification_time);
+  mControl->Write(track_ID);
+  mControl->Write(reserved);
+  mControl->Write(duration);
+  mControl->WriteArray(reserved2, 2);
+  mControl->Write(layer);
+  mControl->Write(alternate_group);
+  mControl->Write(volume);
+  mControl->Write(reserved3);
+  mControl->WriteArray(matrix, 9);
+  mControl->Write(width);
+  mControl->Write(height);
+
+  return NS_OK;
+}
+
+// 'ftyp': choose major brand and compatible-brand list from the muxing type
+// (fragmented MP4 / 3GP / 3G2). Audio-only MP4 is branded "M4A ".
+nsresult
+FileTypeBox::Generate(uint32_t* aBoxSize)
+{
+  minor_version = 0;
+
+  if (mControl->GetMuxingType() == ISOMediaWriter::TYPE_FRAG_MP4) {
+    if (!mControl->HasVideoTrack() && mControl->HasAudioTrack()) {
+      major_brand = "M4A ";
+    } else {
+      major_brand = "MP42";
+    }
+    compatible_brands.AppendElement("mp42");
+    compatible_brands.AppendElement("isom");
+  } else if (mControl->GetMuxingType() == ISOMediaWriter::TYPE_FRAG_3GP) {
+    major_brand = "3gp9";
+    // According to 3GPP TS 26.244 V12.2.0, section 5.3.4, it's recommended to
+    // list all compatible brands here. 3GP spec supports fragment from '3gp6'.
+    compatible_brands.AppendElement("3gp9");
+    compatible_brands.AppendElement("3gp8");
+    compatible_brands.AppendElement("3gp7");
+    compatible_brands.AppendElement("3gp6");
+    compatible_brands.AppendElement("isom");
+  } else if (mControl->GetMuxingType() == ISOMediaWriter::TYPE_FRAG_3G2) {
+    major_brand = "3g2a";
+    // 3GPP2 Release 0 and A and 3GPP Release 6 allow movie fragmentation
+    compatible_brands.AppendElement("3gp9");
+    compatible_brands.AppendElement("3gp8");
+    compatible_brands.AppendElement("3gp7");
+    compatible_brands.AppendElement("3gp6");
+    compatible_brands.AppendElement("isom");
+    compatible_brands.AppendElement("3g2c");
+    compatible_brands.AppendElement("3g2b");
+    compatible_brands.AppendElement("3g2a");
+  } else {
+    MOZ_ASSERT(0);
+  }
+
+  // Each brand is a 4-byte FourCC.
+  size += major_brand.Length() +
+          sizeof(minor_version) +
+          compatible_brands.Length() * 4;
+
+  *aBoxSize = size;
+
+  return NS_OK;
+}
+
+nsresult
+FileTypeBox::Write()
+{
+  BoxSizeChecker checker(mControl, size);
+  Box::Write();
+  mControl->WriteFourCC(major_brand.get());
+  mControl->Write(minor_version);
+  uint32_t len = compatible_brands.Length();
+  for (uint32_t i = 0; i < len; i++) {
+    mControl->WriteFourCC(compatible_brands[i].get());
+  }
+
+  return NS_OK;
+}
+
+FileTypeBox::FileTypeBox(ISOControl* aControl)
+  : Box(NS_LITERAL_CSTRING("ftyp"), aControl)
+  , minor_version(0)
+{
+  MOZ_COUNT_CTOR(FileTypeBox);
+}
+
+FileTypeBox::~FileTypeBox()
+{
+  MOZ_COUNT_DTOR(FileTypeBox);
+}
+
+// 'mdia' container: per 14496-12 8.4.1 it holds the media header, handler
+// and media information boxes for one track. aType is Audio_Track or
+// Video_Track and is forwarded to every child box.
+MediaBox::MediaBox(uint32_t aType, ISOControl* aControl)
+  : DefaultContainerImpl(NS_LITERAL_CSTRING("mdia"), aControl)
+{
+  mTrackType = aType;
+  boxes.AppendElement(new MediaHeaderBox(aType, aControl));
+  boxes.AppendElement(new HandlerBox(aType, aControl));
+  boxes.AppendElement(new MediaInformationBox(aType, aControl));
+  MOZ_COUNT_CTOR(MediaBox);
+}
+
+MediaBox::~MediaBox()
+{
+  MOZ_COUNT_DTOR(MediaBox);
+}
+
+// Compute this container's size: the 8-byte header (set in the Box ctor)
+// plus the generated size of every child box. Fails fast on the first child
+// that fails to generate.
+nsresult
+DefaultContainerImpl::Generate(uint32_t* aBoxSize)
+{
+  nsresult rv;
+  uint32_t box_size;
+  uint32_t len = boxes.Length();
+  for (uint32_t i = 0; i < len; i++) {
+    rv = boxes.ElementAt(i)->Generate(&box_size);
+    NS_ENSURE_SUCCESS(rv, rv);
+    size += box_size;
+  }
+  *aBoxSize = size;
+  return NS_OK;
+}
+
+// Collect every box matching aType: this container itself (via Box::Find)
+// and, recursively, all of its children.
+nsresult
+DefaultContainerImpl::Find(const nsACString& aType,
+                           nsTArray<RefPtr<MuxerOperation>>& aOperations)
+{
+  // Check whether this container itself matches the requested type.
+  nsresult rv = Box::Find(aType, aOperations);
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  // Then descend into every child box.
+  for (uint32_t idx = 0; idx < boxes.Length(); idx++) {
+    rv = boxes.ElementAt(idx)->Find(aType, aOperations);
+    NS_ENSURE_SUCCESS(rv, rv);
+  }
+  return NS_OK;
+}
+
+// Serialize this container: its own header, then every child in order.
+// BoxSizeChecker asserts the written byte count matches 'size' from
+// Generate().
+nsresult
+DefaultContainerImpl::Write()
+{
+  BoxSizeChecker checker(mControl, size);
+  Box::Write();
+
+  nsresult rv;
+  uint32_t len = boxes.Length();
+  for (uint32_t i = 0; i < len; i++) {
+    rv = boxes.ElementAt(i)->Write();
+    NS_ENSURE_SUCCESS(rv, rv);
+  }
+
+  return NS_OK;
+}
+
+// Subclasses populate 'boxes' with their children in their own ctors.
+DefaultContainerImpl::DefaultContainerImpl(const nsACString& aType,
+                                           ISOControl* aControl)
+  : Box(aType, aControl)
+{
+}
+
+// Write the common box header: 4-byte size followed by the 4-byte fourCC
+// type (14496-12 4.2).
+nsresult
+Box::Write()
+{
+  mControl->Write(size);
+  mControl->WriteFourCC(boxType.get());
+  return NS_OK;
+}
+
+// Append this box to aOperations if its fourCC matches aType. Leaf boxes
+// have no children, so there is nothing to recurse into here.
+nsresult
+Box::Find(const nsACString& aType, nsTArray<RefPtr<MuxerOperation>>& aOperations)
+{
+  if (boxType == aType) {
+    aOperations.AppendElement(this);
+  }
+  return NS_OK;
+}
+
+// size starts at 8: 4 bytes for the size field + 4 bytes for the fourCC.
+// Track metadata is snapshotted from the control so every box can consult it.
+Box::Box(const nsACString& aType, ISOControl* aControl)
+  : size(8), mControl(aControl)
+{
+  MOZ_ASSERT(aType.Length() == 4); // box types are exactly four characters
+  boxType = aType;
+  aControl->GetAudioMetadata(mAudioMeta);
+  aControl->GetVideoMetadata(mVideoMeta);
+}
+
+// A FullBox adds a 1-byte version and 24-bit flags to the plain box header
+// (14496-12 4.2). flags.size() is 24, so flags.size() / CHAR_BIT adds the
+// 3 flag bytes to 'size'.
+FullBox::FullBox(const nsACString& aType, uint8_t aVersion, uint32_t aFlags,
+                 ISOControl* aControl)
+  : Box(aType, aControl)
+{
+  std::bitset<24> tmp_flags(aFlags);
+  version = aVersion;
+  flags = tmp_flags;
+  size += sizeof(version) + flags.size() / CHAR_BIT;
+}
+
+// Write header, then version byte and the 24 flag bits.
+nsresult
+FullBox::Write()
+{
+  Box::Write();
+  mControl->Write(version);
+  mControl->WriteBits(flags.to_ulong(), flags.size());
+  return NS_OK;
+}
+
+// 'trak' container (14496-12 8.3.1): one track header plus the media box
+// for the given track type.
+TrackBox::TrackBox(uint32_t aTrackType, ISOControl* aControl)
+  : DefaultContainerImpl(NS_LITERAL_CSTRING("trak"), aControl)
+{
+  boxes.AppendElement(new TrackHeaderBox(aTrackType, aControl));
+  boxes.AppendElement(new MediaBox(aTrackType, aControl));
+  MOZ_COUNT_CTOR(TrackBox);
+}
+
+TrackBox::~TrackBox()
+{
+  MOZ_COUNT_DTOR(TrackBox);
+}
+
+// Sample entry base (14496-12 8.5.2): 6 reserved bytes plus a 2-byte
+// data_reference_index follow the common box header. The index was
+// previously initialized to 0 and immediately reassigned; initialize it
+// once in the init list instead.
+SampleEntryBox::SampleEntryBox(const nsACString& aFormat, ISOControl* aControl)
+  : Box(aFormat, aControl)
+  , data_reference_index(1) // There is only one data reference in each track.
+{
+  memset(reserved, 0, sizeof(reserved));
+  size += sizeof(reserved) +
+          sizeof(data_reference_index);
+}
+
+// Write the sample-entry prefix: box header, 6 reserved bytes, then the
+// data reference index. Subclasses append their codec-specific fields.
+nsresult
+SampleEntryBox::Write()
+{
+  Box::Write();
+  mControl->Write(reserved, sizeof(reserved));
+  mControl->Write(data_reference_index);
+  return NS_OK;
+}
+
+// Write the audio sample entry fields (14496-12 8.5.2.2) after the common
+// sample-entry prefix. Field order here must match the byte layout the size
+// computation in the ctor accounts for.
+nsresult
+AudioSampleEntry::Write()
+{
+  SampleEntryBox::Write();
+  mControl->Write(sound_version);
+  mControl->Write(reserved2, sizeof(reserved2));
+  mControl->Write(channels);
+  mControl->Write(sample_size);
+  mControl->Write(compressionId);
+  mControl->Write(packet_size);
+  mControl->Write(timeScale);
+  return NS_OK;
+}
+
+// Audio codec sample-entry base. Channel count comes from the audio track
+// metadata; timeScale holds the sample rate as 16.16 fixed point (rate in
+// the high 16 bits), hence the << 16.
+AudioSampleEntry::AudioSampleEntry(const nsACString& aFormat, ISOControl* aControl)
+  : SampleEntryBox(aFormat, aControl)
+  , sound_version(0)
+  , channels(2)
+  , sample_size(16)
+  , compressionId(0)
+  , packet_size(0)
+  , timeScale(0)
+{
+  memset(reserved2, 0 , sizeof(reserved2));
+  channels = mAudioMeta->GetAudioChannels();
+  timeScale = mAudioMeta->GetAudioSampleRate() << 16;
+
+  // Account for every field written in AudioSampleEntry::Write().
+  size += sizeof(sound_version) +
+          sizeof(reserved2) +
+          sizeof(sample_size) +
+          sizeof(channels) +
+          sizeof(packet_size) +
+          sizeof(compressionId) +
+          sizeof(timeScale);
+
+  MOZ_COUNT_CTOR(AudioSampleEntry);
+}
+
+AudioSampleEntry::~AudioSampleEntry()
+{
+  MOZ_COUNT_DTOR(AudioSampleEntry);
+}
+
+// Write the visual sample entry fields (14496-12 8.5.2.2) after the common
+// sample-entry prefix. Field order must match the size computation in the
+// ctor.
+nsresult
+VisualSampleEntry::Write()
+{
+  SampleEntryBox::Write();
+
+  mControl->Write(reserved, sizeof(reserved));
+  mControl->Write(width);
+  mControl->Write(height);
+  mControl->Write(horizresolution);
+  mControl->Write(vertresolution);
+  mControl->Write(reserved2);
+  mControl->Write(frame_count);
+  mControl->Write(compressorName, sizeof(compressorName));
+  mControl->Write(depth);
+  mControl->Write(pre_defined);
+
+  return NS_OK;
+}
+
+// Video codec sample-entry base. Resolution, frame_count, depth and
+// pre_defined take the default values mandated by 14496-12 8.5.2.2; width
+// and height come from the video track metadata.
+VisualSampleEntry::VisualSampleEntry(const nsACString& aFormat, ISOControl* aControl)
+  : SampleEntryBox(aFormat, aControl)
+  , width(0)
+  , height(0)
+  , horizresolution(resolution_72_dpi)
+  , vertresolution(resolution_72_dpi)
+  , reserved2(0)
+  , frame_count(1)
+  , depth(video_depth)
+  , pre_defined(-1)
+{
+  memset(reserved, 0 , sizeof(reserved));
+  memset(compressorName, 0 , sizeof(compressorName));
+
+  // both fields occupy 16 bits defined in 14496-2 6.2.3.
+  width = mVideoMeta->GetVideoWidth();
+  height = mVideoMeta->GetVideoHeight();
+
+  // Account for every field written in VisualSampleEntry::Write().
+  size += sizeof(reserved) +
+          sizeof(width) +
+          sizeof(height) +
+          sizeof(horizresolution) +
+          sizeof(vertresolution) +
+          sizeof(reserved2) +
+          sizeof(frame_count) +
+          sizeof(compressorName) +
+          sizeof(depth) +
+          sizeof(pre_defined);
+
+  MOZ_COUNT_CTOR(VisualSampleEntry);
+}
+
+VisualSampleEntry::~VisualSampleEntry()
+{
+  MOZ_COUNT_DTOR(VisualSampleEntry);
+}
+
+}
diff --git a/dom/media/encoder/fmp4_muxer/ISOMediaBoxes.h b/dom/media/encoder/fmp4_muxer/ISOMediaBoxes.h
new file mode 100644
index 000000000..a6dc1b046
--- /dev/null
+++ b/dom/media/encoder/fmp4_muxer/ISOMediaBoxes.h
@@ -0,0 +1,781 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ISOMediaBoxes_h_
+#define ISOMediaBoxes_h_
+
+#include <bitset>
+#include "nsString.h"
+#include "nsTArray.h"
+#include "nsAutoPtr.h"
+#include "MuxerOperation.h"
+#include "mozilla/UniquePtr.h"
+
+#define WRITE_FULLBOX(_compositor, _size) \
+ BoxSizeChecker checker(_compositor, _size); \
+ FullBox::Write();
+
+#define FOURCC(a, b, c, d) ( ((a) << 24) | ((b) << 16) | ((c) << 8) | (d) )
+
+namespace mozilla {
+
+/**
+ * track type from spec 8.4.3.3
+ */
+#define Audio_Track 0x01
+#define Video_Track 0x02
+
+class AudioTrackMetadata;
+class VideoTrackMetadata;
+class ISOControl;
+
+/**
+ * This is the base class for all ISO media format boxes.
+ * It provides the fields of box type(four CC) and size.
+ * The data members in the beginning of a Box (or its descendants)
+ * are the 14496-12 defined member. Other members prefix with 'm'
+ * are private control data.
+ *
+ * This class is for inherited only, it shouldn't be instanced directly.
+ */
+class Box : public MuxerOperation {
+protected:
+  // ISO BMFF members
+  uint32_t size; // 14496-12 4-2 'Object Structure'. Size of this box.
+  nsCString boxType; // four CC name, all table names are listed in
+ // 14496-12 table 1.
+
+public:
+  // MuxerOperation methods
+  nsresult Write() override;
+  nsresult Find(const nsACString& aType,
+                nsTArray<RefPtr<MuxerOperation>>& aOperations) override;
+
+  // RAII helper: compares the byte count written during Write() against the
+  // size computed in Generate(). If they are not equal, it asserts on
+  // destruction.
+  class BoxSizeChecker {
+  public:
+    BoxSizeChecker(ISOControl* aControl, uint32_t aSize);
+    ~BoxSizeChecker();
+
+    uint32_t ori_size; // bytes already written when the checker was created
+    uint32_t box_size; // expected size of the box being checked
+    ISOControl* mControl;
+  };
+
+protected:
+  Box() = delete;
+  Box(const nsACString& aType, ISOControl* aControl);
+
+  ISOControl* mControl; // non-owning; outlives every box it creates — TODO confirm
+  RefPtr<AudioTrackMetadata> mAudioMeta;
+  RefPtr<VideoTrackMetadata> mVideoMeta;
+};
+
+/**
+ * FullBox (and its descendants) is the box which contains the 'real' data
+ * members. It is the edge in the ISO box structure and it doesn't contain
+ * any box.
+ *
+ * This class is for inherited only, it shouldn't be instanced directly.
+ */
+class FullBox : public Box {
+public:
+  // ISO BMFF members
+  uint8_t version; // 14496-12 4.2 'Object Structure'
+  std::bitset<24> flags; // 24-bit flag field following the version byte
+
+  // MuxerOperation methods
+  nsresult Write() override;
+
+protected:
+  // FullBox methods
+  FullBox(const nsACString& aType, uint8_t aVersion, uint32_t aFlags,
+          ISOControl* aControl);
+  FullBox() = delete;
+};
+
+/**
+ * The default implementation of the container box.
+ * Basically, the container box inherits this class and overrides the
+ * constructor only.
+ *
+ * According to 14496-12 3.1.1 'container box', a container box is
+ * 'box whose sole purpose is to contain and group a set of related boxes'
+ *
+ * This class is for inherited only, it shouldn't be instanced directly.
+ */
+class DefaultContainerImpl : public Box {
+public:
+  // MuxerOperation methods: Generate/Write/Find all delegate to the child
+  // boxes in 'boxes', in insertion order.
+  nsresult Generate(uint32_t* aBoxSize) override;
+  nsresult Write() override;
+  nsresult Find(const nsACString& aType,
+                nsTArray<RefPtr<MuxerOperation>>& aOperations) override;
+
+protected:
+  // DefaultContainerImpl methods
+  DefaultContainerImpl(const nsACString& aType, ISOControl* aControl);
+  DefaultContainerImpl() = delete;
+
+  // Child boxes, populated by subclass constructors.
+  nsTArray<RefPtr<MuxerOperation>> boxes;
+};
+
+// 14496-12 4.3 'File Type Box'
+// Box type: 'ftyp'
+class FileTypeBox : public Box {
+public:
+  // ISO BMFF members
+  nsCString major_brand; // four chars, chosen by Generate() from muxing type
+  uint32_t minor_version;
+  nsTArray<nsCString> compatible_brands; // each entry is a 4-char brand code
+
+  // MuxerOperation methods
+  nsresult Generate(uint32_t* aBoxSize) override;
+  nsresult Write() override;
+
+  // FileTypeBox methods
+  FileTypeBox(ISOControl* aControl);
+  ~FileTypeBox();
+};
+
+// 14496-12 8.2.1 'Movie Box'
+// Box type: 'moov'
+// MovieBox contains MovieHeaderBox, TrackBox and MovieExtendsBox.
+class MovieBox : public DefaultContainerImpl {
+public:
+ MovieBox(ISOControl* aControl);
+ ~MovieBox();
+};
+
+// 14496-12 8.2.2 'Movie Header Box'
+// Box type: 'mvhd'
+class MovieHeaderBox : public FullBox {
+public:
+ // ISO BMFF members
+ uint32_t creation_time;
+ uint32_t modification_time;
+ uint32_t timescale;
+ uint32_t duration;
+ uint32_t rate;
+ uint16_t volume;
+ uint16_t reserved16;
+ uint32_t reserved32[2];
+ uint32_t matrix[9];
+ uint32_t pre_defined[6];
+ uint32_t next_track_ID;
+
+ // MuxerOperation methods
+ nsresult Generate(uint32_t* aBoxSize) override;
+ nsresult Write() override;
+
+ // MovieHeaderBox methods
+ MovieHeaderBox(ISOControl* aControl);
+ ~MovieHeaderBox();
+ uint32_t GetTimeScale();
+};
+
+// 14496-12 8.4.2 'Media Header Box'
+// Box type: 'mdhd'
+class MediaHeaderBox : public FullBox {
+public:
+ // ISO BMFF members
+ uint32_t creation_time;
+ uint32_t modification_time;
+ uint32_t timescale;
+ uint32_t duration;
+ std::bitset<1> pad;
+ std::bitset<5> lang1;
+ std::bitset<5> lang2;
+ std::bitset<5> lang3;
+ uint16_t pre_defined;
+
+ // MuxerOperation methods
+ nsresult Generate(uint32_t* aBoxSize) override;
+ nsresult Write() override;
+
+ // MediaHeaderBox methods
+ MediaHeaderBox(uint32_t aType, ISOControl* aControl);
+ ~MediaHeaderBox();
+ uint32_t GetTimeScale();
+
+protected:
+ uint32_t mTrackType;
+};
+
+// 14496-12 8.3.1 'Track Box'
+// Box type: 'trak'
+// TrackBox contains TrackHeaderBox and MediaBox.
+class TrackBox : public DefaultContainerImpl {
+public:
+ TrackBox(uint32_t aTrackType, ISOControl* aControl);
+ ~TrackBox();
+};
+
+// 14496-12 8.1.1 'Media Data Box'
+// Box type: 'mdat'
+class MediaDataBox : public Box {
+public:
+ // MuxerOperation methods
+ nsresult Generate(uint32_t* aBoxSize) override;
+ nsresult Write() override;
+
+ // MediaDataBox methods
+ uint32_t GetAllSampleSize() { return mAllSampleSize; }
+ uint32_t FirstSampleOffsetInMediaDataBox() { return mFirstSampleOffset; }
+ MediaDataBox(uint32_t aTrackType, ISOControl* aControl);
+ ~MediaDataBox();
+
+protected:
+ uint32_t mAllSampleSize; // All audio and video sample size in this box.
+ uint32_t mFirstSampleOffset; // The offset of first sample in this box from
+ // the beginning of this mp4 file.
+ uint32_t mTrackType;
+};
+
+// flags for TrackRunBox::flags, 14496-12 8.8.8.1.
+#define flags_data_offset_present 0x000001
+#define flags_first_sample_flags_present 0x000002
+#define flags_sample_duration_present 0x000100
+#define flags_sample_size_present 0x000200
+#define flags_sample_flags_present 0x000400
+#define flags_sample_composition_time_offsets_present 0x000800
+
+// flag for TrackRunBox::tbl::sample_flags and TrackExtendsBox::default_sample_flags
+// which is defined in 14496-12 8.8.3.1.
+uint32_t set_sample_flags(bool aSync);
+
+// 14496-12 8.8.8 'Track Fragment Run Box'
+// Box type: 'trun'
+class TrackRunBox : public FullBox {
+public:
+ // ISO BMFF members
+ typedef struct {
+ uint32_t sample_duration;
+ uint32_t sample_size;
+ uint32_t sample_flags;
+ uint32_t sample_composition_time_offset;
+ } tbl;
+
+ uint32_t sample_count;
+ // the following are optional fields
+ uint32_t data_offset; // data offset exists when audio/video are present in file.
+ uint32_t first_sample_flags;
+ UniquePtr<tbl[]> sample_info_table;
+
+ // MuxerOperation methods
+ nsresult Generate(uint32_t* aBoxSize) override;
+ nsresult Write() override;
+
+ // TrackRunBox methods
+ uint32_t GetAllSampleSize() { return mAllSampleSize; }
+ nsresult SetDataOffset(uint32_t aOffset);
+
+ TrackRunBox(uint32_t aType, uint32_t aFlags, ISOControl* aControl);
+ ~TrackRunBox();
+
+protected:
+ uint32_t fillSampleTable();
+
+ uint32_t mAllSampleSize;
+ uint32_t mTrackType;
+};
+
+// tf_flags in TrackFragmentHeaderBox, 14496-12 8.8.7.1.
+#define base_data_offset_present 0x000001
+#define sample_description_index_present 0x000002
+#define default_sample_duration_present 0x000008
+#define default_sample_size_present 0x000010
+#define default_sample_flags_present 0x000020
+#define duration_is_empty 0x010000
+#define default_base_is_moof 0x020000
+
+// 14496-12 8.8.7 'Track Fragment Header Box'
+// Box type: 'tfhd'
+class TrackFragmentHeaderBox : public FullBox {
+public:
+ // ISO BMFF members
+ uint32_t track_ID;
+ uint64_t base_data_offset;
+ uint32_t default_sample_duration;
+
+ // MuxerOperation methods
+ nsresult Generate(uint32_t* aBoxSize) override;
+ nsresult Write() override;
+
+ // TrackFragmentHeaderBox methods
+ nsresult UpdateBaseDataOffset(uint64_t aOffset); // The offset of the first
+ // sample in file.
+
+ TrackFragmentHeaderBox(uint32_t aType, uint32_t aFlags, ISOControl* aControl);
+ ~TrackFragmentHeaderBox();
+
+protected:
+ uint32_t mTrackType;
+};
+
+// 14496-12 8.8.6 'Track Fragment Box'
+// Box type: 'traf'
+// TrackFragmentBox cotains TrackFragmentHeaderBox and TrackRunBox.
+class TrackFragmentBox : public DefaultContainerImpl {
+public:
+ TrackFragmentBox(uint32_t aType, ISOControl* aControl);
+ ~TrackFragmentBox();
+
+protected:
+ uint32_t mTrackType;
+};
+
+// 14496-12 8.8.5 'Movie Fragment Header Box'
+// Box type: 'mfhd'
+class MovieFragmentHeaderBox : public FullBox {
+public:
+ // ISO BMFF members
+ uint32_t sequence_number;
+
+ // MuxerOperation methods
+ nsresult Generate(uint32_t* aBoxSize) override;
+ nsresult Write() override;
+
+ // MovieFragmentHeaderBox methods
+ MovieFragmentHeaderBox(uint32_t aType, ISOControl* aControl);
+ ~MovieFragmentHeaderBox();
+
+protected:
+ uint32_t mTrackType;
+};
+
+// 14496-12 8.8.4 'Movie Fragment Box'
+// Box type: 'moof'
+// MovieFragmentBox contains MovieFragmentHeaderBox and TrackFragmentBox.
+class MovieFragmentBox : public DefaultContainerImpl {
+public:
+ // MuxerOperation methods
+ nsresult Generate(uint32_t* aBoxSize) override;
+
+ // MovieFragmentBox methods
+ MovieFragmentBox(uint32_t aType, ISOControl* aControl);
+ ~MovieFragmentBox();
+
+protected:
+ uint32_t mTrackType;
+};
+
+// 14496-12 8.8.3 'Track Extends Box'
+// Box type: 'trex'
+class TrackExtendsBox : public FullBox {
+public:
+ // ISO BMFF members
+ uint32_t track_ID;
+ uint32_t default_sample_description_index;
+ uint32_t default_sample_duration;
+ uint32_t default_sample_size;
+ uint32_t default_sample_flags;
+
+ // MuxerOperation methods
+ nsresult Generate(uint32_t* aBoxSize) override;
+ nsresult Write() override;
+
+ // TrackExtendsBox methods
+ TrackExtendsBox(uint32_t aType, ISOControl* aControl);
+ ~TrackExtendsBox();
+
+protected:
+ uint32_t mTrackType;
+};
+
+// 14496-12 8.8.1 'Movie Extends Box'
+// Box type: 'mvex'
+// MovieExtendsBox contains TrackExtendsBox.
+class MovieExtendsBox : public DefaultContainerImpl {
+public:
+ MovieExtendsBox(ISOControl* aControl);
+ ~MovieExtendsBox();
+};
+
+// 14496-12 8.7.5 'Chunk Offset Box'
+// Box type: 'stco'
+class ChunkOffsetBox : public FullBox {
+public:
+ // ISO BMFF members
+ typedef struct {
+ uint32_t chunk_offset;
+ } tbl;
+
+ uint32_t entry_count;
+ UniquePtr<tbl[]> sample_tbl;
+
+ // MuxerOperation methods
+ nsresult Generate(uint32_t* aBoxSize) override;
+ nsresult Write() override;
+
+ // ChunkOffsetBox methods
+ ChunkOffsetBox(uint32_t aType, ISOControl* aControl);
+ ~ChunkOffsetBox();
+
+protected:
+ uint32_t mTrackType;
+};
+
+// 14496-12 8.7.4 'Sample To Chunk Box'
+// Box type: 'stsc'
+class SampleToChunkBox : public FullBox {
+public:
+ // ISO BMFF members
+ typedef struct {
+ uint32_t first_chunk;
+ uint32_t sample_per_chunk;
+ uint32_t sample_description_index;
+ } tbl;
+
+ uint32_t entry_count;
+ UniquePtr<tbl[]> sample_tbl;
+
+ // MuxerOperation methods
+ nsresult Generate(uint32_t* aBoxSize) override;
+ nsresult Write() override;
+
+ // SampleToChunkBox methods
+ SampleToChunkBox(uint32_t aType, ISOControl* aControl);
+ ~SampleToChunkBox();
+
+protected:
+ uint32_t mTrackType;
+};
+
+// 14496-12 8.6.1.2 'Decoding Time to Sample Box'
+// Box type: 'stts'
+class TimeToSampleBox : public FullBox {
+public:
+ // ISO BMFF members
+ typedef struct {
+ uint32_t sample_count;
+ uint32_t sample_delta;
+ } tbl;
+
+ uint32_t entry_count;
+ UniquePtr<tbl[]> sample_tbl;
+
+ // MuxerOperation methods
+ nsresult Generate(uint32_t* aBoxSize) override;
+ nsresult Write() override;
+
+ // TimeToSampleBox methods
+ TimeToSampleBox(uint32_t aType, ISOControl* aControl);
+ ~TimeToSampleBox();
+
+protected:
+ uint32_t mTrackType;
+};
+
+/**
+ * 14496-12 8.5.2 'Sample Description Box'
+ * This is the base class for VisualSampleEntry and AudioSampleEntry.
+ *
+ * This class is for inherited only, it shouldn't be instanced directly.
+ *
+ * The inhertied tree of a codec box should be:
+ *
+ * +--> AVCSampleEntry
+ * +--> VisualSampleEntryBox +
+ * | +--> ...
+ * SampleEntryBox +
+ * | +--> MP4AudioSampleEntry
+ * +--> AudioSampleEntryBox +
+ * +--> AMRSampleEntry
+ * +
+ * +--> ...
+ *
+ */
+class SampleEntryBox : public Box {
+public:
+  // ISO BMFF members
+  uint8_t reserved[6]; // always zero, 14496-12 8.5.2
+  uint16_t data_reference_index; // index into the 'dref' box entries
+
+  // sampleentrybox methods
+  SampleEntryBox(const nsACString& aFormat, ISOControl* aControl);
+
+  // MuxerOperation methods
+  nsresult Write() override;
+
+protected:
+  SampleEntryBox() = delete;
+};
+
+// 14496-12 8.5.2 'Sample Description Box'
+// Box type: 'stsd'
+class SampleDescriptionBox : public FullBox {
+public:
+ // ISO BMFF members
+ uint32_t entry_count;
+ RefPtr<SampleEntryBox> sample_entry_box;
+
+ // MuxerOperation methods
+ nsresult Generate(uint32_t* aBoxSize) override;
+ nsresult Write() override;
+
+ // SampleDescriptionBox methods
+ SampleDescriptionBox(uint32_t aType, ISOControl* aControl);
+ ~SampleDescriptionBox();
+
+protected:
+ nsresult CreateAudioSampleEntry(RefPtr<SampleEntryBox>& aSampleEntry);
+ nsresult CreateVideoSampleEntry(RefPtr<SampleEntryBox>& aSampleEntry);
+
+ uint32_t mTrackType;
+};
+
+// 14496-12 8.5.2.2
+// The base class for audio codec box.
+// This class is for inherited only, it shouldn't be instanced directly.
+class AudioSampleEntry : public SampleEntryBox {
+public:
+ // ISO BMFF members
+ uint16_t sound_version;
+ uint8_t reserved2[6];
+ uint16_t channels;
+ uint16_t sample_size;
+ uint16_t compressionId;
+ uint16_t packet_size;
+ uint32_t timeScale; // (sample rate of media) <<16
+
+ // MuxerOperation methods
+ nsresult Write() override;
+
+ ~AudioSampleEntry();
+
+protected:
+ AudioSampleEntry(const nsACString& aFormat, ISOControl* aControl);
+};
+
+// 14496-12 8.5.2.2
+// The base class for video codec box.
+// This class is for inherited only, it shouldn't be instanced directly.
+class VisualSampleEntry : public SampleEntryBox {
+public:
+ // ISO BMFF members
+ uint8_t reserved[16];
+ uint16_t width;
+ uint16_t height;
+
+ uint32_t horizresolution; // 72 dpi
+ uint32_t vertresolution; // 72 dpi
+ uint32_t reserved2;
+ uint16_t frame_count; // 1, defined in 14496-12 8.5.2.2
+
+ uint8_t compressorName[32];
+ uint16_t depth; // 0x0018, defined in 14496-12 8.5.2.2;
+ uint16_t pre_defined; // -1, defined in 14496-12 8.5.2.2;
+
+ // MuxerOperation methods
+ nsresult Write() override;
+
+ // VisualSampleEntry methods
+ ~VisualSampleEntry();
+
+protected:
+ VisualSampleEntry(const nsACString& aFormat, ISOControl* aControl);
+};
+
+// 14496-12 8.7.3.2 'Sample Size Box'
+// Box type: 'stsz'
+class SampleSizeBox : public FullBox {
+public:
+ // ISO BMFF members
+ uint32_t sample_size;
+ uint32_t sample_count;
+
+ // MuxerOperation methods
+ nsresult Generate(uint32_t* aBoxSize) override;
+ nsresult Write() override;
+
+ // SampleSizeBox methods
+ SampleSizeBox(ISOControl* aControl);
+ ~SampleSizeBox();
+};
+
+// 14496-12 8.5.1 'Sample Table Box'
+// Box type: 'stbl'
+//
+// SampleTableBox contains SampleDescriptionBox,
+// TimeToSampleBox,
+// SampleToChunkBox,
+// SampleSizeBox and
+// ChunkOffsetBox.
+class SampleTableBox : public DefaultContainerImpl {
+public:
+ SampleTableBox(uint32_t aType, ISOControl* aControl);
+ ~SampleTableBox();
+};
+
+// 14496-12 8.7.2 'Data Reference Box'
+// Box type: 'url '
+class DataEntryUrlBox : public FullBox {
+public:
+ // ISO BMFF members
+ // flags in DataEntryUrlBox::flags
+ const static uint16_t flags_media_at_the_same_file = 0x0001;
+
+ nsCString location;
+
+ // MuxerOperation methods
+ nsresult Generate(uint32_t* aBoxSize) override;
+ nsresult Write() override;
+
+ // DataEntryUrlBox methods
+ DataEntryUrlBox();
+ DataEntryUrlBox(ISOControl* aControl);
+ DataEntryUrlBox(const DataEntryUrlBox& aBox);
+ ~DataEntryUrlBox();
+};
+
+// 14496-12 8.7.2 'Data Reference Box'
+// Box type: 'dref'
+class DataReferenceBox : public FullBox {
+public:
+ // ISO BMFF members
+ uint32_t entry_count;
+ nsTArray<nsAutoPtr<DataEntryUrlBox>> urls;
+
+ // MuxerOperation methods
+ nsresult Generate(uint32_t* aBoxSize) override;
+ nsresult Write() override;
+
+ // DataReferenceBox methods
+ DataReferenceBox(ISOControl* aControl);
+ ~DataReferenceBox();
+};
+
+// 14496-12 8.7.1 'Data Information Box'
+// Box type: 'dinf'
+// DataInformationBox contains DataReferenceBox.
+class DataInformationBox : public DefaultContainerImpl {
+public:
+ DataInformationBox(ISOControl* aControl);
+ ~DataInformationBox();
+};
+
+// 14496-12 8.4.5.2 'Video Media Header Box'
+// Box type: 'vmhd'
+class VideoMediaHeaderBox : public FullBox {
+public:
+ // ISO BMFF members
+ uint16_t graphicsmode;
+ uint16_t opcolor[3];
+
+ // MuxerOperation methods
+ nsresult Generate(uint32_t* aBoxSize) override;
+ nsresult Write() override;
+
+ // VideoMediaHeaderBox methods
+ VideoMediaHeaderBox(ISOControl* aControl);
+ ~VideoMediaHeaderBox();
+};
+
+// 14496-12 8.4.5.3 'Sound Media Header Box'
+// Box type: 'smhd'
+class SoundMediaHeaderBox : public FullBox {
+public:
+ // ISO BMFF members
+ uint16_t balance;
+ uint16_t reserved;
+
+ // MuxerOperation methods
+ nsresult Generate(uint32_t* aBoxSize) override;
+ nsresult Write() override;
+
+ // SoundMediaHeaderBox methods
+ SoundMediaHeaderBox(ISOControl* aControl);
+ ~SoundMediaHeaderBox();
+};
+
+// 14496-12 8.4.4 'Media Information Box'
+// Box type: 'minf'
+// MediaInformationBox contains SoundMediaHeaderBox, DataInformationBox and
+// SampleTableBox.
+class MediaInformationBox : public DefaultContainerImpl {
+public:
+ MediaInformationBox(uint32_t aType, ISOControl* aControl);
+ ~MediaInformationBox();
+
+protected:
+ uint32_t mTrackType;
+};
+
+// flags for TrackHeaderBox::flags.
+#define flags_track_enabled 0x000001
+#define flags_track_in_movie 0x000002
+#define flags_track_in_preview 0x000004
+
+// 14496-12 8.3.2 'Track Header Box'
+// Box type: 'tkhd'
+class TrackHeaderBox : public FullBox {
+public:
+ // ISO BMFF members
+ // version = 0
+ uint32_t creation_time;
+ uint32_t modification_time;
+ uint32_t track_ID;
+ uint32_t reserved;
+ uint32_t duration;
+
+ uint32_t reserved2[2];
+ uint16_t layer;
+ uint16_t alternate_group;
+ uint16_t volume;
+ uint16_t reserved3;
+ uint32_t matrix[9];
+ uint32_t width;
+ uint32_t height;
+
+ // MuxerOperation methods
+ nsresult Generate(uint32_t* aBoxSize) override;
+ nsresult Write() override;
+
+ // TrackHeaderBox methods
+ TrackHeaderBox(uint32_t aType, ISOControl* aControl);
+ ~TrackHeaderBox();
+
+protected:
+ uint32_t mTrackType;
+};
+
+// 14496-12 8.4.3 'Handler Reference Box'
+// Box type: 'hdlr'
+class HandlerBox : public FullBox {
+public:
+ // ISO BMFF members
+ uint32_t pre_defined;
+ uint32_t handler_type;
+ uint32_t reserved[3];
+ nsCString name;
+
+ // MuxerOperation methods
+ nsresult Generate(uint32_t* aBoxSize) override;
+ nsresult Write() override;
+
+ // HandlerBox methods
+ HandlerBox(uint32_t aType, ISOControl* aControl);
+ ~HandlerBox();
+
+protected:
+ uint32_t mTrackType;
+};
+
+// 14496-12 8.4.1 'Media Box'
+// Box type: 'mdia'
+// MediaBox contains MediaHeaderBox, HandlerBox, and MediaInformationBox.
+class MediaBox : public DefaultContainerImpl {
+public:
+ MediaBox(uint32_t aType, ISOControl* aControl);
+ ~MediaBox();
+
+protected:
+ uint32_t mTrackType;
+};
+
+}
+#endif // ISOMediaBoxes_h_
diff --git a/dom/media/encoder/fmp4_muxer/ISOMediaWriter.cpp b/dom/media/encoder/fmp4_muxer/ISOMediaWriter.cpp
new file mode 100644
index 000000000..fa23616e9
--- /dev/null
+++ b/dom/media/encoder/fmp4_muxer/ISOMediaWriter.cpp
@@ -0,0 +1,234 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ISOMediaWriter.h"
+#include "ISOControl.h"
+#include "ISOMediaBoxes.h"
+#include "ISOTrackMetadata.h"
+#include "nsThreadUtils.h"
+#include "MediaEncoder.h"
+#include "VideoUtils.h"
+#include "GeckoProfiler.h"
+
+#undef LOG
+#ifdef MOZ_WIDGET_GONK
+#include <android/log.h>
+#define LOG(args...) __android_log_print(ANDROID_LOG_INFO, "MediaEncoder", ## args);
+#else
+#define LOG(args, ...)
+#endif
+
+namespace mozilla {
+
+const static uint32_t FRAG_DURATION = 2 * USECS_PER_S; // microsecond per unit
+
+// aType selects which tracks the output contains (CREATE_AUDIO_TRACK /
+// CREATE_VIDEO_TRACK); aHint selects the container flavor (fMP4/3GP/3G2)
+// and is forwarded to the ISOControl.
+ISOMediaWriter::ISOMediaWriter(uint32_t aType, uint32_t aHint)
+  : ContainerWriter()
+  , mState(MUXING_HEAD)
+  , mBlobReady(false)
+  , mType(0)
+{
+  if (aType & CREATE_AUDIO_TRACK) {
+    mType |= Audio_Track;
+  }
+  if (aType & CREATE_VIDEO_TRACK) {
+    mType |= Video_Track;
+  }
+  mControl = new ISOControl(aHint);
+  MOZ_COUNT_CTOR(ISOMediaWriter);
+}
+
+ISOMediaWriter::~ISOMediaWriter()
+{
+  MOZ_COUNT_DTOR(ISOMediaWriter);
+}
+
+// Advance the muxing state machine one step and mark a blob as ready:
+//   MUXING_HEAD: emit 'ftyp' + 'moov', then move to MUXING_FRAG.
+//   MUXING_FRAG: emit one 'moof' fragment; move to MUXING_DONE once the
+//                fragment buffers report EOS.
+//   MUXING_DONE: no-op (still sets mBlobReady).
+// Errors from the generate calls are returned immediately via
+// NS_ENSURE_SUCCESS without setting mBlobReady.
+nsresult
+ISOMediaWriter::RunState()
+{
+  nsresult rv;
+  switch (mState) {
+  case MUXING_HEAD:
+  {
+    rv = mControl->GenerateFtyp();
+    NS_ENSURE_SUCCESS(rv, rv);
+    rv = mControl->GenerateMoov();
+    NS_ENSURE_SUCCESS(rv, rv);
+    mState = MUXING_FRAG;
+    break;
+  }
+  case MUXING_FRAG:
+  {
+    rv = mControl->GenerateMoof(mType);
+    NS_ENSURE_SUCCESS(rv, rv);
+
+    bool EOS;
+    if (ReadyToRunState(EOS) && EOS) {
+      mState = MUXING_DONE;
+    }
+    break;
+  }
+  case MUXING_DONE:
+  {
+    break;
+  }
+  }
+  mBlobReady = true;
+  return NS_OK;
+}
+
+// Queue the encoded frames into the matching audio/video fragment buffer
+// and drive the muxing state machine once enough data (and CSD) is
+// available. aFlags may carry END_OF_STREAM to flag the last frames.
+// Fix: the drain loop previously ignored RunState() failures until after
+// the loop, which could spin forever at EOS if RunState kept failing
+// before reaching MUXING_DONE; the loop now exits on the first error.
+nsresult
+ISOMediaWriter::WriteEncodedTrack(const EncodedFrameContainer& aData,
+                                  uint32_t aFlags)
+{
+  PROFILER_LABEL("ISOMediaWriter", "WriteEncodedTrack",
+    js::ProfileEntry::Category::OTHER);
+  // Muxing is complete; re-entry is not allowed.
+  if (mState == MUXING_DONE) {
+    MOZ_ASSERT(false);
+    return NS_ERROR_FAILURE;
+  }
+
+  FragmentBuffer* frag = nullptr;
+  uint32_t len = aData.GetEncodedFrames().Length();
+
+  if (!len) {
+    // no frame? why bother to WriteEncodedTrack
+    return NS_OK;
+  }
+  // Route each frame to the audio or video fragment buffer by frame type.
+  for (uint32_t i = 0; i < len; i++) {
+    RefPtr<EncodedFrame> frame(aData.GetEncodedFrames()[i]);
+    EncodedFrame::FrameType type = frame->GetFrameType();
+    if (type == EncodedFrame::AAC_AUDIO_FRAME ||
+        type == EncodedFrame::AAC_CSD ||
+        type == EncodedFrame::AMR_AUDIO_FRAME ||
+        type == EncodedFrame::AMR_AUDIO_CSD ||
+        type == EncodedFrame::EVRC_AUDIO_FRAME ||
+        type == EncodedFrame::EVRC_AUDIO_CSD) {
+      frag = mAudioFragmentBuffer;
+    } else if (type == EncodedFrame::AVC_I_FRAME ||
+               type == EncodedFrame::AVC_P_FRAME ||
+               type == EncodedFrame::AVC_B_FRAME ||
+               type == EncodedFrame::AVC_CSD) {
+      frag = mVideoFragmentBuffer;
+    } else {
+      MOZ_ASSERT(0);
+      return NS_ERROR_FAILURE;
+    }
+
+    frag->AddFrame(frame);
+  }
+
+  // Encoder should send CSD (codec specific data) frame before sending the
+  // audio/video frames. When CSD data is ready, it is sufficient to generate a
+  // moov data. If encoder doesn't send CSD yet, muxer needs to wait before
+  // generating anything.
+  if (mType & Audio_Track && (!mAudioFragmentBuffer ||
+                              !mAudioFragmentBuffer->HasCSD())) {
+    return NS_OK;
+  }
+  if (mType & Video_Track && (!mVideoFragmentBuffer ||
+                              !mVideoFragmentBuffer->HasCSD())) {
+    return NS_OK;
+  }
+
+  // Only one FrameType in EncodedFrameContainer so it doesn't need to be
+  // inside the for-loop.
+  if (frag && (aFlags & END_OF_STREAM)) {
+    frag->SetEndOfStream();
+  }
+
+  nsresult rv;
+  bool EOS;
+  if (ReadyToRunState(EOS)) {
+    // Because track encoder won't generate new data after EOS, it needs to make
+    // sure the state reaches MUXING_DONE when EOS is signaled. Stop looping as
+    // soon as RunState() fails so a persistent error can't hang the writer.
+    do {
+      rv = RunState();
+    } while (NS_SUCCEEDED(rv) && EOS && mState != MUXING_DONE);
+    NS_ENSURE_SUCCESS(rv, rv);
+  }
+
+  return NS_OK;
+}
+
+// Report whether the fragment buffers hold enough data to run the state
+// machine. aEOS is set when every active track has reached end-of-stream;
+// EOS always makes the writer ready so the final fragment gets flushed.
+bool
+ISOMediaWriter::ReadyToRunState(bool& aEOS)
+{
+  aEOS = false;
+  bool ready = true;
+
+  if ((mType & Audio_Track) && (mType & Video_Track)) {
+    // Both tracks must have enough buffered data before muxing a fragment.
+    bool audioReady = mAudioFragmentBuffer->HasEnoughData();
+    bool videoReady = mVideoFragmentBuffer->HasEnoughData();
+    ready = audioReady && videoReady;
+
+    if (mAudioFragmentBuffer->EOS() && mVideoFragmentBuffer->EOS()) {
+      aEOS = true;
+      ready = true;
+    }
+  } else if (mType == Audio_Track) {
+    ready = mAudioFragmentBuffer->HasEnoughData();
+    if (mAudioFragmentBuffer->EOS()) {
+      aEOS = true;
+      ready = true;
+    }
+  } else if (mType == Video_Track) {
+    ready = mVideoFragmentBuffer->HasEnoughData();
+    if (mVideoFragmentBuffer->EOS()) {
+      aEOS = true;
+      ready = true;
+    }
+  }
+
+  return ready;
+}
+
+// Hand the buffered container bytes to the caller. Only returns data after
+// RunState() has flagged a blob ready; once the state machine is done the
+// writer is marked complete. aFlags is currently unused.
+nsresult
+ISOMediaWriter::GetContainerData(nsTArray<nsTArray<uint8_t>>* aOutputBufs,
+                                 uint32_t aFlags)
+{
+  PROFILER_LABEL("ISOMediaWriter", "GetContainerData",
+    js::ProfileEntry::Category::OTHER);
+  if (mBlobReady) {
+    if (mState == MUXING_DONE) {
+      mIsWritingComplete = true;
+    }
+    mBlobReady = false;
+    return mControl->GetBufs(aOutputBufs);
+  }
+  return NS_OK;
+}
+
+// Register track metadata with the muxer. Audio kinds (AAC/AMR/EVRC) create
+// the audio fragment buffer; AVC creates the video one. Each buffer covers
+// FRAG_DURATION microseconds per fragment. Returns NS_ERROR_FAILURE for any
+// unsupported metadata kind.
+nsresult
+ISOMediaWriter::SetMetadata(TrackMetadataBase* aMetadata)
+{
+  PROFILER_LABEL("ISOMediaWriter", "SetMetadata",
+    js::ProfileEntry::Category::OTHER);
+  if (aMetadata->GetKind() == TrackMetadataBase::METADATA_AAC ||
+      aMetadata->GetKind() == TrackMetadataBase::METADATA_AMR ||
+      aMetadata->GetKind() == TrackMetadataBase::METADATA_EVRC) {
+    mControl->SetMetadata(aMetadata);
+    mAudioFragmentBuffer = new FragmentBuffer(Audio_Track, FRAG_DURATION);
+    mControl->SetFragment(mAudioFragmentBuffer);
+    return NS_OK;
+  }
+  if (aMetadata->GetKind() == TrackMetadataBase::METADATA_AVC) {
+    mControl->SetMetadata(aMetadata);
+    mVideoFragmentBuffer = new FragmentBuffer(Video_Track, FRAG_DURATION);
+    mControl->SetFragment(mVideoFragmentBuffer);
+    return NS_OK;
+  }
+
+  return NS_ERROR_FAILURE;
+}
+
+} // namespace mozilla
diff --git a/dom/media/encoder/fmp4_muxer/ISOMediaWriter.h b/dom/media/encoder/fmp4_muxer/ISOMediaWriter.h
new file mode 100644
index 000000000..cccbbe3cb
--- /dev/null
+++ b/dom/media/encoder/fmp4_muxer/ISOMediaWriter.h
@@ -0,0 +1,108 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ISOMediaWriter_h_
+#define ISOMediaWriter_h_
+
+#include "ContainerWriter.h"
+#include "nsAutoPtr.h"
+#include "nsIRunnable.h"
+
+namespace mozilla {
+
+class ISOControl;
+class FragmentBuffer;
+
class ISOMediaWriter : public ContainerWriter
{
public:
  // Generate a fragmented MP4 stream, ISO/IEC 14496-12.
  // Brand names in the 'ftyp' box are 'isom' and 'mp42'.
  const static uint32_t TYPE_FRAG_MP4 = 1 << 0;

  // Generate a fragmented 3GP stream, 3GPP TS 26.244,
  // '5.4.3 Basic profile'.
  // Brand names in the 'ftyp' box are '3gp9' and 'isom'.
  const static uint32_t TYPE_FRAG_3GP = 1 << 1;

  // Generate a fragmented 3G2 stream, 3GPP2 C.S0050-B.
  // Brand names in the 'ftyp' box are '3g2c' and 'isom'.
  const static uint32_t TYPE_FRAG_3G2 = 1 << 2;

  // aType is the combination of CREATE_AUDIO_TRACK and CREATE_VIDEO_TRACK.
  // It is a hint to the muxer whether the output stream contains audio,
  // video or both.
  //
  // aHint is one of the TYPE_FRAG_* values above. It is a hint to the muxer
  // which kind of ISO format should be generated.
  ISOMediaWriter(uint32_t aType, uint32_t aHint = TYPE_FRAG_MP4);
  ~ISOMediaWriter();

  // ContainerWriter methods
  nsresult WriteEncodedTrack(const EncodedFrameContainer &aData,
                             uint32_t aFlags = 0) override;

  nsresult GetContainerData(nsTArray<nsTArray<uint8_t>>* aOutputBufs,
                            uint32_t aFlags = 0) override;

  nsresult SetMetadata(TrackMetadataBase* aMetadata) override;

protected:
  /**
   * Each state generates one or more blobs: a moov followed by moof,
   * moof, ... until EOS is received. The generated sequence is:
   *
   *   moov -> moof -> moof -> ... -> moof -> moof
   *
   * Details of each state:
   * MUXING_HEAD:
   *   Collects the metadata needed to generate a moov. The state transits
   *   to MUXING_FRAG after the moov blob is output.
   *
   * MUXING_FRAG:
   *   Collects enough audio/video data to generate a fragment blob. This
   *   is repeated until END_OF_STREAM, then the state transits to
   *   MUXING_DONE.
   *
   * MUXING_DONE:
   *   End of the ISOMediaWriter life cycle.
   */
  enum MuxState {
    MUXING_HEAD,
    MUXING_FRAG,
    MUXING_DONE,
  };

private:
  nsresult RunState();

  // True if one of the following conditions holds:
  //   1. Audio/video has accumulated enough data to generate a moof.
  //   2. The EOS signal has been received.
  // aEOS is assigned true when the EOS signal has been seen.
  bool ReadyToRunState(bool& aEOS);

  // The main object used to generate the ISO boxes. Its lifetime matches
  // ISOMediaWriter's; it is deleted only when ISOMediaWriter is destroyed.
  nsAutoPtr<ISOControl> mControl;

  // Buffers keeping the audio/video data frames; created when metadata is
  // received. Only one instance per media type is allowed, and they are
  // deleted only when ISOMediaWriter is destroyed.
  nsAutoPtr<FragmentBuffer> mAudioFragmentBuffer;
  nsAutoPtr<FragmentBuffer> mVideoFragmentBuffer;

  // Current position in the muxing state machine; see MuxState above.
  MuxState mState;

  // A flag indicating that an output blob is ready to be fetched via
  // GetContainerData().
  bool mBlobReady;

  // Combination of Audio_Track and/or Video_Track.
  uint32_t mType;
};
+
+} // namespace mozilla
+
+#endif // ISOMediaWriter_h_
diff --git a/dom/media/encoder/fmp4_muxer/ISOTrackMetadata.h b/dom/media/encoder/fmp4_muxer/ISOTrackMetadata.h
new file mode 100644
index 000000000..3613e1e9e
--- /dev/null
+++ b/dom/media/encoder/fmp4_muxer/ISOTrackMetadata.h
@@ -0,0 +1,131 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ISOTrackMetadata_h_
+#define ISOTrackMetadata_h_
+
+#include "TrackMetadataBase.h"
+
+namespace mozilla {
+
// Track metadata for an AAC audio stream (ISO/IEC 14496-3).
class AACTrackMetadata : public AudioTrackMetadata {
public:
  // AudioTrackMetadata members
  uint32_t GetAudioFrameDuration() override { return mFrameDuration; }
  uint32_t GetAudioFrameSize() override { return mFrameSize; }
  uint32_t GetAudioSampleRate() override { return mSampleRate; }
  uint32_t GetAudioChannels() override { return mChannels; }

  // TrackMetadataBase member
  MetadataKind GetKind() const override { return METADATA_AAC; }

  // AACTrackMetadata members
  AACTrackMetadata()
    : mSampleRate(0)
    , mFrameDuration(0)
    , mFrameSize(0)
    , mChannels(0) {
    MOZ_COUNT_CTOR(AACTrackMetadata);
  }
  ~AACTrackMetadata() { MOZ_COUNT_DTOR(AACTrackMetadata); }

  uint32_t mSampleRate;    // From 14496-3 table 1.16; it could be 7350 ~ 96000.
  uint32_t mFrameDuration; // Audio frame duration based on mSampleRate.
  uint32_t mFrameSize;     // Audio frame size; 0 means variable size.
  uint32_t mChannels;      // Channel number; it should be 1 or 2.
};
+
// AVC clock rate is 90k Hz.
#define AVC_CLOCK_RATE 90000

// Track metadata for an AVC/H.264 video stream.
class AVCTrackMetadata : public VideoTrackMetadata {
public:
  // VideoTrackMetadata members
  uint32_t GetVideoHeight() override { return mHeight; }
  uint32_t GetVideoWidth() override { return mWidth; }
  uint32_t GetVideoDisplayHeight() override { return mDisplayHeight; }
  uint32_t GetVideoDisplayWidth() override { return mDisplayWidth; }
  uint32_t GetVideoClockRate() override { return AVC_CLOCK_RATE; }
  uint32_t GetVideoFrameRate() override { return mFrameRate; }

  // TrackMetadataBase member
  MetadataKind GetKind() const override { return METADATA_AVC; }

  // AVCTrackMetadata members
  AVCTrackMetadata()
    : mHeight(0)
    , mWidth(0)
    , mDisplayHeight(0)
    , mDisplayWidth(0)
    , mFrameRate(0) {
    MOZ_COUNT_CTOR(AVCTrackMetadata);
  }
  ~AVCTrackMetadata() { MOZ_COUNT_DTOR(AVCTrackMetadata); }

  uint32_t mHeight;        // Coded frame height in pixels.
  uint32_t mWidth;         // Coded frame width in pixels.
  uint32_t mDisplayHeight; // Display height in pixels.
  uint32_t mDisplayWidth;  // Display width in pixels.
  uint32_t mFrameRate;     // Frames per second.
};
+
+
// AMR sample rate is 8000 samples/s.
#define AMR_SAMPLE_RATE 8000

// Channel number is always 1.
#define AMR_CHANNELS 1

// AMR speech codec, 3GPP TS 26.071. Encoder and container support AMR-NB
// only currently.
class AMRTrackMetadata : public AudioTrackMetadata {
public:
  // AudioTrackMetadata members
  //
  // The number of sample sets generated by the encoder varies, so the
  // frame duration and frame size are both reported as 0.
  uint32_t GetAudioFrameDuration() override { return 0; }
  uint32_t GetAudioFrameSize() override { return 0; }
  uint32_t GetAudioSampleRate() override { return AMR_SAMPLE_RATE; }
  uint32_t GetAudioChannels() override { return AMR_CHANNELS; }

  // TrackMetadataBase member
  MetadataKind GetKind() const override { return METADATA_AMR; }

  // AMRTrackMetadata members
  AMRTrackMetadata() { MOZ_COUNT_CTOR(AMRTrackMetadata); }
  ~AMRTrackMetadata() { MOZ_COUNT_DTOR(AMRTrackMetadata); }
};
+
// EVRC sample rate is 8000 samples/s.
#define EVRC_SAMPLE_RATE 8000

// Track metadata for the EVRC speech codec (3GPP2).
class EVRCTrackMetadata : public AudioTrackMetadata {
public:
  // AudioTrackMetadata members
  //
  // The number of sample sets generated by the encoder varies, so the
  // frame duration and frame size are both reported as 0.
  uint32_t GetAudioFrameDuration() override { return 0; }
  uint32_t GetAudioFrameSize() override { return 0; }
  uint32_t GetAudioSampleRate() override { return EVRC_SAMPLE_RATE; }
  uint32_t GetAudioChannels() override { return mChannels; }

  // TrackMetadataBase member
  MetadataKind GetKind() const override { return METADATA_EVRC; }

  // EVRCTrackMetadata members
  EVRCTrackMetadata()
    : mChannels(0) {
    MOZ_COUNT_CTOR(EVRCTrackMetadata);
  }
  ~EVRCTrackMetadata() { MOZ_COUNT_DTOR(EVRCTrackMetadata); }

  uint32_t mChannels; // Channel number; it should be 1 or 2.
};
+
+}
+
+#endif // ISOTrackMetadata_h_
diff --git a/dom/media/encoder/fmp4_muxer/MP4ESDS.cpp b/dom/media/encoder/fmp4_muxer/MP4ESDS.cpp
new file mode 100644
index 000000000..72880b5cb
--- /dev/null
+++ b/dom/media/encoder/fmp4_muxer/MP4ESDS.cpp
@@ -0,0 +1,138 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <climits>
+#include "ISOControl.h"
+#include "ISOMediaBoxes.h"
+#include "MP4ESDS.h"
+
+namespace mozilla {
+
+nsresult
+MP4AudioSampleEntry::Generate(uint32_t* aBoxSize)
+{
+ uint32_t box_size;
+ nsresult rv = es->Generate(&box_size);
+ NS_ENSURE_SUCCESS(rv, rv);
+ size += box_size;
+
+ *aBoxSize = size;
+ return NS_OK;
+}
+
+nsresult
+MP4AudioSampleEntry::Write()
+{
+ BoxSizeChecker checker(mControl, size);
+ nsresult rv;
+ rv = AudioSampleEntry::Write();
+ NS_ENSURE_SUCCESS(rv, rv);
+ rv = es->Write();
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ return NS_OK;
+}
+
// Construct the 'mp4a' audio sample entry; the contained 'esds' box is
// created eagerly and held via the RefPtr member.
MP4AudioSampleEntry::MP4AudioSampleEntry(ISOControl* aControl)
  : AudioSampleEntry(NS_LITERAL_CSTRING("mp4a"), aControl)
{
  es = new ESDBox(aControl);
  MOZ_COUNT_CTOR(MP4AudioSampleEntry);
}

MP4AudioSampleEntry::~MP4AudioSampleEntry()
{
  MOZ_COUNT_DTOR(MP4AudioSampleEntry);
}
+
+nsresult
+ESDBox::Generate(uint32_t* aBoxSize)
+{
+ uint32_t box_size;
+ es_descriptor->Generate(&box_size);
+ size += box_size;
+ *aBoxSize = size;
+ return NS_OK;
+}
+
+nsresult
+ESDBox::Write()
+{
+ WRITE_FULLBOX(mControl, size)
+ es_descriptor->Write();
+ return NS_OK;
+}
+
// Construct the 'esds' full box (version 0, flags 0); the contained
// ES_Descriptor is created eagerly and held via the RefPtr member.
ESDBox::ESDBox(ISOControl* aControl)
  : FullBox(NS_LITERAL_CSTRING("esds"), 0, 0, aControl)
{
  es_descriptor = new ES_Descriptor(aControl);
  MOZ_COUNT_CTOR(ESDBox);
}

ESDBox::~ESDBox()
{
  MOZ_COUNT_DTOR(ESDBox);
}
+
nsresult
ES_Descriptor::Find(const nsACString& aType,
                    nsTArray<RefPtr<MuxerOperation>>& aOperations)
{
  // ES_Descriptor is not a real ISO box (it has no four-character box
  // type to match), so nothing is ever added to aOperations.
  return NS_OK;
}
+
nsresult
ES_Descriptor::Write()
{
  // Serialize the descriptor per 14496-1 '8.3.3 ES_Descriptor'. Field
  // order matters: tag byte, length byte, ES_ID, then the four bit fields
  // packed into one byte, then the codec specific data collected by
  // Generate().
  mControl->Write(tag);
  mControl->Write(length);
  mControl->Write(ES_ID);
  mControl->WriteBits(streamDependenceFlag.to_ulong(), streamDependenceFlag.size());
  mControl->WriteBits(URL_Flag.to_ulong(), URL_Flag.size());
  mControl->WriteBits(reserved.to_ulong(), reserved.size());
  mControl->WriteBits(streamPriority.to_ulong(), streamPriority.size());
  mControl->Write(DecodeSpecificInfo.Elements(), DecodeSpecificInfo.Length());

  return NS_OK;
}
+
nsresult
ES_Descriptor::Generate(uint32_t* aBoxSize)
{
  nsresult rv;
  // 14496-1 '8.3.4 DecoderConfigDescriptor'
  // 14496-1 '10.2.3 SL Packet Header Configuration'
  // Fetch the codec specific data (CSD) recorded for the audio track; it
  // becomes the DecodeSpecificInfo payload written by Write().
  FragmentBuffer* frag = mControl->GetFragment(Audio_Track);
  rv = frag->GetCSD(DecodeSpecificInfo);
  NS_ENSURE_SUCCESS(rv, rv);

  // 'length' counts the bytes after the length field itself:
  // ES_ID (2 bytes) + 1 byte of packed flag bits + the CSD payload.
  // NOTE(review): 'length' is a uint8_t, so a CSD payload larger than 252
  // bytes would silently overflow it — confirm the CSD is always small
  // (an AAC AudioSpecificConfig typically is only a few bytes).
  length = sizeof(ES_ID) + 1;
  length += DecodeSpecificInfo.Length();

  // Total descriptor size: tag byte + length byte + 'length' bytes.
  *aBoxSize = sizeof(tag) + sizeof(length) + length;
  return NS_OK;
}
+
// Initialize every field to zero except the descriptor tag; 'length' and
// DecodeSpecificInfo are filled in later by Generate().
ES_Descriptor::ES_Descriptor(ISOControl* aControl)
  : tag(ESDescrTag)
  , length(0)
  , ES_ID(0)
  , streamDependenceFlag(0)
  , URL_Flag(0)
  , reserved(0)
  , streamPriority(0)
  , mControl(aControl)
{
  MOZ_COUNT_CTOR(ES_Descriptor);
}

ES_Descriptor::~ES_Descriptor()
{
  MOZ_COUNT_DTOR(ES_Descriptor);
}
+
+}
diff --git a/dom/media/encoder/fmp4_muxer/MP4ESDS.h b/dom/media/encoder/fmp4_muxer/MP4ESDS.h
new file mode 100644
index 000000000..ee91312c1
--- /dev/null
+++ b/dom/media/encoder/fmp4_muxer/MP4ESDS.h
@@ -0,0 +1,87 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef MP4ESDS_h_
+#define MP4ESDS_h_
+
#include <bitset>

#include "nsTArray.h"
#include "MuxerOperation.h"
+
+namespace mozilla {
+
+class ISOControl;
+
/**
 * ES_Descriptor tag, 14496-1 '8.3.3'.
 */
#define ESDescrTag 0x03

/**
 * 14496-1 '8.3.3 ES_Descriptor'.
 * It will get DecoderConfigDescriptor and SLConfigDescriptor from
 * AAC CSD (codec specific data).
 */
class ES_Descriptor : public MuxerOperation {
public:
  // ISO BMFF members
  uint8_t tag;     // Always ESDescrTag (0x03).
  uint8_t length;  // Byte count following this field; set by Generate().
  uint16_t ES_ID;
  std::bitset<1> streamDependenceFlag;
  std::bitset<1> URL_Flag;
  std::bitset<1> reserved;
  std::bitset<5> streamPriority;

  // Codec specific data payload, fetched from the audio fragment buffer
  // in Generate().
  nsTArray<uint8_t> DecodeSpecificInfo;

  // MuxerOperation methods
  nsresult Generate(uint32_t* aBoxSize) override;
  nsresult Write() override;
  // No-op: ES_Descriptor is not a real ISO box, so Find() never matches.
  nsresult Find(const nsACString& aType,
                nsTArray<RefPtr<MuxerOperation>>& aOperations) override;

  // ES_Descriptor methods
  ES_Descriptor(ISOControl* aControl);
  ~ES_Descriptor();

protected:
  ISOControl* mControl;  // Non-owning back-pointer to the muxing controller.
};
+
// 14496-14 5.6 'Sample Description Boxes'
// Box type: 'esds'
// Full box wrapping a single ES_Descriptor.
class ESDBox : public FullBox {
public:
  // ISO BMFF members
  RefPtr<ES_Descriptor> es_descriptor;

  // MuxerOperation methods
  nsresult Generate(uint32_t* aBoxSize) override;
  nsresult Write() override;

  // ESDBox methods
  ESDBox(ISOControl* aControl);
  ~ESDBox();
};
+
// 14496-14 5.6 'Sample Description Boxes'
// Box type: 'mp4a'
// Audio sample entry carrying an 'esds' child box.
class MP4AudioSampleEntry : public AudioSampleEntry {
public:
  // ISO BMFF members
  RefPtr<ESDBox> es;

  // MuxerOperation methods
  nsresult Generate(uint32_t* aBoxSize) override;
  nsresult Write() override;

  // MP4AudioSampleEntry methods
  MP4AudioSampleEntry(ISOControl* aControl);
  ~MP4AudioSampleEntry();
};
+
+}
+
+#endif // MP4ESDS_h_
diff --git a/dom/media/encoder/fmp4_muxer/MuxerOperation.h b/dom/media/encoder/fmp4_muxer/MuxerOperation.h
new file mode 100644
index 000000000..0b83c89b0
--- /dev/null
+++ b/dom/media/encoder/fmp4_muxer/MuxerOperation.h
@@ -0,0 +1,57 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsString.h"
+#include "nsTArray.h"
+
+#ifndef MuxerOperation_h_
+#define MuxerOperation_h_
+
+namespace mozilla {
+
/**
 * The interface for ISO boxes. All boxes inherit from this interface.
 * Generate() and Write() both need to be called to produce a complete box.
 *
 * Generate() will generate all the data structures and their size.
 *
 * Write() will write all data into the muxing output stream (ISOControl
 * actually) and update the data which can't be known at Generate() time
 * (for example, the offset of the video data in the mp4 file).
 *
 * ISO base media format is composed of several container boxes and the
 * contained boxes. The container boxes hold a list of MuxerOperation which
 * is implemented by the contained boxes. The contained boxes are invoked
 * via that list. For example:
 *   MovieBox (container) ---> boxes (array of MuxerOperation)
 *                              |---> MovieHeaderBox (full box)
 *                              |---> TrakBox (container)
 *                              |---> MovieExtendsBox (container)
 *
 * The complete box structure can be found at 14496-12 E.2 "The 'isom'
 * brand".
 */
class MuxerOperation {
public:
  NS_INLINE_DECL_THREADSAFE_REFCOUNTING(MuxerOperation)

  // Generate data of this box and its contained boxes, and calculate the
  // box size.
  virtual nsresult Generate(uint32_t* aBoxSize) = 0;

  // Write data to the stream.
  virtual nsresult Write() = 0;

  // Find boxes by type name (the name is the box type defined in 14496-12;
  // for example, 'moov' is the name of MovieBox).
  // It only searches child boxes, including itself and the boxes in its
  // 'boxes' list if one exists. It cannot search parent boxes.
  virtual nsresult Find(const nsACString& aType,
                        nsTArray<RefPtr<MuxerOperation>>& aOperations) = 0;

protected:
  virtual ~MuxerOperation() {}
};
+
+}
+#endif
diff --git a/dom/media/encoder/fmp4_muxer/moz.build b/dom/media/encoder/fmp4_muxer/moz.build
new file mode 100644
index 000000000..5ff274be5
--- /dev/null
+++ b/dom/media/encoder/fmp4_muxer/moz.build
@@ -0,0 +1,22 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
# Headers exported so the rest of the tree can use the fMP4 muxer.
EXPORTS += [
    'ISOMediaWriter.h',
    'ISOTrackMetadata.h',
]

# Muxer implementation; built in unified mode.
UNIFIED_SOURCES += [
    'AMRBox.cpp',
    'AVCBox.cpp',
    'EVRCBox.cpp',
    'ISOControl.cpp',
    'ISOMediaBoxes.cpp',
    'ISOMediaWriter.cpp',
    'MP4ESDS.cpp',
]

# Link into libxul.
FINAL_LIBRARY = 'xul'
diff --git a/dom/media/encoder/moz.build b/dom/media/encoder/moz.build
new file mode 100644
index 000000000..0d5cdc16f
--- /dev/null
+++ b/dom/media/encoder/moz.build
@@ -0,0 +1,56 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
with Files('*'):
    BUG_COMPONENT = ('Core', 'Video/Audio: Recording')

# The fragmented-MP4 muxer is only built for the Gonk (B2G) widget toolkit.
if CONFIG['MOZ_WIDGET_TOOLKIT'] == 'gonk':
    DIRS += ['fmp4_muxer']

# Public encoder headers.
EXPORTS += [
    'ContainerWriter.h',
    'EncodedFrameContainer.h',
    'MediaEncoder.h',
    'OpusTrackEncoder.h',
    'TrackEncoder.h',
    'TrackMetadataBase.h',
]

UNIFIED_SOURCES += [
    'MediaEncoder.cpp',
    'OpusTrackEncoder.cpp',
    'TrackEncoder.cpp',
]

# VP8/WebM encoding is optional and needs libyuv for frame conversion.
if CONFIG['MOZ_WEBM_ENCODER']:
    EXPORTS += ['VP8TrackEncoder.h',
    ]
    UNIFIED_SOURCES += ['VP8TrackEncoder.cpp',
    ]
    LOCAL_INCLUDES += ['/media/libyuv/include']

FINAL_LIBRARY = 'xul'

# These includes are from Android JB, for use of MediaCodec.
LOCAL_INCLUDES += ['/ipc/chromium/src']

# NOTE(review): ANDROID_VERSION is compared lexicographically as a string,
# not numerically; this works for the two-digit API levels in use ('16'+)
# but would misorder one- or three-digit values — confirm the value range.
if CONFIG['MOZ_WIDGET_TOOLKIT'] == 'gonk' and CONFIG['ANDROID_VERSION'] > '15':
    LOCAL_INCLUDES += [
        '%' + '%s/%s' % (CONFIG['ANDROID_SOURCE'], d) for d in [
            'frameworks/av/include/media',
            'frameworks/native/include',
            'frameworks/native/opengl/include',
        ]
    ]

include('/ipc/chromium/chromium-config.mozbuild')

# Suppress some GCC warnings being treated as errors:
# - about attributes on forward declarations for types that are already
#   defined, which complains about an important MOZ_EXPORT for
#   android::AString
if CONFIG['GNU_CC']:
    CXXFLAGS += ['-Wno-error=attributes']