diff options
Diffstat (limited to 'dom/media/encoder')
29 files changed, 7204 insertions, 0 deletions
diff --git a/dom/media/encoder/ContainerWriter.h b/dom/media/encoder/ContainerWriter.h
new file mode 100644
index 000000000..1bd66cbc6
--- /dev/null
+++ b/dom/media/encoder/ContainerWriter.h
@@ -0,0 +1,78 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ContainerWriter_h_
+#define ContainerWriter_h_
+
+#include "nsTArray.h"
+#include "EncodedFrameContainer.h"
+#include "TrackMetadataBase.h"
+
+namespace mozilla {
+/**
+ * ContainerWriter packs encoded track data into a specific media container.
+ */
+class ContainerWriter {
+public:
+  ContainerWriter()
+    : mInitialized(false)
+    , mIsWritingComplete(false)
+  {}
+  virtual ~ContainerWriter() {}
+  // Mapping to DOMLocalMediaStream::TrackTypeHints
+  enum {
+    CREATE_AUDIO_TRACK = 1 << 0,
+    CREATE_VIDEO_TRACK = 1 << 1,
+  };
+  enum {
+    END_OF_STREAM = 1 << 0
+  };
+
+  /**
+   * Writes the encoded frames held in aData to packets, and inserts these
+   * packets into the internal stream of the container writer. Each frame in
+   * aData carries its own playback duration. aFlags contains END_OF_STREAM if
+   * this is the last packet of the track.
+   * Currently, WriteEncodedTrack doesn't support multiple tracks.
+   */
+  virtual nsresult WriteEncodedTrack(const EncodedFrameContainer& aData,
+                                     uint32_t aFlags = 0) = 0;
+
+  /**
+   * Set the metadata pointer into the muxer.
+   * This function will check the integrity of aMetadata.
+   * If the metadata is not well-formed, this function will return NS_ERROR_FAILURE to the caller,
+   * else save the pointer to mMetadata and return NS_OK.
+   */
+  virtual nsresult SetMetadata(TrackMetadataBase* aMetadata) = 0;
+
+  /**
+   * Indicates whether the writer has finished outputting data.
+   */
+  virtual bool IsWritingComplete() { return mIsWritingComplete; }
+
+  enum {
+    FLUSH_NEEDED = 1 << 0,
+    GET_HEADER = 1 << 1
+  };
+
+  /**
+   * Copies the final container data to a buffer if it has accumulated enough
+   * packets from WriteEncodedTrack. This buffer of data is appended to
+   * aOutputBufs, and existing elements of aOutputBufs should not be modified.
+   * If aFlags contains FLUSH_NEEDED, OggWriter is forced to flush an ogg page
+   * even if it is not full, and to copy that container data to a buffer for
+   * aOutputBufs to append.
+   */
+  virtual nsresult GetContainerData(nsTArray<nsTArray<uint8_t> >* aOutputBufs,
+                                    uint32_t aFlags = 0) = 0;
+protected:
+  bool mInitialized;
+  bool mIsWritingComplete;
+};
+
+} // namespace mozilla
+
+#endif
diff --git a/dom/media/encoder/EncodedFrameContainer.h b/dom/media/encoder/EncodedFrameContainer.h
new file mode 100644
index 000000000..8b7512466
--- /dev/null
+++ b/dom/media/encoder/EncodedFrameContainer.h
@@ -0,0 +1,109 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef EncodedFrameContainer_H_
+#define EncodedFrameContainer_H_
+
+#include "nsTArray.h"
+
+namespace mozilla {
+
+class EncodedFrame;
+
+/*
+ * This container is used to carry video or audio encoded data from encoder to muxer.
+ * The frames are created by the encoder and held here through RefPtr references.
+ * A container should only store audio or video encoded data.
+ */
+class EncodedFrameContainer
+{
+public:
+  // Append one encoded frame to the end of the container.
+  void AppendEncodedFrame(EncodedFrame* aEncodedFrame)
+  {
+    mEncodedFrames.AppendElement(aEncodedFrame);
+  }
+  // Retrieve all of the encoded frames, in the order they were appended.
+  const nsTArray<RefPtr<EncodedFrame> >& GetEncodedFrames() const
+  {
+    return mEncodedFrames;
+  }
+private:
+  // Stores the video or audio encoded packets.
+  // The muxer should check each frame's GetFrameType() to learn its encoded data type.
+  nsTArray<RefPtr<EncodedFrame> > mEncodedFrames;
+};
+
+// Represents one encoded frame.
+class EncodedFrame final
+{
+  NS_INLINE_DECL_THREADSAFE_REFCOUNTING(EncodedFrame)
+public:
+  EncodedFrame() :
+    mTimeStamp(0),
+    mDuration(0),
+    mFrameType(UNKNOWN)
+  {}
+  enum FrameType {
+    VP8_I_FRAME,      // VP8 intraframe
+    VP8_P_FRAME,      // VP8 predicted frame
+    OPUS_AUDIO_FRAME, // Opus audio frame
+    VORBIS_AUDIO_FRAME,
+    AVC_I_FRAME,
+    AVC_P_FRAME,
+    AVC_B_FRAME,
+    AVC_CSD,          // AVC codec specific data
+    AAC_AUDIO_FRAME,
+    AAC_CSD,          // AAC codec specific data
+    AMR_AUDIO_CSD,
+    AMR_AUDIO_FRAME,
+    EVRC_AUDIO_CSD,
+    EVRC_AUDIO_FRAME,
+    UNKNOWN           // FrameType not set
+  };
+  void SwapInFrameData(nsTArray<uint8_t>& aData)
+  {
+    mFrameData.SwapElements(aData);
+  }
+  nsresult SwapOutFrameData(nsTArray<uint8_t>& aData)
+  {
+    if (mFrameType != UNKNOWN) {
+      // Swap the data out to the caller and reset the frame type to UNKNOWN.
+      mFrameData.SwapElements(aData);
+      mFrameType = UNKNOWN;
+      return NS_OK;
+    }
+    return NS_ERROR_FAILURE;
+  }
+  const nsTArray<uint8_t>& GetFrameData() const
+  {
+    return mFrameData;
+  }
+  uint64_t GetTimeStamp() const { return mTimeStamp; }
+  void SetTimeStamp(uint64_t aTimeStamp) { mTimeStamp = aTimeStamp; }
+
+  uint64_t GetDuration() const { return mDuration; }
+  void SetDuration(uint64_t aDuration) { mDuration = aDuration; }
+
+  FrameType GetFrameType() const { return mFrameType; }
+  void SetFrameType(FrameType aFrameType) { mFrameType = aFrameType; }
+private:
+  // Private destructor, to discourage deletion outside of Release():
+  ~EncodedFrame()
+  {
+  }
+
+  // Encoded data
+  nsTArray<uint8_t> mFrameData;
+  uint64_t mTimeStamp;
+  // The playback duration of this packet in number of samples
+  uint64_t mDuration;
+  // Represents what is in mFrameData
+  FrameType mFrameType;
+};
+
+} // namespace mozilla
+
+#endif
diff --git a/dom/media/encoder/MediaEncoder.cpp b/dom/media/encoder/MediaEncoder.cpp
new file mode 100644
index 000000000..864b486e4
--- /dev/null
+++ b/dom/media/encoder/MediaEncoder.cpp
@@ -0,0 +1,404 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/.
*/ +#include "MediaEncoder.h" +#include "MediaDecoder.h" +#include "nsIPrincipal.h" +#include "nsMimeTypes.h" +#include "mozilla/Logging.h" +#include "mozilla/Preferences.h" +#include "mozilla/StaticPtr.h" +#include "mozilla/gfx/Point.h" // IntSize + +#include"GeckoProfiler.h" +#include "OggWriter.h" +#include "OpusTrackEncoder.h" + +#ifdef MOZ_WEBM_ENCODER +#include "VP8TrackEncoder.h" +#include "WebMWriter.h" +#endif + +#ifdef LOG +#undef LOG +#endif + +mozilla::LazyLogModule gMediaEncoderLog("MediaEncoder"); +#define LOG(type, msg) MOZ_LOG(gMediaEncoderLog, type, msg) + +namespace mozilla { + +void +MediaStreamVideoRecorderSink::SetCurrentFrames(const VideoSegment& aSegment) +{ + MOZ_ASSERT(mVideoEncoder); + mVideoEncoder->SetCurrentFrames(aSegment); +} + +void +MediaEncoder::SetDirectConnect(bool aConnected) +{ + mDirectConnected = aConnected; +} + +void +MediaEncoder::NotifyRealtimeData(MediaStreamGraph* aGraph, + TrackID aID, + StreamTime aTrackOffset, + uint32_t aTrackEvents, + const MediaSegment& aRealtimeMedia) +{ + if (mSuspended == RECORD_NOT_SUSPENDED) { + // Process the incoming raw track data from MediaStreamGraph, called on the + // thread of MediaStreamGraph. 
+ if (mAudioEncoder && aRealtimeMedia.GetType() == MediaSegment::AUDIO) { + mAudioEncoder->NotifyQueuedTrackChanges(aGraph, aID, + aTrackOffset, aTrackEvents, + aRealtimeMedia); + } else if (mVideoEncoder && + aRealtimeMedia.GetType() == MediaSegment::VIDEO && + aTrackEvents != TrackEventCommand::TRACK_EVENT_NONE) { + mVideoEncoder->NotifyQueuedTrackChanges(aGraph, aID, + aTrackOffset, aTrackEvents, + aRealtimeMedia); + } + } +} + +void +MediaEncoder::NotifyQueuedTrackChanges(MediaStreamGraph* aGraph, + TrackID aID, + StreamTime aTrackOffset, + TrackEventCommand aTrackEvents, + const MediaSegment& aQueuedMedia, + MediaStream* aInputStream, + TrackID aInputTrackID) +{ + if (!mDirectConnected) { + NotifyRealtimeData(aGraph, aID, aTrackOffset, aTrackEvents, aQueuedMedia); + } else { + if (aTrackEvents != TrackEventCommand::TRACK_EVENT_NONE) { + // forward events (TRACK_EVENT_ENDED) but not the media + if (aQueuedMedia.GetType() == MediaSegment::VIDEO) { + VideoSegment segment; + NotifyRealtimeData(aGraph, aID, aTrackOffset, aTrackEvents, segment); + } else { + AudioSegment segment; + NotifyRealtimeData(aGraph, aID, aTrackOffset, aTrackEvents, segment); + } + } + if (mSuspended == RECORD_RESUMED) { + if (mVideoEncoder) { + if (aQueuedMedia.GetType() == MediaSegment::VIDEO) { + // insert a null frame of duration equal to the first segment passed + // after Resume(), so it'll get added to one of the DirectListener frames + VideoSegment segment; + gfx::IntSize size(0,0); + segment.AppendFrame(nullptr, aQueuedMedia.GetDuration(), size, + PRINCIPAL_HANDLE_NONE); + mVideoEncoder->NotifyQueuedTrackChanges(aGraph, aID, + aTrackOffset, aTrackEvents, + segment); + mSuspended = RECORD_NOT_SUSPENDED; + } + } else { + mSuspended = RECORD_NOT_SUSPENDED; // no video + } + } + } +} + +void +MediaEncoder::NotifyQueuedAudioData(MediaStreamGraph* aGraph, TrackID aID, + StreamTime aTrackOffset, + const AudioSegment& aQueuedMedia, + MediaStream* aInputStream, + TrackID aInputTrackID) +{ + 
if (!mDirectConnected) { + NotifyRealtimeData(aGraph, aID, aTrackOffset, 0, aQueuedMedia); + } else { + if (mSuspended == RECORD_RESUMED) { + if (!mVideoEncoder) { + mSuspended = RECORD_NOT_SUSPENDED; // no video + } + } + } +} + +void +MediaEncoder::NotifyEvent(MediaStreamGraph* aGraph, + MediaStreamGraphEvent event) +{ + // In case that MediaEncoder does not receive a TRACK_EVENT_ENDED event. + LOG(LogLevel::Debug, ("NotifyRemoved in [MediaEncoder].")); + if (mAudioEncoder) { + mAudioEncoder->NotifyEvent(aGraph, event); + } + if (mVideoEncoder) { + mVideoEncoder->NotifyEvent(aGraph, event); + } +} + +/* static */ +already_AddRefed<MediaEncoder> +MediaEncoder::CreateEncoder(const nsAString& aMIMEType, uint32_t aAudioBitrate, + uint32_t aVideoBitrate, uint32_t aBitrate, + uint8_t aTrackTypes, + TrackRate aTrackRate) +{ + PROFILER_LABEL("MediaEncoder", "CreateEncoder", + js::ProfileEntry::Category::OTHER); + + nsAutoPtr<ContainerWriter> writer; + nsAutoPtr<AudioTrackEncoder> audioEncoder; + nsAutoPtr<VideoTrackEncoder> videoEncoder; + RefPtr<MediaEncoder> encoder; + nsString mimeType; + if (!aTrackTypes) { + LOG(LogLevel::Error, ("NO TrackTypes!!!")); + return nullptr; + } +#ifdef MOZ_WEBM_ENCODER + else if (MediaEncoder::IsWebMEncoderEnabled() && + (aMIMEType.EqualsLiteral(VIDEO_WEBM) || + (aTrackTypes & ContainerWriter::CREATE_VIDEO_TRACK))) { + if (aTrackTypes & ContainerWriter::CREATE_AUDIO_TRACK + && MediaDecoder::IsOpusEnabled()) { + audioEncoder = new OpusTrackEncoder(); + NS_ENSURE_TRUE(audioEncoder, nullptr); + } + videoEncoder = new VP8TrackEncoder(aTrackRate); + writer = new WebMWriter(aTrackTypes); + NS_ENSURE_TRUE(writer, nullptr); + NS_ENSURE_TRUE(videoEncoder, nullptr); + mimeType = NS_LITERAL_STRING(VIDEO_WEBM); + } +#endif //MOZ_WEBM_ENCODER + else if (MediaDecoder::IsOggEnabled() && MediaDecoder::IsOpusEnabled() && + (aMIMEType.EqualsLiteral(AUDIO_OGG) || + (aTrackTypes & ContainerWriter::CREATE_AUDIO_TRACK))) { + writer = new OggWriter(); + 
audioEncoder = new OpusTrackEncoder(); + NS_ENSURE_TRUE(writer, nullptr); + NS_ENSURE_TRUE(audioEncoder, nullptr); + mimeType = NS_LITERAL_STRING(AUDIO_OGG); + } + else { + LOG(LogLevel::Error, ("Can not find any encoder to record this media stream")); + return nullptr; + } + LOG(LogLevel::Debug, ("Create encoder result:a[%d] v[%d] w[%d] mimeType = %s.", + audioEncoder != nullptr, videoEncoder != nullptr, + writer != nullptr, mimeType.get())); + if (videoEncoder && aVideoBitrate != 0) { + videoEncoder->SetBitrate(aVideoBitrate); + } + if (audioEncoder && aAudioBitrate != 0) { + audioEncoder->SetBitrate(aAudioBitrate); + } + encoder = new MediaEncoder(writer.forget(), audioEncoder.forget(), + videoEncoder.forget(), mimeType, aAudioBitrate, + aVideoBitrate, aBitrate); + return encoder.forget(); +} + +/** + * GetEncodedData() runs as a state machine, starting with mState set to + * GET_METADDATA, the procedure should be as follow: + * + * While non-stop + * If mState is GET_METADDATA + * Get the meta data from audio/video encoder + * If a meta data is generated + * Get meta data from audio/video encoder + * Set mState to ENCODE_TRACK + * Return the final container data + * + * If mState is ENCODE_TRACK + * Get encoded track data from audio/video encoder + * If a packet of track data is generated + * Insert encoded track data into the container stream of writer + * If the final container data is copied to aOutput + * Return the copy of final container data + * If this is the last packet of input stream + * Set mState to ENCODE_DONE + * + * If mState is ENCODE_DONE or ENCODE_ERROR + * Stop the loop + */ +void +MediaEncoder::GetEncodedData(nsTArray<nsTArray<uint8_t> >* aOutputBufs, + nsAString& aMIMEType) +{ + MOZ_ASSERT(!NS_IsMainThread()); + + aMIMEType = mMIMEType; + PROFILER_LABEL("MediaEncoder", "GetEncodedData", + js::ProfileEntry::Category::OTHER); + + bool reloop = true; + while (reloop) { + switch (mState) { + case ENCODE_METADDATA: { + LOG(LogLevel::Debug, 
("ENCODE_METADDATA TimeStamp = %f", GetEncodeTimeStamp())); + nsresult rv = CopyMetadataToMuxer(mAudioEncoder.get()); + if (NS_FAILED(rv)) { + LOG(LogLevel::Error, ("Error! Fail to Set Audio Metadata")); + break; + } + rv = CopyMetadataToMuxer(mVideoEncoder.get()); + if (NS_FAILED(rv)) { + LOG(LogLevel::Error, ("Error! Fail to Set Video Metadata")); + break; + } + + rv = mWriter->GetContainerData(aOutputBufs, + ContainerWriter::GET_HEADER); + if (aOutputBufs != nullptr) { + mSizeOfBuffer = aOutputBufs->ShallowSizeOfExcludingThis(MallocSizeOf); + } + if (NS_FAILED(rv)) { + LOG(LogLevel::Error,("Error! writer fail to generate header!")); + mState = ENCODE_ERROR; + break; + } + LOG(LogLevel::Debug, ("Finish ENCODE_METADDATA TimeStamp = %f", GetEncodeTimeStamp())); + mState = ENCODE_TRACK; + break; + } + + case ENCODE_TRACK: { + LOG(LogLevel::Debug, ("ENCODE_TRACK TimeStamp = %f", GetEncodeTimeStamp())); + EncodedFrameContainer encodedData; + nsresult rv = NS_OK; + // We're most likely to actually wait for a video frame, so do that first to minimize + // capture offset/lipsync issues + rv = WriteEncodedDataToMuxer(mVideoEncoder.get()); + if (NS_FAILED(rv)) { + LOG(LogLevel::Error, ("Fail to write video encoder data to muxer")); + break; + } + rv = WriteEncodedDataToMuxer(mAudioEncoder.get()); + if (NS_FAILED(rv)) { + LOG(LogLevel::Error, ("Error! Fail to write audio encoder data to muxer")); + break; + } + LOG(LogLevel::Debug, ("Audio encoded TimeStamp = %f", GetEncodeTimeStamp())); + LOG(LogLevel::Debug, ("Video encoded TimeStamp = %f", GetEncodeTimeStamp())); + // In audio only or video only case, let unavailable track's flag to be true. + bool isAudioCompleted = (mAudioEncoder && mAudioEncoder->IsEncodingComplete()) || !mAudioEncoder; + bool isVideoCompleted = (mVideoEncoder && mVideoEncoder->IsEncodingComplete()) || !mVideoEncoder; + rv = mWriter->GetContainerData(aOutputBufs, + isAudioCompleted && isVideoCompleted ? 
+ ContainerWriter::FLUSH_NEEDED : 0); + if (aOutputBufs != nullptr) { + mSizeOfBuffer = aOutputBufs->ShallowSizeOfExcludingThis(MallocSizeOf); + } + if (NS_SUCCEEDED(rv)) { + // Successfully get the copy of final container data from writer. + reloop = false; + } + mState = (mWriter->IsWritingComplete()) ? ENCODE_DONE : ENCODE_TRACK; + LOG(LogLevel::Debug, ("END ENCODE_TRACK TimeStamp = %f " + "mState = %d aComplete %d vComplete %d", + GetEncodeTimeStamp(), mState, isAudioCompleted, isVideoCompleted)); + break; + } + + case ENCODE_DONE: + case ENCODE_ERROR: + LOG(LogLevel::Debug, ("MediaEncoder has been shutdown.")); + mSizeOfBuffer = 0; + mShutdown = true; + reloop = false; + break; + default: + MOZ_CRASH("Invalid encode state"); + } + } +} + +nsresult +MediaEncoder::WriteEncodedDataToMuxer(TrackEncoder *aTrackEncoder) +{ + if (aTrackEncoder == nullptr) { + return NS_OK; + } + if (aTrackEncoder->IsEncodingComplete()) { + return NS_OK; + } + + PROFILER_LABEL("MediaEncoder", "WriteEncodedDataToMuxer", + js::ProfileEntry::Category::OTHER); + + EncodedFrameContainer encodedVideoData; + nsresult rv = aTrackEncoder->GetEncodedTrack(encodedVideoData); + if (NS_FAILED(rv)) { + // Encoding might be canceled. + LOG(LogLevel::Error, ("Error! Fail to get encoded data from video encoder.")); + mState = ENCODE_ERROR; + return rv; + } + rv = mWriter->WriteEncodedTrack(encodedVideoData, + aTrackEncoder->IsEncodingComplete() ? + ContainerWriter::END_OF_STREAM : 0); + if (NS_FAILED(rv)) { + LOG(LogLevel::Error, ("Error! Fail to write encoded video track to the media container.")); + mState = ENCODE_ERROR; + } + return rv; +} + +nsresult +MediaEncoder::CopyMetadataToMuxer(TrackEncoder *aTrackEncoder) +{ + if (aTrackEncoder == nullptr) { + return NS_OK; + } + + PROFILER_LABEL("MediaEncoder", "CopyMetadataToMuxer", + js::ProfileEntry::Category::OTHER); + + RefPtr<TrackMetadataBase> meta = aTrackEncoder->GetMetadata(); + if (meta == nullptr) { + LOG(LogLevel::Error, ("Error! 
metadata = null")); + mState = ENCODE_ERROR; + return NS_ERROR_ABORT; + } + + nsresult rv = mWriter->SetMetadata(meta); + if (NS_FAILED(rv)) { + LOG(LogLevel::Error, ("Error! SetMetadata fail")); + mState = ENCODE_ERROR; + } + return rv; +} + +#ifdef MOZ_WEBM_ENCODER +bool +MediaEncoder::IsWebMEncoderEnabled() +{ + return Preferences::GetBool("media.encoder.webm.enabled"); +} +#endif + +/* + * SizeOfExcludingThis measures memory being used by the Media Encoder. + * Currently it measures the size of the Encoder buffer and memory occupied + * by mAudioEncoder and mVideoEncoder. + */ +size_t +MediaEncoder::SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) const +{ + size_t amount = 0; + if (mState == ENCODE_TRACK) { + amount = mSizeOfBuffer + + (mAudioEncoder != nullptr ? mAudioEncoder->SizeOfExcludingThis(aMallocSizeOf) : 0) + + (mVideoEncoder != nullptr ? mVideoEncoder->SizeOfExcludingThis(aMallocSizeOf) : 0); + } + return amount; +} + +} // namespace mozilla diff --git a/dom/media/encoder/MediaEncoder.h b/dom/media/encoder/MediaEncoder.h new file mode 100644 index 000000000..41d7e71e2 --- /dev/null +++ b/dom/media/encoder/MediaEncoder.h @@ -0,0 +1,258 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef MediaEncoder_h_ +#define MediaEncoder_h_ + +#include "mozilla/DebugOnly.h" +#include "TrackEncoder.h" +#include "ContainerWriter.h" +#include "CubebUtils.h" +#include "MediaStreamGraph.h" +#include "MediaStreamListener.h" +#include "nsAutoPtr.h" +#include "MediaStreamVideoSink.h" +#include "nsIMemoryReporter.h" +#include "mozilla/MemoryReporting.h" +#include "mozilla/Atomics.h" + +namespace mozilla { + +class MediaStreamVideoRecorderSink : public MediaStreamVideoSink +{ +public: + explicit MediaStreamVideoRecorderSink(VideoTrackEncoder* aEncoder) + : mVideoEncoder(aEncoder) {} + + // MediaStreamVideoSink methods + virtual void SetCurrentFrames(const VideoSegment& aSegment) override; + virtual void ClearFrames() override {} + +private: + virtual ~MediaStreamVideoRecorderSink() {} + VideoTrackEncoder* mVideoEncoder; +}; + +/** + * MediaEncoder is the framework of encoding module, it controls and manages + * procedures between ContainerWriter and TrackEncoder. ContainerWriter packs + * the encoded track data with a specific container (e.g. ogg, mp4). + * AudioTrackEncoder and VideoTrackEncoder are subclasses of TrackEncoder, and + * are responsible for encoding raw data coming from MediaStreamGraph. + * + * Also, MediaEncoder is a type of MediaStreamListener, it starts to receive raw + * segments after itself is added to the source stream. In the mean time, + * encoded track data is pulled by its owner periodically on a worker thread. A + * reentrant monitor is used to protect the push and pull of resource. + * + * MediaEncoder is designed to be a passive component, neither it owns nor in + * charge of managing threads. However, a monitor is used in function + * TrackEncoder::GetEncodedTrack() for the purpose of thread safety (e.g. + * between callbacks of MediaStreamListener and others), a call to this function + * might block. Therefore, MediaEncoder should not run on threads that forbid + * blocking, such as main thread or I/O thread. 
+ * + * For example, an usage from MediaRecorder of this component would be: + * 1) Create an encoder with a valid MIME type. + * => encoder = MediaEncoder::CreateEncoder(aMIMEType); + * It then generate a ContainerWriter according to the MIME type, and an + * AudioTrackEncoder (or a VideoTrackEncoder too) associated with the media + * type. + * + * 2) Dispatch the task GetEncodedData() to a worker thread. + * + * 3) To start encoding, add this component to its source stream. + * => sourceStream->AddListener(encoder); + * + * 4) To stop encoding, remove this component from its source stream. + * => sourceStream->RemoveListener(encoder); + */ +class MediaEncoder : public DirectMediaStreamListener +{ + friend class MediaStreamVideoRecorderSink; +public : + enum { + ENCODE_METADDATA, + ENCODE_TRACK, + ENCODE_DONE, + ENCODE_ERROR, + }; + + MediaEncoder(ContainerWriter* aWriter, + AudioTrackEncoder* aAudioEncoder, + VideoTrackEncoder* aVideoEncoder, + const nsAString& aMIMEType, + uint32_t aAudioBitrate, + uint32_t aVideoBitrate, + uint32_t aBitrate) + : mWriter(aWriter) + , mAudioEncoder(aAudioEncoder) + , mVideoEncoder(aVideoEncoder) + , mVideoSink(new MediaStreamVideoRecorderSink(mVideoEncoder)) + , mStartTime(TimeStamp::Now()) + , mMIMEType(aMIMEType) + , mSizeOfBuffer(0) + , mState(MediaEncoder::ENCODE_METADDATA) + , mShutdown(false) + , mDirectConnected(false) + , mSuspended(false) +{} + + ~MediaEncoder() {}; + + enum SuspendState { + RECORD_NOT_SUSPENDED, + RECORD_SUSPENDED, + RECORD_RESUMED + }; + + /* Note - called from control code, not on MSG threads. */ + void Suspend() + { + mSuspended = RECORD_SUSPENDED; + } + + /** + * Note - called from control code, not on MSG threads. + * Arm to collect the Duration of the next video frame and give it + * to the next frame, in order to avoid any possible loss of sync. 
*/ + void Resume() + { + if (mSuspended == RECORD_SUSPENDED) { + mSuspended = RECORD_RESUMED; + } + } + + /** + * Tells us which Notify to pay attention to for media + */ + void SetDirectConnect(bool aConnected); + + /** + * Notified by the AppendToTrack in MediaStreamGraph; aRealtimeMedia is the raw + * track data in form of MediaSegment. + */ + void NotifyRealtimeData(MediaStreamGraph* aGraph, TrackID aID, + StreamTime aTrackOffset, + uint32_t aTrackEvents, + const MediaSegment& aRealtimeMedia) override; + + /** + * Notified by the control loop of MediaStreamGraph; aQueueMedia is the raw + * track data in form of MediaSegment. + */ + void NotifyQueuedTrackChanges(MediaStreamGraph* aGraph, TrackID aID, + StreamTime aTrackOffset, + TrackEventCommand aTrackEvents, + const MediaSegment& aQueuedMedia, + MediaStream* aInputStream, + TrackID aInputTrackID) override; + + /** + * Notifed by the control loop of MediaStreamGraph; aQueueMedia is the audio + * data in the form of an AudioSegment. + */ + void NotifyQueuedAudioData(MediaStreamGraph* aGraph, TrackID aID, + StreamTime aTrackOffset, + const AudioSegment& aQueuedMedia, + MediaStream* aInputStream, + TrackID aInputTrackID) override; + + /** + * * Notified the stream is being removed. + */ + void NotifyEvent(MediaStreamGraph* aGraph, + MediaStreamGraphEvent event) override; + + /** + * Creates an encoder with a given MIME type. Returns null if we are unable + * to create the encoder. For now, default aMIMEType to "audio/ogg" and use + * Ogg+Opus if it is empty. + */ + static already_AddRefed<MediaEncoder> CreateEncoder(const nsAString& aMIMEType, + uint32_t aAudioBitrate, uint32_t aVideoBitrate, + uint32_t aBitrate, + uint8_t aTrackTypes = ContainerWriter::CREATE_AUDIO_TRACK, + TrackRate aTrackRate = CubebUtils::PreferredSampleRate()); + /** + * Encodes the raw track data and returns the final container data. Assuming + * it is called on a single worker thread. 
The buffer of container data is + * allocated in ContainerWriter::GetContainerData(), and is appended to + * aOutputBufs. aMIMEType is the valid mime-type of this returned container + * data. + */ + void GetEncodedData(nsTArray<nsTArray<uint8_t> >* aOutputBufs, + nsAString& aMIMEType); + + /** + * Return true if MediaEncoder has been shutdown. Reasons are encoding + * complete, encounter an error, or being canceled by its caller. + */ + bool IsShutdown() + { + return mShutdown; + } + + /** + * Cancel the encoding, and wakes up the lock of reentrant monitor in encoder. + */ + void Cancel() + { + if (mAudioEncoder) { + mAudioEncoder->NotifyCancel(); + } + if (mVideoEncoder) { + mVideoEncoder->NotifyCancel(); + } + } + + bool HasError() + { + return mState == ENCODE_ERROR; + } + +#ifdef MOZ_WEBM_ENCODER + static bool IsWebMEncoderEnabled(); +#endif + + MOZ_DEFINE_MALLOC_SIZE_OF(MallocSizeOf) + /* + * Measure the size of the buffer, and memory occupied by mAudioEncoder + * and mVideoEncoder + */ + size_t SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) const; + + MediaStreamVideoRecorderSink* GetVideoSink() { + return mVideoSink.get(); + } + +private: + // Get encoded data from trackEncoder and write to muxer + nsresult WriteEncodedDataToMuxer(TrackEncoder *aTrackEncoder); + // Get metadata from trackEncoder and copy to muxer + nsresult CopyMetadataToMuxer(TrackEncoder* aTrackEncoder); + nsAutoPtr<ContainerWriter> mWriter; + nsAutoPtr<AudioTrackEncoder> mAudioEncoder; + nsAutoPtr<VideoTrackEncoder> mVideoEncoder; + RefPtr<MediaStreamVideoRecorderSink> mVideoSink; + TimeStamp mStartTime; + nsString mMIMEType; + int64_t mSizeOfBuffer; + int mState; + bool mShutdown; + bool mDirectConnected; + Atomic<int> mSuspended; + // Get duration from create encoder, for logging purpose + double GetEncodeTimeStamp() + { + TimeDuration decodeTime; + decodeTime = TimeStamp::Now() - mStartTime; + return decodeTime.ToMilliseconds(); + } +}; + +} // namespace mozilla + +#endif 
diff --git a/dom/media/encoder/OpusTrackEncoder.cpp b/dom/media/encoder/OpusTrackEncoder.cpp
new file mode 100644
index 000000000..c65d57788
--- /dev/null
+++ b/dom/media/encoder/OpusTrackEncoder.cpp
@@ -0,0 +1,462 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+#include "OpusTrackEncoder.h"
+#include "nsString.h"
+#include "GeckoProfiler.h"
+
+#include <opus/opus.h>
+
+#undef LOG
+#ifdef MOZ_WIDGET_GONK
+#include <android/log.h>
+#define LOG(args...) __android_log_print(ANDROID_LOG_INFO, "MediaEncoder", ## args);
+#else
+#define LOG(args, ...)
+#endif
+
+namespace mozilla {
+
+// The Opus format supports up to 8 channels, and supports multitrack audio up
+// to 255 channels, but the current implementation supports only mono and
+// stereo, and downmixes any more than that.
+static const int MAX_SUPPORTED_AUDIO_CHANNELS = 8;
+
+// http://www.opus-codec.org/docs/html_api-1.0.2/group__opus__encoder.html
+// In section "opus_encoder_init", channels must be 1 or 2 of input signal.
+static const int MAX_CHANNELS = 2;
+
+// A maximum data bytes for Opus to encode.
+static const int MAX_DATA_BYTES = 4096;
+
+// http://tools.ietf.org/html/draft-ietf-codec-oggopus-00#section-4
+// Second paragraph, " The granule position of an audio data page is in units
+// of PCM audio samples at a fixed rate of 48 kHz."
+static const int kOpusSamplingRate = 48000;
+
+// The duration of an Opus frame, and it must be 2.5, 5, 10, 20, 40 or 60 ms.
+static const int kFrameDurationMs  = 20;
+
+// The supported sampling rate of input signal (Hz),
+// must be one of the following. Will resampled to 48kHz otherwise.
+static const int kOpusSupportedInputSamplingRates[] =
+                   {8000, 12000, 16000, 24000, 48000};
+
+namespace {
+
+// An endian-neutral serialization of integers. Serializing T in little endian
+// format to aOutput, where T is a 16 bits or 32 bits integer.
+template<typename T>
+static void
+SerializeToBuffer(T aValue, nsTArray<uint8_t>* aOutput)
+{
+  for (uint32_t i = 0; i < sizeof(T); i++) {
+    aOutput->AppendElement((uint8_t)(0x000000ff & (aValue >> (i * 8))));
+  }
+}
+
+static inline void
+SerializeToBuffer(const nsCString& aComment, nsTArray<uint8_t>* aOutput)
+{
+  // Format of serializing a string to buffer is, the length of string (32 bits,
+  // little endian), and the string.
+  SerializeToBuffer((uint32_t)(aComment.Length()), aOutput);
+  aOutput->AppendElements(aComment.get(), aComment.Length());
+}
+
+
+static void
+SerializeOpusIdHeader(uint8_t aChannelCount, uint16_t aPreskip,
+                      uint32_t aInputSampleRate, nsTArray<uint8_t>* aOutput)
+{
+  // The magic signature, null terminator has to be stripped off from strings.
+  static const uint8_t magic[] = "OpusHead";
+  aOutput->AppendElements(magic, sizeof(magic) - 1);
+
+  // The version must always be 1 (8 bits, unsigned).
+  aOutput->AppendElement(1);
+
+  // Number of output channels (8 bits, unsigned).
+  aOutput->AppendElement(aChannelCount);
+
+  // Number of samples (at 48 kHz) to discard from the decoder output when
+  // starting playback (16 bits, unsigned, little endian).
+  SerializeToBuffer(aPreskip, aOutput);
+
+  // The sampling rate of input source (32 bits, unsigned, little endian).
+  SerializeToBuffer(aInputSampleRate, aOutput);
+
+  // Output gain, an encoder should set this field to zero (16 bits, signed,
+  // little endian).
+  SerializeToBuffer((int16_t)0, aOutput);
+
+  // Channel mapping family. Family 0 allows only 1 or 2 channels (8 bits,
+  // unsigned).
+  aOutput->AppendElement(0);
+}
+
+static void
+SerializeOpusCommentHeader(const nsCString& aVendor,
+                           const nsTArray<nsCString>& aComments,
+                           nsTArray<uint8_t>* aOutput)
+{
+  // The magic signature, null terminator has to be stripped off.
+  static const uint8_t magic[] = "OpusTags";
+  aOutput->AppendElements(magic, sizeof(magic) - 1);
+
+  // The vendor; Should append in the following order:
+  // vendor string length (32 bits, unsigned, little endian)
+  // vendor string.
+  SerializeToBuffer(aVendor, aOutput);
+
+  // Add comments; Should append in the following order:
+  // comment list length (32 bits, unsigned, little endian)
+  // comment #0 string length (32 bits, unsigned, little endian)
+  // comment #0 string
+  // comment #1 string length (32 bits, unsigned, little endian)
+  // comment #1 string ...
+  SerializeToBuffer((uint32_t)aComments.Length(), aOutput);
+  for (uint32_t i = 0; i < aComments.Length(); ++i) {
+    SerializeToBuffer(aComments[i], aOutput);
+  }
+}
+
+}  // Anonymous namespace.
+
+OpusTrackEncoder::OpusTrackEncoder()
+  : AudioTrackEncoder()
+  , mEncoder(nullptr)
+  , mLookahead(0)
+  , mResampler(nullptr)
+  , mOutputTimeStamp(0)
+{
+}
+
+OpusTrackEncoder::~OpusTrackEncoder()
+{
+  if (mEncoder) {
+    opus_encoder_destroy(mEncoder);
+  }
+  if (mResampler) {
+    speex_resampler_destroy(mResampler);
+    mResampler = nullptr;
+  }
+}
+
+nsresult
+OpusTrackEncoder::Init(int aChannels, int aSamplingRate)
+{
+  // This monitor is used to wake up other methods that are waiting for encoder
+  // to be completely initialized.
+  ReentrantMonitorAutoEnter mon(mReentrantMonitor);
+
+  NS_ENSURE_TRUE((aChannels <= MAX_SUPPORTED_AUDIO_CHANNELS) && (aChannels > 0),
+                 NS_ERROR_FAILURE);
+
+  // This version of encoder API only support 1 or 2 channels,
+  // So set the mChannels less or equal 2 and
+  // let InterleaveTrackData downmix pcm data.
+  mChannels = aChannels > MAX_CHANNELS ? MAX_CHANNELS : aChannels;
+
+  // Reject non-audio sample rates.
+  NS_ENSURE_TRUE(aSamplingRate >= 8000, NS_ERROR_INVALID_ARG);
+  NS_ENSURE_TRUE(aSamplingRate <= 192000, NS_ERROR_INVALID_ARG);
+
+  // According to www.opus-codec.org, creating an opus encoder requires the
+  // sampling rate of source signal be one of 8000, 12000, 16000, 24000, or
+  // 48000. If this constraint is not satisfied, we resample the input to 48kHz.
+  nsTArray<int> supportedSamplingRates;
+  supportedSamplingRates.AppendElements(kOpusSupportedInputSamplingRates,
+                         ArrayLength(kOpusSupportedInputSamplingRates));
+  if (!supportedSamplingRates.Contains(aSamplingRate)) {
+    int error;
+    mResampler = speex_resampler_init(mChannels,
+                                      aSamplingRate,
+                                      kOpusSamplingRate,
+                                      SPEEX_RESAMPLER_QUALITY_DEFAULT,
+                                      &error);
+
+    if (error != RESAMPLER_ERR_SUCCESS) {
+      return NS_ERROR_FAILURE;
+    }
+  }
+  mSamplingRate = aSamplingRate;
+  NS_ENSURE_TRUE(mSamplingRate > 0, NS_ERROR_FAILURE);
+
+  int error = 0;
+  mEncoder = opus_encoder_create(GetOutputSampleRate(), mChannels,
+                                 OPUS_APPLICATION_AUDIO, &error);
+
+  mInitialized = (error == OPUS_OK);
+
+  // Only configure the bitrate when encoder creation reported OPUS_OK.
+  if (mInitialized && mAudioBitrate) {
+    opus_encoder_ctl(mEncoder, OPUS_SET_BITRATE(static_cast<int>(mAudioBitrate)));
+  }
+
+  mReentrantMonitor.NotifyAll();
+
+  return error == OPUS_OK ? NS_OK : NS_ERROR_FAILURE;
+}
+
+int
+OpusTrackEncoder::GetOutputSampleRate()
+{
+  return mResampler ? kOpusSamplingRate : mSamplingRate;
+}
+
+int
+OpusTrackEncoder::GetPacketDuration()
+{
+  return GetOutputSampleRate() * kFrameDurationMs / 1000;
+}
+
+already_AddRefed<TrackMetadataBase>
+OpusTrackEncoder::GetMetadata()
+{
+  PROFILER_LABEL("OpusTrackEncoder", "GetMetadata",
+    js::ProfileEntry::Category::OTHER);
+  {
+    // Wait if mEncoder is not initialized.
+    ReentrantMonitorAutoEnter mon(mReentrantMonitor);
+    while (!mCanceled && !mInitialized) {
+      mReentrantMonitor.Wait();
+    }
+  }
+
+  if (mCanceled || mEncodingComplete) {
+    return nullptr;
+  }
+
+  RefPtr<OpusMetadata> meta = new OpusMetadata();
+  meta->mChannels = mChannels;
+  meta->mSamplingFrequency = mSamplingRate;
+
+  mLookahead = 0;
+  int error = opus_encoder_ctl(mEncoder, OPUS_GET_LOOKAHEAD(&mLookahead));
+  if (error != OPUS_OK) {
+    mLookahead = 0;
+  }
+
+  // The ogg time stamping and pre-skip is always timed at 48000.
+  SerializeOpusIdHeader(mChannels, mLookahead * (kOpusSamplingRate /
+                        GetOutputSampleRate()), mSamplingRate,
+                        &meta->mIdHeader);
+
+  nsCString vendor;
+  vendor.AppendASCII(opus_get_version_string());
+
+  nsTArray<nsCString> comments;
+  comments.AppendElement(NS_LITERAL_CSTRING("ENCODER=Mozilla" MOZ_APP_UA_VERSION));
+
+  SerializeOpusCommentHeader(vendor, comments,
+                             &meta->mCommentHeader);
+
+  return meta.forget();
+}
+
+nsresult
+OpusTrackEncoder::GetEncodedTrack(EncodedFrameContainer& aData)
+{
+  PROFILER_LABEL("OpusTrackEncoder", "GetEncodedTrack",
+    js::ProfileEntry::Category::OTHER);
+  {
+    ReentrantMonitorAutoEnter mon(mReentrantMonitor);
+    // Wait until initialized or cancelled.
+    while (!mCanceled && !mInitialized) {
+      mReentrantMonitor.Wait();
+    }
+    if (mCanceled || mEncodingComplete) {
+      return NS_ERROR_FAILURE;
+    }
+  }
+
+  // calculation below depends on the truth that mInitialized is true.
+  MOZ_ASSERT(mInitialized);
+
+  bool wait = true;
+  int result = 0;
+  // Only wait once, then loop until we run out of packets of input data
+  while (result >= 0 && !mEncodingComplete) {
+    // re-sampled frames left last time which didn't fit into an Opus packet duration.
+    const int framesLeft = mResampledLeftover.Length() / mChannels;
+    // When framesLeft is 0, (GetPacketDuration() - framesLeft) is a multiple
+    // of kOpusSamplingRate. There is not precision loss in the integer division
+    // in computing framesToFetch.
If frameLeft > 0, we need to add 1 to + // framesToFetch to ensure there will be at least n frames after re-sampling. + const int frameRoundUp = framesLeft ? 1 : 0; + + MOZ_ASSERT(GetPacketDuration() >= framesLeft); + // Try to fetch m frames such that there will be n frames + // where (n + frameLeft) >= GetPacketDuration() after re-sampling. + const int framesToFetch = !mResampler ? GetPacketDuration() + : (GetPacketDuration() - framesLeft) * mSamplingRate / kOpusSamplingRate + + frameRoundUp; + { + // Move all the samples from mRawSegment to mSourceSegment. We only hold + // the monitor in this block. + ReentrantMonitorAutoEnter mon(mReentrantMonitor); + + // Wait until enough raw data, end of stream or cancelled. + while (!mCanceled && mRawSegment.GetDuration() + + mSourceSegment.GetDuration() < framesToFetch && + !mEndOfStream) { + if (wait) { + mReentrantMonitor.Wait(); + wait = false; + } else { + goto done; // nested while's... + } + } + + if (mCanceled) { + return NS_ERROR_FAILURE; + } + + mSourceSegment.AppendFrom(&mRawSegment); + + // Pad |mLookahead| samples to the end of source stream to prevent lost of + // original data, the pcm duration will be calculated at rate 48K later. + if (mEndOfStream && !mEosSetInEncoder) { + mEosSetInEncoder = true; + mSourceSegment.AppendNullData(mLookahead); + } + } + + // Start encoding data. + AutoTArray<AudioDataValue, 9600> pcm; + pcm.SetLength(GetPacketDuration() * mChannels); + AudioSegment::ChunkIterator iter(mSourceSegment); + int frameCopied = 0; + + while (!iter.IsEnded() && frameCopied < framesToFetch) { + AudioChunk chunk = *iter; + + // Chunk to the required frame size. + int frameToCopy = chunk.GetDuration(); + if (frameCopied + frameToCopy > framesToFetch) { + frameToCopy = framesToFetch - frameCopied; + } + + if (!chunk.IsNull()) { + // Append the interleaved data to the end of pcm buffer. 
+ AudioTrackEncoder::InterleaveTrackData(chunk, frameToCopy, mChannels, + pcm.Elements() + frameCopied * mChannels); + } else { + memset(pcm.Elements() + frameCopied * mChannels, 0, + frameToCopy * mChannels * sizeof(AudioDataValue)); + } + + frameCopied += frameToCopy; + iter.Next(); + } + + RefPtr<EncodedFrame> audiodata = new EncodedFrame(); + audiodata->SetFrameType(EncodedFrame::OPUS_AUDIO_FRAME); + int framesInPCM = frameCopied; + if (mResampler) { + AutoTArray<AudioDataValue, 9600> resamplingDest; + // We want to consume all the input data, so we slightly oversize the + // resampled data buffer so we can fit the output data in. We cannot really + // predict the output frame count at each call. + uint32_t outframes = frameCopied * kOpusSamplingRate / mSamplingRate + 1; + uint32_t inframes = frameCopied; + + resamplingDest.SetLength(outframes * mChannels); + +#if MOZ_SAMPLE_TYPE_S16 + short* in = reinterpret_cast<short*>(pcm.Elements()); + short* out = reinterpret_cast<short*>(resamplingDest.Elements()); + speex_resampler_process_interleaved_int(mResampler, in, &inframes, + out, &outframes); +#else + float* in = reinterpret_cast<float*>(pcm.Elements()); + float* out = reinterpret_cast<float*>(resamplingDest.Elements()); + speex_resampler_process_interleaved_float(mResampler, in, &inframes, + out, &outframes); +#endif + + MOZ_ASSERT(pcm.Length() >= mResampledLeftover.Length()); + PodCopy(pcm.Elements(), mResampledLeftover.Elements(), + mResampledLeftover.Length()); + + uint32_t outframesToCopy = std::min(outframes, + static_cast<uint32_t>(GetPacketDuration() - framesLeft)); + + MOZ_ASSERT(pcm.Length() - mResampledLeftover.Length() >= + outframesToCopy * mChannels); + PodCopy(pcm.Elements() + mResampledLeftover.Length(), + resamplingDest.Elements(), outframesToCopy * mChannels); + int frameLeftover = outframes - outframesToCopy; + mResampledLeftover.SetLength(frameLeftover * mChannels); + PodCopy(mResampledLeftover.Elements(), + resamplingDest.Elements() + 
outframesToCopy * mChannels, + mResampledLeftover.Length()); + // This is always at 48000Hz. + framesInPCM = framesLeft + outframesToCopy; + audiodata->SetDuration(framesInPCM); + } else { + // The ogg time stamping and pre-skip is always timed at 48000. + audiodata->SetDuration(frameCopied * (kOpusSamplingRate / mSamplingRate)); + } + + // Remove the raw data which has been pulled to pcm buffer. + // The value of frameCopied should equal to (or smaller than, if eos) + // GetPacketDuration(). + mSourceSegment.RemoveLeading(frameCopied); + + // Has reached the end of input stream and all queued data has pulled for + // encoding. + if (mSourceSegment.GetDuration() == 0 && mEosSetInEncoder) { + mEncodingComplete = true; + LOG("[Opus] Done encoding."); + } + + MOZ_ASSERT(mEosSetInEncoder || framesInPCM == GetPacketDuration()); + + // Append null data to pcm buffer if the leftover data is not enough for + // opus encoder. + if (framesInPCM < GetPacketDuration() && mEosSetInEncoder) { + PodZero(pcm.Elements() + framesInPCM * mChannels, + (GetPacketDuration() - framesInPCM) * mChannels); + } + nsTArray<uint8_t> frameData; + // Encode the data with Opus Encoder. + frameData.SetLength(MAX_DATA_BYTES); + // result is returned as opus error code if it is negative. + result = 0; +#ifdef MOZ_SAMPLE_TYPE_S16 + const opus_int16* pcmBuf = static_cast<opus_int16*>(pcm.Elements()); + result = opus_encode(mEncoder, pcmBuf, GetPacketDuration(), + frameData.Elements(), MAX_DATA_BYTES); +#else + const float* pcmBuf = static_cast<float*>(pcm.Elements()); + result = opus_encode_float(mEncoder, pcmBuf, GetPacketDuration(), + frameData.Elements(), MAX_DATA_BYTES); +#endif + frameData.SetLength(result >= 0 ? result : 0); + + if (result < 0) { + LOG("[Opus] Fail to encode data! 
Result: %s.", opus_strerror(result)); + } + if (mEncodingComplete) { + if (mResampler) { + speex_resampler_destroy(mResampler); + mResampler = nullptr; + } + mResampledLeftover.SetLength(0); + } + + audiodata->SwapInFrameData(frameData); + // timestamp should be the time of the first sample + audiodata->SetTimeStamp(mOutputTimeStamp); + mOutputTimeStamp += FramesToUsecs(GetPacketDuration(), kOpusSamplingRate).value(); + LOG("[Opus] mOutputTimeStamp %lld.",mOutputTimeStamp); + aData.AppendEncodedFrame(audiodata); + } +done: + return result >= 0 ? NS_OK : NS_ERROR_FAILURE; +} + +} // namespace mozilla diff --git a/dom/media/encoder/OpusTrackEncoder.h b/dom/media/encoder/OpusTrackEncoder.h new file mode 100644 index 000000000..8fd21d49b --- /dev/null +++ b/dom/media/encoder/OpusTrackEncoder.h @@ -0,0 +1,91 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef OpusTrackEncoder_h_ +#define OpusTrackEncoder_h_ + +#include <stdint.h> +#include <speex/speex_resampler.h> +#include "TrackEncoder.h" + +struct OpusEncoder; + +namespace mozilla { + +// Opus meta data structure +class OpusMetadata : public TrackMetadataBase +{ +public: + // The ID Header of OggOpus. refer to http://wiki.xiph.org/OggOpus. + nsTArray<uint8_t> mIdHeader; + // The Comment Header of OggOpus. 
+ nsTArray<uint8_t> mCommentHeader; + int32_t mChannels; + float mSamplingFrequency; + MetadataKind GetKind() const override { return METADATA_OPUS; } +}; + +class OpusTrackEncoder : public AudioTrackEncoder +{ +public: + OpusTrackEncoder(); + virtual ~OpusTrackEncoder(); + + already_AddRefed<TrackMetadataBase> GetMetadata() override; + + nsresult GetEncodedTrack(EncodedFrameContainer& aData) override; + +protected: + int GetPacketDuration() override; + + nsresult Init(int aChannels, int aSamplingRate) override; + + /** + * Get the samplerate of the data to be fed to the Opus encoder. This might be + * different from the input samplerate if resampling occurs. + */ + int GetOutputSampleRate(); + +private: + /** + * The Opus encoder from libopus. + */ + OpusEncoder* mEncoder; + + /** + * A local segment queue which takes the raw data out from mRawSegment in the + * call of GetEncodedTrack(). Opus encoder only accepts GetPacketDuration() + * samples from mSourceSegment every encoding cycle, thus it needs to be + * global in order to store the leftover segments taken from mRawSegment. + */ + AudioSegment mSourceSegment; + + /** + * Total samples of delay added by codec, can be queried by the encoder. From + * the perspective of decoding, real data begins this many samples late, so + * the encoder needs to append this many null samples to the end of stream, + * in order to align the time of input and output. + */ + int mLookahead; + + /** + * If the input sample rate does not divide 48kHz evenly, the input data are + * resampled. + */ + SpeexResamplerState* mResampler; + + /** + * Store the resampled frames that don't fit into an Opus packet duration. + * They will be prepended to the resampled frames next encoding cycle. + */ + nsTArray<AudioDataValue> mResampledLeftover; + + // TimeStamp in microseconds. 
+ uint64_t mOutputTimeStamp; +}; + +} // namespace mozilla + +#endif diff --git a/dom/media/encoder/TrackEncoder.cpp b/dom/media/encoder/TrackEncoder.cpp new file mode 100644 index 000000000..ea39bb5a6 --- /dev/null +++ b/dom/media/encoder/TrackEncoder.cpp @@ -0,0 +1,342 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ +#include "TrackEncoder.h" +#include "AudioChannelFormat.h" +#include "MediaStreamGraph.h" +#include "MediaStreamListener.h" +#include "mozilla/Logging.h" +#include "VideoUtils.h" + +#undef LOG +#ifdef MOZ_WIDGET_GONK +#include <android/log.h> +#define LOG(args...) __android_log_print(ANDROID_LOG_INFO, "MediaEncoder", ## args); +#else +#define LOG(args, ...) +#endif + +namespace mozilla { + +LazyLogModule gTrackEncoderLog("TrackEncoder"); +#define TRACK_LOG(type, msg) MOZ_LOG(gTrackEncoderLog, type, msg) + +static const int DEFAULT_CHANNELS = 1; +static const int DEFAULT_SAMPLING_RATE = 16000; +static const int DEFAULT_FRAME_WIDTH = 640; +static const int DEFAULT_FRAME_HEIGHT = 480; +static const int DEFAULT_TRACK_RATE = USECS_PER_S; +// 30 seconds threshold if the encoder still can't not be initialized. 
+static const int INIT_FAILED_DURATION = 30; + +TrackEncoder::TrackEncoder() + : mReentrantMonitor("media.TrackEncoder") + , mEncodingComplete(false) + , mEosSetInEncoder(false) + , mInitialized(false) + , mEndOfStream(false) + , mCanceled(false) + , mInitCounter(0) + , mNotInitDuration(0) +{ +} + +void TrackEncoder::NotifyEvent(MediaStreamGraph* aGraph, + MediaStreamGraphEvent event) +{ + if (event == MediaStreamGraphEvent::EVENT_REMOVED) { + NotifyEndOfStream(); + } +} + +void +AudioTrackEncoder::NotifyQueuedTrackChanges(MediaStreamGraph* aGraph, + TrackID aID, + StreamTime aTrackOffset, + uint32_t aTrackEvents, + const MediaSegment& aQueuedMedia) +{ + if (mCanceled) { + return; + } + + const AudioSegment& audio = static_cast<const AudioSegment&>(aQueuedMedia); + + // Check and initialize parameters for codec encoder. + if (!mInitialized) { + mInitCounter++; + TRACK_LOG(LogLevel::Debug, ("Init the audio encoder %d times", mInitCounter)); + AudioSegment::ChunkIterator iter(const_cast<AudioSegment&>(audio)); + while (!iter.IsEnded()) { + AudioChunk chunk = *iter; + + // The number of channels is determined by the first non-null chunk, and + // thus the audio encoder is initialized at this time. + if (!chunk.IsNull()) { + nsresult rv = Init(chunk.mChannelData.Length(), aGraph->GraphRate()); + if (NS_FAILED(rv)) { + LOG("[AudioTrackEncoder]: Fail to initialize the encoder!"); + NotifyCancel(); + } + break; + } + + iter.Next(); + } + + mNotInitDuration += aQueuedMedia.GetDuration(); + if (!mInitialized && + (mNotInitDuration / aGraph->GraphRate() > INIT_FAILED_DURATION) && + mInitCounter > 1) { + LOG("[AudioTrackEncoder]: Initialize failed for 30s."); + NotifyEndOfStream(); + return; + } + } + + // Append and consume this raw segment. + AppendAudioSegment(audio); + + + // The stream has stopped and reached the end of track. 
+ if (aTrackEvents == TrackEventCommand::TRACK_EVENT_ENDED) { + LOG("[AudioTrackEncoder]: Receive TRACK_EVENT_ENDED ."); + NotifyEndOfStream(); + } +} + +void +AudioTrackEncoder::NotifyEndOfStream() +{ + // If source audio track is completely silent till the end of encoding, + // initialize the encoder with default channel counts and sampling rate. + if (!mCanceled && !mInitialized) { + Init(DEFAULT_CHANNELS, DEFAULT_SAMPLING_RATE); + } + + ReentrantMonitorAutoEnter mon(mReentrantMonitor); + mEndOfStream = true; + mReentrantMonitor.NotifyAll(); +} + +nsresult +AudioTrackEncoder::AppendAudioSegment(const AudioSegment& aSegment) +{ + ReentrantMonitorAutoEnter mon(mReentrantMonitor); + + AudioSegment::ChunkIterator iter(const_cast<AudioSegment&>(aSegment)); + while (!iter.IsEnded()) { + AudioChunk chunk = *iter; + // Append and consume both non-null and null chunks. + mRawSegment.AppendAndConsumeChunk(&chunk); + iter.Next(); + } + + if (mRawSegment.GetDuration() >= GetPacketDuration()) { + mReentrantMonitor.NotifyAll(); + } + + return NS_OK; +} + +/*static*/ +void +AudioTrackEncoder::InterleaveTrackData(AudioChunk& aChunk, + int32_t aDuration, + uint32_t aOutputChannels, + AudioDataValue* aOutput) +{ + uint32_t numChannelsToCopy = std::min(aOutputChannels, + static_cast<uint32_t>(aChunk.mChannelData.Length())); + switch(aChunk.mBufferFormat) { + case AUDIO_FORMAT_S16: { + AutoTArray<const int16_t*, 2> array; + array.SetLength(numChannelsToCopy); + for (uint32_t i = 0; i < array.Length(); i++) { + array[i] = static_cast<const int16_t*>(aChunk.mChannelData[i]); + } + InterleaveTrackData(array, aDuration, aOutputChannels, aOutput, aChunk.mVolume); + break; + } + case AUDIO_FORMAT_FLOAT32: { + AutoTArray<const float*, 2> array; + array.SetLength(numChannelsToCopy); + for (uint32_t i = 0; i < array.Length(); i++) { + array[i] = static_cast<const float*>(aChunk.mChannelData[i]); + } + InterleaveTrackData(array, aDuration, aOutputChannels, aOutput, aChunk.mVolume); + break; 
+ } + case AUDIO_FORMAT_SILENCE: { + MOZ_ASSERT(false, "To implement."); + } + }; +} + +/*static*/ +void +AudioTrackEncoder::DeInterleaveTrackData(AudioDataValue* aInput, + int32_t aDuration, + int32_t aChannels, + AudioDataValue* aOutput) +{ + for (int32_t i = 0; i < aChannels; ++i) { + for(int32_t j = 0; j < aDuration; ++j) { + aOutput[i * aDuration + j] = aInput[i + j * aChannels]; + } + } +} + +size_t +AudioTrackEncoder::SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) const +{ + return mRawSegment.SizeOfExcludingThis(aMallocSizeOf); +} + +void +VideoTrackEncoder::Init(const VideoSegment& aSegment) +{ + if (mInitialized) { + return; + } + + mInitCounter++; + TRACK_LOG(LogLevel::Debug, ("Init the video encoder %d times", mInitCounter)); + VideoSegment::ConstChunkIterator iter(aSegment); + while (!iter.IsEnded()) { + VideoChunk chunk = *iter; + if (!chunk.IsNull()) { + gfx::IntSize imgsize = chunk.mFrame.GetImage()->GetSize(); + gfx::IntSize intrinsicSize = chunk.mFrame.GetIntrinsicSize(); + nsresult rv = Init(imgsize.width, imgsize.height, + intrinsicSize.width, intrinsicSize.height); + + if (NS_FAILED(rv)) { + LOG("[VideoTrackEncoder]: Fail to initialize the encoder!"); + NotifyCancel(); + } + break; + } + + iter.Next(); + } + + mNotInitDuration += aSegment.GetDuration(); + // Give up only while the encoder is still uninitialized, matching the audio + // encoder's check: an Init() that has just succeeded above must not end the + // stream merely because the preceding null frames exceeded the threshold. + if (!mInitialized && + (mNotInitDuration / mTrackRate > INIT_FAILED_DURATION) && + mInitCounter > 1) { + LOG("[VideoTrackEncoder]: Initialize failed for %ds.", INIT_FAILED_DURATION); + NotifyEndOfStream(); + return; + } + +} + +void +VideoTrackEncoder::SetCurrentFrames(const VideoSegment& aSegment) +{ + if (mCanceled) { + return; + } + + Init(aSegment); + AppendVideoSegment(aSegment); +} + +void +VideoTrackEncoder::NotifyQueuedTrackChanges(MediaStreamGraph* aGraph, + TrackID aID, + StreamTime aTrackOffset, + uint32_t aTrackEvents, + const MediaSegment& aQueuedMedia) +{ + if (mCanceled) { + return; + } + + if (!(aTrackEvents == TRACK_EVENT_CREATED || + aTrackEvents == TRACK_EVENT_ENDED)) { + return; + } 
+ + const VideoSegment& video = static_cast<const VideoSegment&>(aQueuedMedia); + + // Check and initialize parameters for codec encoder. + Init(video); + + AppendVideoSegment(video); + + // The stream has stopped and reached the end of track. + if (aTrackEvents == TrackEventCommand::TRACK_EVENT_ENDED) { + LOG("[VideoTrackEncoder]: Receive TRACK_EVENT_ENDED ."); + NotifyEndOfStream(); + } + +} + +nsresult +VideoTrackEncoder::AppendVideoSegment(const VideoSegment& aSegment) +{ + ReentrantMonitorAutoEnter mon(mReentrantMonitor); + + // Append all video segments from MediaStreamGraph, including null an + // non-null frames. + VideoSegment::ChunkIterator iter(const_cast<VideoSegment&>(aSegment)); + while (!iter.IsEnded()) { + VideoChunk chunk = *iter; + mLastFrameDuration += chunk.GetDuration(); + // Send only the unique video frames for encoding. + // Or if we got the same video chunks more than 1 seconds, + // force to send into encoder. + if ((mLastFrame != chunk.mFrame) || + (mLastFrameDuration >= mTrackRate)) { + RefPtr<layers::Image> image = chunk.mFrame.GetImage(); + + // Because we may get chunks with a null image (due to input blocking), + // accumulate duration and give it to the next frame that arrives. + // Canonically incorrect - the duration should go to the previous frame + // - but that would require delaying until the next frame arrives. + // Best would be to do like OMXEncoder and pass an effective timestamp + // in with each frame. 
+ if (image) { + mRawSegment.AppendFrame(image.forget(), + mLastFrameDuration, + chunk.mFrame.GetIntrinsicSize(), + PRINCIPAL_HANDLE_NONE, + chunk.mFrame.GetForceBlack()); + mLastFrameDuration = 0; + } + } + mLastFrame.TakeFrom(&chunk.mFrame); + iter.Next(); + } + + if (mRawSegment.GetDuration() > 0) { + mReentrantMonitor.NotifyAll(); + } + + return NS_OK; +} + +void +VideoTrackEncoder::NotifyEndOfStream() +{ + // If source video track is muted till the end of encoding, initialize the + // encoder with default frame width, frame height, and track rate. + if (!mCanceled && !mInitialized) { + Init(DEFAULT_FRAME_WIDTH, DEFAULT_FRAME_HEIGHT, + DEFAULT_FRAME_WIDTH, DEFAULT_FRAME_HEIGHT); + } + + ReentrantMonitorAutoEnter mon(mReentrantMonitor); + mEndOfStream = true; + mReentrantMonitor.NotifyAll(); +} + +size_t +VideoTrackEncoder::SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) const +{ + return mRawSegment.SizeOfExcludingThis(aMallocSizeOf); +} + +} // namespace mozilla diff --git a/dom/media/encoder/TrackEncoder.h b/dom/media/encoder/TrackEncoder.h new file mode 100644 index 000000000..33f20e899 --- /dev/null +++ b/dom/media/encoder/TrackEncoder.h @@ -0,0 +1,364 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef TrackEncoder_h_ +#define TrackEncoder_h_ + +#include "mozilla/ReentrantMonitor.h" + +#include "AudioSegment.h" +#include "EncodedFrameContainer.h" +#include "StreamTracks.h" +#include "TrackMetadataBase.h" +#include "VideoSegment.h" +#include "MediaStreamGraph.h" + +namespace mozilla { + +/** + * Base class of AudioTrackEncoder and VideoTrackEncoder. Lifetimes managed by + * MediaEncoder. 
Most methods can only be called on the MediaEncoder's thread, + * but some subclass methods can be called on other threads when noted. + * + * NotifyQueuedTrackChanges is called on subclasses of this class from the + * MediaStreamGraph thread, and AppendAudioSegment/AppendVideoSegment is then + * called to store media data in the TrackEncoder. Later on, GetEncodedTrack is + * called on MediaEncoder's thread to encode and retrieve the encoded data. + */ +class TrackEncoder +{ +public: + TrackEncoder(); + + virtual ~TrackEncoder() {} + + /** + * Notified by the same callback of MediaEncoder when it has received a track + * change from MediaStreamGraph. Called on the MediaStreamGraph thread. + */ + virtual void NotifyQueuedTrackChanges(MediaStreamGraph* aGraph, TrackID aID, + StreamTime aTrackOffset, + uint32_t aTrackEvents, + const MediaSegment& aQueuedMedia) = 0; + + /** + * Notified by the same callback of MediaEncoder when it has been removed from + * MediaStreamGraph. Called on the MediaStreamGraph thread. + */ + void NotifyEvent(MediaStreamGraph* aGraph, + MediaStreamGraphEvent event); + + /** + * Creates and sets up meta data for a specific codec, called on the worker + * thread. + */ + virtual already_AddRefed<TrackMetadataBase> GetMetadata() = 0; + + /** + * Encodes raw segments. Result data is returned in aData. Called on the + * worker thread. + */ + virtual nsresult GetEncodedTrack(EncodedFrameContainer& aData) = 0; + + /** + * True if the track encoder has encoded all source segments coming from + * MediaStreamGraph. Call on the worker thread. + */ + bool IsEncodingComplete() { return mEncodingComplete; } + + /** + * Notifies from MediaEncoder to cancel the encoding, and wakes up + * mReentrantMonitor if encoder is waiting on it. 
+ */ + void NotifyCancel() + { + ReentrantMonitorAutoEnter mon(mReentrantMonitor); + mCanceled = true; + mReentrantMonitor.NotifyAll(); + } + + virtual void SetBitrate(const uint32_t aBitrate) {} + +protected: + /** + * Notifies track encoder that we have reached the end of source stream, and + * wakes up mReentrantMonitor if encoder is waiting for any source data. + */ + virtual void NotifyEndOfStream() = 0; + + /** + * A ReentrantMonitor to protect the pushing and pulling of mRawSegment which + * is declared in its subclasses, and the following flags: mInitialized, + * EndOfStream and mCanceled. The control of protection is managed by its + * subclasses. + */ + ReentrantMonitor mReentrantMonitor; + + /** + * True if the track encoder has encoded all source data. + */ + bool mEncodingComplete; + + /** + * True if flag of EOS or any form of indicating EOS has set in the codec- + * encoder. + */ + bool mEosSetInEncoder; + + /** + * True if the track encoder has initialized successfully, protected by + * mReentrantMonitor. + */ + bool mInitialized; + + /** + * True if the TrackEncoder has received an event of TRACK_EVENT_ENDED from + * MediaStreamGraph, or the MediaEncoder is removed from its source stream, + * protected by mReentrantMonitor. + */ + bool mEndOfStream; + + /** + * True if a cancellation of encoding is sent from MediaEncoder, protected by + * mReentrantMonitor. + */ + bool mCanceled; + + // How many times we have tried to initialize the encoder. 
+ uint32_t mInitCounter; + StreamTime mNotInitDuration; +}; + +class AudioTrackEncoder : public TrackEncoder +{ +public: + AudioTrackEncoder() + : TrackEncoder() + , mChannels(0) + , mSamplingRate(0) + , mAudioBitrate(0) + {} + + void NotifyQueuedTrackChanges(MediaStreamGraph* aGraph, TrackID aID, + StreamTime aTrackOffset, + uint32_t aTrackEvents, + const MediaSegment& aQueuedMedia) override; + + template<typename T> + static + void InterleaveTrackData(nsTArray<const T*>& aInput, + int32_t aDuration, + uint32_t aOutputChannels, + AudioDataValue* aOutput, + float aVolume) + { + if (aInput.Length() < aOutputChannels) { + // Up-mix. This might make the mChannelData have more than aChannels. + AudioChannelsUpMix(&aInput, aOutputChannels, SilentChannel::ZeroChannel<T>()); + } + + if (aInput.Length() > aOutputChannels) { + DownmixAndInterleave(aInput, aDuration, + aVolume, aOutputChannels, aOutput); + } else { + InterleaveAndConvertBuffer(aInput.Elements(), aDuration, aVolume, + aOutputChannels, aOutput); + } + } + + /** + * Interleaves the track data and stores the result into aOutput. Might need + * to up-mix or down-mix the channel data if the channels number of this chunk + * is different from aOutputChannels. The channel data from aChunk might be + * modified by up-mixing. + */ + static void InterleaveTrackData(AudioChunk& aChunk, int32_t aDuration, + uint32_t aOutputChannels, + AudioDataValue* aOutput); + + /** + * De-interleaves the aInput data and stores the result into aOutput. + * No up-mix or down-mix operations inside. + */ + static void DeInterleaveTrackData(AudioDataValue* aInput, int32_t aDuration, + int32_t aChannels, AudioDataValue* aOutput); + /** + * Measure size of mRawSegment + */ + size_t SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) const; + + void SetBitrate(const uint32_t aBitrate) override + { + mAudioBitrate = aBitrate; + } +protected: + /** + * Number of samples per channel in a pcm buffer. 
This is also the value of + * frame size required by audio encoder, and mReentrantMonitor will be + * notified when at least this much data has been added to mRawSegment. + */ + virtual int GetPacketDuration() { return 0; } + + /** + * Initializes the audio encoder. The call of this method is delayed until we + * have received the first valid track from MediaStreamGraph, and the + * mReentrantMonitor will be notified if other methods is waiting for encoder + * to be completely initialized. This method is called on the MediaStreamGraph + * thread. + */ + virtual nsresult Init(int aChannels, int aSamplingRate) = 0; + + /** + * Appends and consumes track data from aSegment, this method is called on + * the MediaStreamGraph thread. mReentrantMonitor will be notified when at + * least GetPacketDuration() data has been added to mRawSegment, wake up other + * method which is waiting for more data from mRawSegment. + */ + nsresult AppendAudioSegment(const AudioSegment& aSegment); + + /** + * Notifies the audio encoder that we have reached the end of source stream, + * and wakes up mReentrantMonitor if encoder is waiting for more track data. + */ + void NotifyEndOfStream() override; + + /** + * The number of channels are used for processing PCM data in the audio encoder. + * This value comes from the first valid audio chunk. If encoder can't support + * the channels in the chunk, downmix PCM stream can be performed. + * This value also be used to initialize the audio encoder. + */ + int mChannels; + + /** + * The sampling rate of source audio data. + */ + int mSamplingRate; + + /** + * A segment queue of audio track data, protected by mReentrantMonitor. 
+ */ + AudioSegment mRawSegment; + + uint32_t mAudioBitrate; +}; + +class VideoTrackEncoder : public TrackEncoder +{ +public: + explicit VideoTrackEncoder(TrackRate aTrackRate) + : TrackEncoder() + , mFrameWidth(0) + , mFrameHeight(0) + , mDisplayWidth(0) + , mDisplayHeight(0) + , mTrackRate(aTrackRate) + , mTotalFrameDuration(0) + , mLastFrameDuration(0) + , mVideoBitrate(0) + {} + + /** + * Notified by the same callback of MediaEncoder when it has received a track + * change from MediaStreamGraph. Called on the MediaStreamGraph thread. + */ + void NotifyQueuedTrackChanges(MediaStreamGraph* aGraph, TrackID aID, + StreamTime aTrackOffset, + uint32_t aTrackEvents, + const MediaSegment& aQueuedMedia) override; + /** + * Measure size of mRawSegment + */ + size_t SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) const; + + void SetBitrate(const uint32_t aBitrate) override + { + mVideoBitrate = aBitrate; + } + + void Init(const VideoSegment& aSegment); + + void SetCurrentFrames(const VideoSegment& aSegment); + + StreamTime SecondsToMediaTime(double aS) const + { + NS_ASSERTION(0 <= aS && aS <= TRACK_TICKS_MAX/TRACK_RATE_MAX, + "Bad seconds"); + return mTrackRate * aS; + } + +protected: + /** + * Initializes the video encoder. In order to collect the value of width and + * height of source frames, this initialization is delayed until we have + * received the first valid video frame from MediaStreamGraph; + * mReentrantMonitor will be notified after it has successfully initialized, + * and this method is called on the MediaStreamGraph thread. + */ + virtual nsresult Init(int aWidth, int aHeight, int aDisplayWidth, + int aDisplayHeight) = 0; + + /** + * Appends source video frames to mRawSegment. We only append a source chunk + * if its frame differs from mLastFrame, or if the same frame has lasted for + * at least one second. Called on the MediaStreamGraph thread. 
+ */ + nsresult AppendVideoSegment(const VideoSegment& aSegment); + + /** + * Tells the video track encoder that we've reached the end of source stream, + * and wakes up mReentrantMonitor if encoder is waiting for more track data. + * Called on the MediaStreamGraph thread. + */ + void NotifyEndOfStream() override; + + /** + * The width of source video frame, ceiled if the source width is odd. + */ + int mFrameWidth; + + /** + * The height of source video frame, ceiled if the source height is odd. + */ + int mFrameHeight; + + /** + * The display width of source video frame. + */ + int mDisplayWidth; + + /** + * The display height of source video frame. + */ + int mDisplayHeight; + + /** + * The track rate of source video. + */ + TrackRate mTrackRate; + + /** + * The total duration of frames in encoded video in StreamTime, kept track of + * in subclasses. + */ + StreamTime mTotalFrameDuration; + + /** + * The last unique frame and duration we've sent to track encoder, + * kept track of in subclasses. + */ + VideoFrame mLastFrame; + StreamTime mLastFrameDuration; + + /** + * A segment queue of video track data, protected by mReentrantMonitor. + */ + VideoSegment mRawSegment; + + uint32_t mVideoBitrate; +}; + +} // namespace mozilla + +#endif diff --git a/dom/media/encoder/TrackMetadataBase.h b/dom/media/encoder/TrackMetadataBase.h new file mode 100644 index 000000000..a8b818c09 --- /dev/null +++ b/dom/media/encoder/TrackMetadataBase.h @@ -0,0 +1,76 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef TrackMetadataBase_h_ +#define TrackMetadataBase_h_ + +#include "nsTArray.h" +#include "nsCOMPtr.h" +namespace mozilla { + +// A class representing metadata for various codec formats. Only supports information for a single track. 
+class TrackMetadataBase +{ +public: + NS_INLINE_DECL_THREADSAFE_REFCOUNTING(TrackMetadataBase) + enum MetadataKind { + METADATA_OPUS, // Represent the Opus metadata + METADATA_VP8, + METADATA_VORBIS, + METADATA_AVC, + METADATA_AAC, + METADATA_AMR, + METADATA_EVRC, + METADATA_UNKNOWN // Metadata Kind not set + }; + // Return the specific metadata kind + virtual MetadataKind GetKind() const = 0; + +protected: + // Protected destructor, to discourage deletion outside of Release(): + virtual ~TrackMetadataBase() {} +}; + +// The base class for audio metadata. +class AudioTrackMetadata : public TrackMetadataBase { +public: + // The duration of each sample set generated by encoder. (counted by samples) + // If the duration is variant, this value should return 0. + virtual uint32_t GetAudioFrameDuration() = 0; + + // The size of each sample set generated by encoder. (counted by byte) + // If the size is variant, this value should return 0. + virtual uint32_t GetAudioFrameSize() = 0; + + // AudioSampleRate is the number of audio sample per second. + virtual uint32_t GetAudioSampleRate() = 0; + + virtual uint32_t GetAudioChannels() = 0; +}; + +// The base class for video metadata. +class VideoTrackMetadata : public TrackMetadataBase { +public: + // VideoHeight and VideoWidth are the frame size of the elementary stream. + virtual uint32_t GetVideoHeight() = 0; + virtual uint32_t GetVideoWidth() = 0; + + // VideoDisplayHeight and VideoDisplayWidth are the display frame size. + virtual uint32_t GetVideoDisplayHeight() = 0; + virtual uint32_t GetVideoDisplayWidth() = 0; + + // VideoClockRate is the number of samples per second in video frame's + // timestamp. + // For example, if VideoClockRate is 90k Hz and VideoFrameRate is + // 30 fps, each frame's sample duration will be 3000 Hz. + virtual uint32_t GetVideoClockRate() = 0; + + // VideoFrameRate is numner of frames per second. 
+ virtual uint32_t GetVideoFrameRate() = 0; +}; + +} // namespace mozilla + +#endif diff --git a/dom/media/encoder/VP8TrackEncoder.cpp b/dom/media/encoder/VP8TrackEncoder.cpp new file mode 100644 index 000000000..1e5451f0f --- /dev/null +++ b/dom/media/encoder/VP8TrackEncoder.cpp @@ -0,0 +1,678 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "VP8TrackEncoder.h" +#include "GeckoProfiler.h" +#include "LayersLogging.h" +#include "libyuv.h" +#include "mozilla/gfx/2D.h" +#include "prsystem.h" +#include "VideoSegment.h" +#include "VideoUtils.h" +#include "vpx/vp8cx.h" +#include "vpx/vpx_encoder.h" +#include "WebMWriter.h" +#include "mozilla/media/MediaUtils.h" + +namespace mozilla { + +LazyLogModule gVP8TrackEncoderLog("VP8TrackEncoder"); +#define VP8LOG(msg, ...) MOZ_LOG(gVP8TrackEncoderLog, mozilla::LogLevel::Debug, \ + (msg, ##__VA_ARGS__)) +// Debug logging macro with object pointer and class name. 
+ +#define DEFAULT_BITRATE_BPS 2500000 +#define DEFAULT_ENCODE_FRAMERATE 30 + +using namespace mozilla::gfx; +using namespace mozilla::layers; + +VP8TrackEncoder::VP8TrackEncoder(TrackRate aTrackRate) + : VideoTrackEncoder(aTrackRate) + , mEncodedFrameDuration(0) + , mEncodedTimestamp(0) + , mRemainingTicks(0) + , mVPXContext(new vpx_codec_ctx_t()) + , mVPXImageWrapper(new vpx_image_t()) +{ + MOZ_COUNT_CTOR(VP8TrackEncoder); +} + +VP8TrackEncoder::~VP8TrackEncoder() +{ + if (mInitialized) { + vpx_codec_destroy(mVPXContext); + } + + if (mVPXImageWrapper) { + vpx_img_free(mVPXImageWrapper); + } + MOZ_COUNT_DTOR(VP8TrackEncoder); +} + +nsresult +VP8TrackEncoder::Init(int32_t aWidth, int32_t aHeight, int32_t aDisplayWidth, + int32_t aDisplayHeight) +{ + if (aWidth < 1 || aHeight < 1 || aDisplayWidth < 1 || aDisplayHeight < 1) { + return NS_ERROR_FAILURE; + } + + ReentrantMonitorAutoEnter mon(mReentrantMonitor); + + mEncodedFrameRate = DEFAULT_ENCODE_FRAMERATE; + mEncodedFrameDuration = mTrackRate / mEncodedFrameRate; + mFrameWidth = aWidth; + mFrameHeight = aHeight; + mDisplayWidth = aDisplayWidth; + mDisplayHeight = aDisplayHeight; + + // Encoder configuration structure. + vpx_codec_enc_cfg_t config; + memset(&config, 0, sizeof(vpx_codec_enc_cfg_t)); + if (vpx_codec_enc_config_default(vpx_codec_vp8_cx(), &config, 0)) { + return NS_ERROR_FAILURE; + } + + // Creating a wrapper to the image - setting image data to NULL. Actual + // pointer will be set in encode. Setting align to 1, as it is meaningless + // (actual memory is not allocated). + vpx_img_wrap(mVPXImageWrapper, VPX_IMG_FMT_I420, + mFrameWidth, mFrameHeight, 1, nullptr); + + config.g_w = mFrameWidth; + config.g_h = mFrameHeight; + // TODO: Maybe we should have various aFrameRate bitrate pair for each devices? + // or for different platform + + // rc_target_bitrate needs kbit/s + config.rc_target_bitrate = (mVideoBitrate != 0 ? 
mVideoBitrate : DEFAULT_BITRATE_BPS)/1000; + + // Setting the time base of the codec + config.g_timebase.num = 1; + config.g_timebase.den = mTrackRate; + + config.g_error_resilient = 0; + + config.g_lag_in_frames = 0; // 0- no frame lagging + + int32_t number_of_cores = PR_GetNumberOfProcessors(); + if (mFrameWidth * mFrameHeight > 1280 * 960 && number_of_cores >= 6) { + config.g_threads = 3; // 3 threads for 1080p. + } else if (mFrameWidth * mFrameHeight > 640 * 480 && number_of_cores >= 3) { + config.g_threads = 2; // 2 threads for qHD/HD. + } else { + config.g_threads = 1; // 1 thread for VGA or less + } + + // rate control settings + config.rc_dropframe_thresh = 0; + config.rc_end_usage = VPX_CBR; + config.g_pass = VPX_RC_ONE_PASS; + // ffmpeg doesn't currently support streams that use resize. + // Therefore, for safety, we should turn it off until it does. + config.rc_resize_allowed = 0; + config.rc_undershoot_pct = 100; + config.rc_overshoot_pct = 15; + config.rc_buf_initial_sz = 500; + config.rc_buf_optimal_sz = 600; + config.rc_buf_sz = 1000; + + config.kf_mode = VPX_KF_AUTO; + // Ensure that we can output one I-frame per second. + config.kf_max_dist = mEncodedFrameRate; + + vpx_codec_flags_t flags = 0; + flags |= VPX_CODEC_USE_OUTPUT_PARTITION; + if (vpx_codec_enc_init(mVPXContext, vpx_codec_vp8_cx(), &config, flags)) { + return NS_ERROR_FAILURE; + } + + vpx_codec_control(mVPXContext, VP8E_SET_STATIC_THRESHOLD, 1); + vpx_codec_control(mVPXContext, VP8E_SET_CPUUSED, -6); + vpx_codec_control(mVPXContext, VP8E_SET_TOKEN_PARTITIONS, + VP8_ONE_TOKENPARTITION); + + mInitialized = true; + mon.NotifyAll(); + + return NS_OK; +} + +already_AddRefed<TrackMetadataBase> +VP8TrackEncoder::GetMetadata() +{ + PROFILER_LABEL("VP8TrackEncoder", "GetMetadata", + js::ProfileEntry::Category::OTHER); + { + // Wait if mEncoder is not initialized. 
+ ReentrantMonitorAutoEnter mon(mReentrantMonitor); + while (!mCanceled && !mInitialized) { + mon.Wait(); + } + } + + if (mCanceled || mEncodingComplete) { + return nullptr; + } + + RefPtr<VP8Metadata> meta = new VP8Metadata(); + meta->mWidth = mFrameWidth; + meta->mHeight = mFrameHeight; + meta->mDisplayWidth = mDisplayWidth; + meta->mDisplayHeight = mDisplayHeight; + meta->mEncodedFrameRate = mEncodedFrameRate; + + return meta.forget(); +} + +bool +VP8TrackEncoder::GetEncodedPartitions(EncodedFrameContainer& aData) +{ + vpx_codec_iter_t iter = nullptr; + EncodedFrame::FrameType frameType = EncodedFrame::VP8_P_FRAME; + nsTArray<uint8_t> frameData; + const vpx_codec_cx_pkt_t *pkt = nullptr; + while ((pkt = vpx_codec_get_cx_data(mVPXContext, &iter)) != nullptr) { + switch (pkt->kind) { + case VPX_CODEC_CX_FRAME_PKT: { + // Copy the encoded data from libvpx to frameData + frameData.AppendElements((uint8_t*)pkt->data.frame.buf, + pkt->data.frame.sz); + break; + } + default: { + break; + } + } + // End of frame + if ((pkt->data.frame.flags & VPX_FRAME_IS_FRAGMENT) == 0) { + if (pkt->data.frame.flags & VPX_FRAME_IS_KEY) { + frameType = EncodedFrame::VP8_I_FRAME; + } + break; + } + } + + if (!frameData.IsEmpty()) { + // Copy the encoded data to aData. + EncodedFrame* videoData = new EncodedFrame(); + videoData->SetFrameType(frameType); + // Convert the timestamp and duration to Usecs. 
+ CheckedInt64 timestamp = FramesToUsecs(pkt->data.frame.pts, mTrackRate); + if (timestamp.isValid()) { + videoData->SetTimeStamp((uint64_t)timestamp.value()); + } + CheckedInt64 duration = FramesToUsecs(pkt->data.frame.duration, mTrackRate); + if (duration.isValid()) { + videoData->SetDuration((uint64_t)duration.value()); + } + videoData->SwapInFrameData(frameData); + VP8LOG("GetEncodedPartitions TimeStamp %lld Duration %lld\n", + videoData->GetTimeStamp(), videoData->GetDuration()); + VP8LOG("frameType %d\n", videoData->GetFrameType()); + aData.AppendEncodedFrame(videoData); + } + + return !!pkt; +} + +static bool isYUV420(const PlanarYCbCrImage::Data *aData) +{ + if (aData->mYSize == aData->mCbCrSize * 2) { + return true; + } + return false; +} + +static bool isYUV422(const PlanarYCbCrImage::Data *aData) +{ + if ((aData->mYSize.width == aData->mCbCrSize.width * 2) && + (aData->mYSize.height == aData->mCbCrSize.height)) { + return true; + } + return false; +} + +static bool isYUV444(const PlanarYCbCrImage::Data *aData) +{ + if (aData->mYSize == aData->mCbCrSize) { + return true; + } + return false; +} + +nsresult VP8TrackEncoder::PrepareRawFrame(VideoChunk &aChunk) +{ + RefPtr<Image> img; + if (aChunk.mFrame.GetForceBlack() || aChunk.IsNull()) { + if (!mMuteFrame) { + mMuteFrame = VideoFrame::CreateBlackImage(gfx::IntSize(mFrameWidth, mFrameHeight)); + MOZ_ASSERT(mMuteFrame); + } + img = mMuteFrame; + } else { + img = aChunk.mFrame.GetImage(); + } + + if (img->GetSize() != IntSize(mFrameWidth, mFrameHeight)) { + VP8LOG("Dynamic resolution changes (was %dx%d, now %dx%d) are unsupported\n", + mFrameWidth, mFrameHeight, img->GetSize().width, img->GetSize().height); + return NS_ERROR_FAILURE; + } + + ImageFormat format = img->GetFormat(); + if (format == ImageFormat::PLANAR_YCBCR) { + PlanarYCbCrImage* yuv = static_cast<PlanarYCbCrImage *>(img.get()); + + MOZ_RELEASE_ASSERT(yuv); + if (!yuv->IsValid()) { + NS_WARNING("PlanarYCbCrImage is not valid"); + return 
NS_ERROR_FAILURE; + } + const PlanarYCbCrImage::Data *data = yuv->GetData(); + + if (isYUV420(data) && !data->mCbSkip) { + // 420 planar, no need for conversions + mVPXImageWrapper->planes[VPX_PLANE_Y] = data->mYChannel; + mVPXImageWrapper->planes[VPX_PLANE_U] = data->mCbChannel; + mVPXImageWrapper->planes[VPX_PLANE_V] = data->mCrChannel; + mVPXImageWrapper->stride[VPX_PLANE_Y] = data->mYStride; + mVPXImageWrapper->stride[VPX_PLANE_U] = data->mCbCrStride; + mVPXImageWrapper->stride[VPX_PLANE_V] = data->mCbCrStride; + + return NS_OK; + } + } + + // Not 420 planar, have to convert + uint32_t yPlaneSize = mFrameWidth * mFrameHeight; + uint32_t halfWidth = (mFrameWidth + 1) / 2; + uint32_t halfHeight = (mFrameHeight + 1) / 2; + uint32_t uvPlaneSize = halfWidth * halfHeight; + + if (mI420Frame.IsEmpty()) { + mI420Frame.SetLength(yPlaneSize + uvPlaneSize * 2); + } + + uint8_t *y = mI420Frame.Elements(); + uint8_t *cb = mI420Frame.Elements() + yPlaneSize; + uint8_t *cr = mI420Frame.Elements() + yPlaneSize + uvPlaneSize; + + if (format == ImageFormat::PLANAR_YCBCR) { + PlanarYCbCrImage* yuv = static_cast<PlanarYCbCrImage *>(img.get()); + + MOZ_RELEASE_ASSERT(yuv); + if (!yuv->IsValid()) { + NS_WARNING("PlanarYCbCrImage is not valid"); + return NS_ERROR_FAILURE; + } + const PlanarYCbCrImage::Data *data = yuv->GetData(); + + int rv; + std::string yuvFormat; + if (isYUV420(data) && data->mCbSkip) { + // If mCbSkip is set, we assume it's nv12 or nv21. 
+ if (data->mCbChannel < data->mCrChannel) { // nv12 + rv = libyuv::NV12ToI420(data->mYChannel, data->mYStride, + data->mCbChannel, data->mCbCrStride, + y, mFrameWidth, + cb, halfWidth, + cr, halfWidth, + mFrameWidth, mFrameHeight); + yuvFormat = "NV12"; + } else { // nv21 + rv = libyuv::NV21ToI420(data->mYChannel, data->mYStride, + data->mCrChannel, data->mCbCrStride, + y, mFrameWidth, + cb, halfWidth, + cr, halfWidth, + mFrameWidth, mFrameHeight); + yuvFormat = "NV21"; + } + } else if (isYUV444(data) && !data->mCbSkip) { + rv = libyuv::I444ToI420(data->mYChannel, data->mYStride, + data->mCbChannel, data->mCbCrStride, + data->mCrChannel, data->mCbCrStride, + y, mFrameWidth, + cb, halfWidth, + cr, halfWidth, + mFrameWidth, mFrameHeight); + yuvFormat = "I444"; + } else if (isYUV422(data) && !data->mCbSkip) { + rv = libyuv::I422ToI420(data->mYChannel, data->mYStride, + data->mCbChannel, data->mCbCrStride, + data->mCrChannel, data->mCbCrStride, + y, mFrameWidth, + cb, halfWidth, + cr, halfWidth, + mFrameWidth, mFrameHeight); + yuvFormat = "I422"; + } else { + VP8LOG("Unsupported planar format\n"); + NS_ASSERTION(false, "Unsupported planar format"); + return NS_ERROR_NOT_IMPLEMENTED; + } + + if (rv != 0) { + VP8LOG("Converting an %s frame to I420 failed\n", yuvFormat.c_str()); + return NS_ERROR_FAILURE; + } + + VP8LOG("Converted an %s frame to I420\n", yuvFormat.c_str()); + } else { + // Not YCbCr at all. Try to get access to the raw data and convert. 
+ + RefPtr<SourceSurface> surf = GetSourceSurface(img.forget()); + if (!surf) { + VP8LOG("Getting surface from %s image failed\n", Stringify(format).c_str()); + return NS_ERROR_FAILURE; + } + + RefPtr<DataSourceSurface> data = surf->GetDataSurface(); + if (!data) { + VP8LOG("Getting data surface from %s image with %s (%s) surface failed\n", + Stringify(format).c_str(), Stringify(surf->GetType()).c_str(), + Stringify(surf->GetFormat()).c_str()); + return NS_ERROR_FAILURE; + } + + DataSourceSurface::ScopedMap map(data, DataSourceSurface::READ); + if (!map.IsMapped()) { + VP8LOG("Reading DataSourceSurface from %s image with %s (%s) surface failed\n", + Stringify(format).c_str(), Stringify(surf->GetType()).c_str(), + Stringify(surf->GetFormat()).c_str()); + return NS_ERROR_FAILURE; + } + + int rv; + switch (surf->GetFormat()) { + case SurfaceFormat::B8G8R8A8: + case SurfaceFormat::B8G8R8X8: + rv = libyuv::ARGBToI420(static_cast<uint8*>(map.GetData()), + map.GetStride(), + y, mFrameWidth, + cb, halfWidth, + cr, halfWidth, + mFrameWidth, mFrameHeight); + break; + case SurfaceFormat::R5G6B5_UINT16: + rv = libyuv::RGB565ToI420(static_cast<uint8*>(map.GetData()), + map.GetStride(), + y, mFrameWidth, + cb, halfWidth, + cr, halfWidth, + mFrameWidth, mFrameHeight); + break; + default: + VP8LOG("Unsupported SourceSurface format %s\n", + Stringify(surf->GetFormat()).c_str()); + NS_ASSERTION(false, "Unsupported SourceSurface format"); + return NS_ERROR_NOT_IMPLEMENTED; + } + + if (rv != 0) { + VP8LOG("%s to I420 conversion failed\n", + Stringify(surf->GetFormat()).c_str()); + return NS_ERROR_FAILURE; + } + + VP8LOG("Converted a %s frame to I420\n", + Stringify(surf->GetFormat()).c_str()); + } + + mVPXImageWrapper->planes[VPX_PLANE_Y] = y; + mVPXImageWrapper->planes[VPX_PLANE_U] = cb; + mVPXImageWrapper->planes[VPX_PLANE_V] = cr; + mVPXImageWrapper->stride[VPX_PLANE_Y] = mFrameWidth; + mVPXImageWrapper->stride[VPX_PLANE_U] = halfWidth; + mVPXImageWrapper->stride[VPX_PLANE_V] = 
halfWidth; + + return NS_OK; +} + +void +VP8TrackEncoder::ReplyGetSourceSurface(already_AddRefed<gfx::SourceSurface> aSurf) +{ + mSourceSurface = aSurf; +} + +already_AddRefed<gfx::SourceSurface> +VP8TrackEncoder::GetSourceSurface(already_AddRefed<Image> aImg) +{ + RefPtr<Image> img = aImg; + mSourceSurface = nullptr; + if (img) { + if (img->AsGLImage() && !NS_IsMainThread()) { + // GLImage::GetAsSourceSurface() only support main thread + RefPtr<Runnable> getsourcesurface_runnable = + media::NewRunnableFrom([this, img]() -> nsresult { + // Due to the parameter DISPATCH_SYNC, encoder thread will stock at + // MediaRecorder::Session::Extract(bool). There is no chance + // that TrackEncoder will be destroyed during this period. So + // there is no need to use RefPtr to hold TrackEncoder. + ReplyGetSourceSurface(img->GetAsSourceSurface()); + return NS_OK; + }); + NS_DispatchToMainThread(getsourcesurface_runnable, NS_DISPATCH_SYNC); + } else { + mSourceSurface = img->GetAsSourceSurface(); + } + } + return mSourceSurface.forget(); +} + +// These two define value used in GetNextEncodeOperation to determine the +// EncodeOperation for next target frame. +#define I_FRAME_RATIO (0.5) +#define SKIP_FRAME_RATIO (0.75) + +/** + * Compares the elapsed time from the beginning of GetEncodedTrack and + * the processed frame duration in mSourceSegment + * in order to set the nextEncodeOperation for next target frame. + */ +VP8TrackEncoder::EncodeOperation +VP8TrackEncoder::GetNextEncodeOperation(TimeDuration aTimeElapsed, + StreamTime aProcessedDuration) +{ + int64_t durationInUsec = + FramesToUsecs(aProcessedDuration + mEncodedFrameDuration, + mTrackRate).value(); + if (aTimeElapsed.ToMicroseconds() > (durationInUsec * SKIP_FRAME_RATIO)) { + // The encoder is too slow. + // We should skip next frame to consume the mSourceSegment. + return SKIP_FRAME; + } else if (aTimeElapsed.ToMicroseconds() > (durationInUsec * I_FRAME_RATIO)) { + // The encoder is a little slow. 
+ // We force the encoder to encode an I-frame to accelerate. + return ENCODE_I_FRAME; + } else { + return ENCODE_NORMAL_FRAME; + } +} + +StreamTime +VP8TrackEncoder::CalculateRemainingTicks(StreamTime aDurationCopied, + StreamTime aEncodedDuration) +{ + return mRemainingTicks + aEncodedDuration - aDurationCopied; +} + +// Try to extend the encodedDuration as long as possible if the target frame +// has a long duration. +StreamTime +VP8TrackEncoder::CalculateEncodedDuration(StreamTime aDurationCopied) +{ + StreamTime temp64 = aDurationCopied; + StreamTime encodedDuration = mEncodedFrameDuration; + temp64 -= mRemainingTicks; + while (temp64 > mEncodedFrameDuration) { + temp64 -= mEncodedFrameDuration; + encodedDuration += mEncodedFrameDuration; + } + return encodedDuration; +} + +/** + * Encoding flow in GetEncodedTrack(): + * 1: Check the mInitialized state and the packet duration. + * 2: Move the data from mRawSegment to mSourceSegment. + * 3: Encode the video chunks in mSourceSegment in a for-loop. + * 3.1: Pick the video chunk by mRemainingTicks. + * 3.2: Calculate the encoding duration for the parameter of vpx_codec_encode(). + * The encoding duration is a multiple of mEncodedFrameDuration. + * 3.3: Setup the video chunk to mVPXImageWrapper by PrepareRawFrame(). + * 3.4: Send frame into vp8 encoder by vpx_codec_encode(). + * 3.5: Get the output frame from encoder by calling GetEncodedPartitions(). + * 3.6: Calculate the mRemainingTicks for next target frame. + * 3.7: Set the nextEncodeOperation for the next target frame. + * There is a heuristic: If the frame duration we have processed in + * mSourceSegment is 100ms, means that we can't spend more than 100ms to + * encode it. + * 4. Remove the encoded chunks in mSourceSegment after for-loop. + * + * Ex1: Input frame rate is 100 => input frame duration is 10ms for each. + * mEncodedFrameRate is 30 => output frame duration is 33ms. 
+ * In this case, the frame duration in mSourceSegment will be: + * 1st : 0~10ms + * 2nd : 10~20ms + * 3rd : 20~30ms + * 4th : 30~40ms + * ... + * The VP8 encoder will take the 1st and 4th frames to encode. At beginning + * mRemainingTicks is 0 for 1st frame, then the mRemainingTicks is set + * to 23 to pick the 4th frame. (mEncodedFrameDuration - 1st frame duration) + * + * Ex2: Input frame rate is 25 => frame duration is 40ms for each. + * mEncodedFrameRate is 30 => output frame duration is 33ms. + * In this case, the frame duration in mSourceSegment will be: + * 1st : 0~40ms + * 2nd : 40~80ms + * 3rd : 80~120ms + * 4th : 120~160ms + * ... + * Because the input frame duration is 40ms larger than 33ms, so the first + * encoded frame duration will be 66ms by calling CalculateEncodedDuration. + * And the mRemainingTicks will be set to 26 + * (CalculateRemainingTicks 0+66-40) in order to pick the next frame(2nd) + * in mSourceSegment. + */ +nsresult +VP8TrackEncoder::GetEncodedTrack(EncodedFrameContainer& aData) +{ + PROFILER_LABEL("VP8TrackEncoder", "GetEncodedTrack", + js::ProfileEntry::Category::OTHER); + bool EOS; + { + // Move all the samples from mRawSegment to mSourceSegment. We only hold + // the monitor in this block. + ReentrantMonitorAutoEnter mon(mReentrantMonitor); + // Wait if mEncoder is not initialized, or when not enough raw data, but is + // not the end of stream nor is being canceled. 
+ while (!mCanceled && (!mInitialized || + (mRawSegment.GetDuration() + mSourceSegment.GetDuration() < + mEncodedFrameDuration && !mEndOfStream))) { + mon.Wait(); + } + if (mCanceled || mEncodingComplete) { + return NS_ERROR_FAILURE; + } + mSourceSegment.AppendFrom(&mRawSegment); + EOS = mEndOfStream; + } + + VideoSegment::ChunkIterator iter(mSourceSegment); + StreamTime durationCopied = 0; + StreamTime totalProcessedDuration = 0; + TimeStamp timebase = TimeStamp::Now(); + EncodeOperation nextEncodeOperation = ENCODE_NORMAL_FRAME; + + for (; !iter.IsEnded(); iter.Next()) { + VideoChunk &chunk = *iter; + // Accumulate chunk's duration to durationCopied until it reaches + // mRemainingTicks. + durationCopied += chunk.GetDuration(); + MOZ_ASSERT(mRemainingTicks <= mEncodedFrameDuration); + VP8LOG("durationCopied %lld mRemainingTicks %lld\n", + durationCopied, mRemainingTicks); + if (durationCopied >= mRemainingTicks) { + VP8LOG("nextEncodeOperation is %d\n",nextEncodeOperation); + // Calculate encodedDuration for this target frame. + StreamTime encodedDuration = CalculateEncodedDuration(durationCopied); + + // Encode frame. + if (nextEncodeOperation != SKIP_FRAME) { + nsresult rv = PrepareRawFrame(chunk); + NS_ENSURE_SUCCESS(rv, NS_ERROR_FAILURE); + + // Encode the data with VP8 encoder + int flags = (nextEncodeOperation == ENCODE_NORMAL_FRAME) ? + 0 : VPX_EFLAG_FORCE_KF; + if (vpx_codec_encode(mVPXContext, mVPXImageWrapper, mEncodedTimestamp, + (unsigned long)encodedDuration, flags, + VPX_DL_REALTIME)) { + return NS_ERROR_FAILURE; + } + // Get the encoded data from VP8 encoder. + GetEncodedPartitions(aData); + } else { + // SKIP_FRAME + // Extend the duration of the last encoded data in aData + // because this frame will be skip. 
+ RefPtr<EncodedFrame> last = aData.GetEncodedFrames().LastElement(); + if (last) { + CheckedInt64 skippedDuration = FramesToUsecs(chunk.mDuration, mTrackRate); + if (skippedDuration.isValid() && skippedDuration.value() > 0) { + last->SetDuration(last->GetDuration() + + (static_cast<uint64_t>(skippedDuration.value()))); + } + } + } + // Move forward the mEncodedTimestamp. + mEncodedTimestamp += encodedDuration; + totalProcessedDuration += durationCopied; + // Calculate mRemainingTicks for next target frame. + mRemainingTicks = CalculateRemainingTicks(durationCopied, + encodedDuration); + + // Check the remain data is enough for next target frame. + if (mSourceSegment.GetDuration() - totalProcessedDuration + >= mEncodedFrameDuration) { + TimeDuration elapsedTime = TimeStamp::Now() - timebase; + nextEncodeOperation = GetNextEncodeOperation(elapsedTime, + totalProcessedDuration); + // Reset durationCopied for next iteration. + durationCopied = 0; + } else { + // Process done, there is no enough data left for next iteration, + // break the for-loop. + break; + } + } + } + // Remove the chunks we have processed. + mSourceSegment.RemoveLeading(totalProcessedDuration); + VP8LOG("RemoveLeading %lld\n",totalProcessedDuration); + + // End of stream, pull the rest frames in encoder. + if (EOS) { + VP8LOG("mEndOfStream is true\n"); + mEncodingComplete = true; + // Bug 1243611, keep calling vpx_codec_encode and vpx_codec_get_cx_data + // until vpx_codec_get_cx_data return null. 
+ + do { + if (vpx_codec_encode(mVPXContext, nullptr, mEncodedTimestamp, + mEncodedFrameDuration, 0, VPX_DL_REALTIME)) { + return NS_ERROR_FAILURE; + } + } while(GetEncodedPartitions(aData)); + } + + return NS_OK ; +} + +} // namespace mozilla diff --git a/dom/media/encoder/VP8TrackEncoder.h b/dom/media/encoder/VP8TrackEncoder.h new file mode 100644 index 000000000..5a046ee5d --- /dev/null +++ b/dom/media/encoder/VP8TrackEncoder.h @@ -0,0 +1,99 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef VP8TrackEncoder_h_ +#define VP8TrackEncoder_h_ + +#include "TrackEncoder.h" +#include "vpx/vpx_codec.h" + +namespace mozilla { + +typedef struct vpx_codec_ctx vpx_codec_ctx_t; +typedef struct vpx_codec_enc_cfg vpx_codec_enc_cfg_t; +typedef struct vpx_image vpx_image_t; + +/** + * VP8TrackEncoder implements VideoTrackEncoder by using libvpx library. + * We implement a realtime and fixed FPS encoder. In order to achieve that, + * there is a pick target frame and drop frame encoding policy implemented in + * GetEncodedTrack. + */ +class VP8TrackEncoder : public VideoTrackEncoder +{ + enum EncodeOperation { + ENCODE_NORMAL_FRAME, // VP8 track encoder works normally. + ENCODE_I_FRAME, // The next frame will be encoded as I-Frame. + SKIP_FRAME, // Skip the next frame. 
+ }; +public: + explicit VP8TrackEncoder(TrackRate aTrackRate); + virtual ~VP8TrackEncoder(); + + already_AddRefed<TrackMetadataBase> GetMetadata() final override; + + nsresult GetEncodedTrack(EncodedFrameContainer& aData) final override; + + void ReplyGetSourceSurface(already_AddRefed<gfx::SourceSurface> aSurf); +protected: + nsresult Init(int32_t aWidth, int32_t aHeight, + int32_t aDisplayWidth, int32_t aDisplayHeight) final override; + +private: + // Calculate the target frame's encoded duration. + StreamTime CalculateEncodedDuration(StreamTime aDurationCopied); + + // Calculate the mRemainingTicks for next target frame. + StreamTime CalculateRemainingTicks(StreamTime aDurationCopied, + StreamTime aEncodedDuration); + + // Get the EncodeOperation for next target frame. + EncodeOperation GetNextEncodeOperation(TimeDuration aTimeElapsed, + StreamTime aProcessedDuration); + + // Get the encoded data from encoder to aData. + // Return value: false if the vpx_codec_get_cx_data returns null + // for EOS detection. + bool GetEncodedPartitions(EncodedFrameContainer& aData); + + // Prepare the input data to the mVPXImageWrapper for encoding. + nsresult PrepareRawFrame(VideoChunk &aChunk); + + already_AddRefed<gfx::SourceSurface> GetSourceSurface(already_AddRefed<layers::Image> aImg); + + // Output frame rate. + uint32_t mEncodedFrameRate; + // Duration for the output frame, reciprocal to mEncodedFrameRate. + StreamTime mEncodedFrameDuration; + // Encoded timestamp. + StreamTime mEncodedTimestamp; + // Duration to the next encode frame. + StreamTime mRemainingTicks; + + // Muted frame, we only create it once. + RefPtr<layers::Image> mMuteFrame; + + // I420 frame, for converting to I420. + nsTArray<uint8_t> mI420Frame; + + /** + * A local segment queue which takes the raw data out from mRawSegment in the + * call of GetEncodedTrack(). 
Since we implement the fixed FPS encoding + * policy, it needs to be global in order to store the leftover segments + * taken from mRawSegment. + */ + VideoSegment mSourceSegment; + + // VP8 relative members. + // Codec context structure. + nsAutoPtr<vpx_codec_ctx_t> mVPXContext; + // Image Descriptor. + nsAutoPtr<vpx_image_t> mVPXImageWrapper; + RefPtr<gfx::SourceSurface> mSourceSurface; +}; + +} // namespace mozilla + +#endif diff --git a/dom/media/encoder/fmp4_muxer/AMRBox.cpp b/dom/media/encoder/fmp4_muxer/AMRBox.cpp new file mode 100644 index 000000000..cd1a34fae --- /dev/null +++ b/dom/media/encoder/fmp4_muxer/AMRBox.cpp @@ -0,0 +1,84 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "ISOControl.h" +#include "ISOMediaBoxes.h" +#include "AMRBox.h" +#include "ISOTrackMetadata.h" + +namespace mozilla { + +nsresult +AMRSampleEntry::Generate(uint32_t* aBoxSize) +{ + uint32_t box_size; + nsresult rv = amr_special_box->Generate(&box_size); + NS_ENSURE_SUCCESS(rv, rv); + size += box_size; + + *aBoxSize = size; + return NS_OK; +} + +nsresult +AMRSampleEntry::Write() +{ + BoxSizeChecker checker(mControl, size); + nsresult rv; + rv = AudioSampleEntry::Write(); + NS_ENSURE_SUCCESS(rv, rv); + rv = amr_special_box->Write(); + NS_ENSURE_SUCCESS(rv, rv); + + return NS_OK; +} + +AMRSampleEntry::AMRSampleEntry(ISOControl* aControl) + : AudioSampleEntry(NS_LITERAL_CSTRING("samr"), aControl) +{ + amr_special_box = new AMRSpecificBox(aControl); + MOZ_COUNT_CTOR(AMRSampleEntry); +} + +AMRSampleEntry::~AMRSampleEntry() +{ + MOZ_COUNT_DTOR(AMRSampleEntry); +} + +nsresult +AMRSpecificBox::Generate(uint32_t* aBoxSize) +{ + nsresult rv; + FragmentBuffer* frag = mControl->GetFragment(Audio_Track); + rv = frag->GetCSD(amrDecSpecInfo); + 
NS_ENSURE_SUCCESS(rv, rv); + + size += amrDecSpecInfo.Length(); + *aBoxSize = size; + + return NS_OK; +} + +nsresult +AMRSpecificBox::Write() +{ + BoxSizeChecker checker(mControl, size); + Box::Write(); + mControl->Write(amrDecSpecInfo.Elements(), amrDecSpecInfo.Length()); + return NS_OK; +} + +AMRSpecificBox::AMRSpecificBox(ISOControl* aControl) + : Box(NS_LITERAL_CSTRING("damr"), aControl) +{ + MOZ_COUNT_CTOR(AMRSpecificBox); +} + +AMRSpecificBox::~AMRSpecificBox() +{ + MOZ_COUNT_DTOR(AMRSpecificBox); +} + +} diff --git a/dom/media/encoder/fmp4_muxer/AMRBox.h b/dom/media/encoder/fmp4_muxer/AMRBox.h new file mode 100644 index 000000000..645d7f89c --- /dev/null +++ b/dom/media/encoder/fmp4_muxer/AMRBox.h @@ -0,0 +1,50 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef AMRBOX_h_ +#define AMRBOX_h_ + +#include "nsTArray.h" +#include "MuxerOperation.h" + +namespace mozilla { + +class ISOControl; + +// 3GPP TS 26.244 6.7 'AMRSpecificBox field for AMRSampleEntry box' +// Box type: 'damr' +class AMRSpecificBox : public Box { +public: + // 3GPP members + nsTArray<uint8_t> amrDecSpecInfo; + + // MuxerOperation methods + nsresult Generate(uint32_t* aBoxSize) override; + nsresult Write() override; + + // AMRSpecificBox methods + AMRSpecificBox(ISOControl* aControl); + ~AMRSpecificBox(); +}; + +// 3GPP TS 26.244 6.5 'AMRSampleEntry box' +// Box type: 'sawb' +class AMRSampleEntry : public AudioSampleEntry { +public: + // 3GPP members + RefPtr<AMRSpecificBox> amr_special_box; + + // MuxerOperation methods + nsresult Generate(uint32_t* aBoxSize) override; + nsresult Write() override; + + // AMRSampleEntry methods + AMRSampleEntry(ISOControl* aControl); + ~AMRSampleEntry(); +}; + +} + +#endif // AMRBOX_h_ diff --git a/dom/media/encoder/fmp4_muxer/AVCBox.cpp b/dom/media/encoder/fmp4_muxer/AVCBox.cpp new file mode 100644 index 000000000..a45cda8b7 --- /dev/null +++ b/dom/media/encoder/fmp4_muxer/AVCBox.cpp @@ -0,0 +1,87 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include <climits> +#include "ISOControl.h" +#include "ISOMediaBoxes.h" +#include "AVCBox.h" + +namespace mozilla { + +nsresult +AVCSampleEntry::Generate(uint32_t* aBoxSize) +{ + uint32_t avc_box_size = 0; + nsresult rv; + rv = avcConfigBox->Generate(&avc_box_size); + NS_ENSURE_SUCCESS(rv, rv); + + size += avc_box_size; + + *aBoxSize = size; + + return NS_OK; +} + +nsresult +AVCSampleEntry::Write() +{ + BoxSizeChecker checker(mControl, size); + nsresult rv; + rv = VisualSampleEntry::Write(); + NS_ENSURE_SUCCESS(rv, rv); + rv = avcConfigBox->Write(); + NS_ENSURE_SUCCESS(rv, rv); + + return NS_OK; +} + +AVCSampleEntry::AVCSampleEntry(ISOControl* aControl) + : VisualSampleEntry(NS_LITERAL_CSTRING("avc1"), aControl) +{ + avcConfigBox = new AVCConfigurationBox(aControl); + MOZ_COUNT_CTOR(AVCSampleEntry); +} + +AVCSampleEntry::~AVCSampleEntry() +{ + MOZ_COUNT_DTOR(AVCSampleEntry); +} + +AVCConfigurationBox::AVCConfigurationBox(ISOControl* aControl) + : Box(NS_LITERAL_CSTRING("avcC"), aControl) +{ + MOZ_COUNT_CTOR(AVCConfigurationBox); +} + +AVCConfigurationBox::~AVCConfigurationBox() +{ + MOZ_COUNT_DTOR(AVCConfigurationBox); +} + +nsresult +AVCConfigurationBox::Generate(uint32_t* aBoxSize) +{ + nsresult rv; + FragmentBuffer* frag = mControl->GetFragment(Video_Track); + rv = frag->GetCSD(avcConfig); + NS_ENSURE_SUCCESS(rv, rv); + size += avcConfig.Length(); + *aBoxSize = size; + return NS_OK; +} + +nsresult +AVCConfigurationBox::Write() +{ + BoxSizeChecker checker(mControl, size); + Box::Write(); + + mControl->Write(avcConfig.Elements(), avcConfig.Length()); + + return NS_OK; +} + +} diff --git a/dom/media/encoder/fmp4_muxer/AVCBox.h b/dom/media/encoder/fmp4_muxer/AVCBox.h new file mode 100644 index 000000000..9640d9e8f --- /dev/null +++ b/dom/media/encoder/fmp4_muxer/AVCBox.h @@ -0,0 +1,59 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef AVCBox_h_ +#define AVCBox_h_ + +#include "nsTArray.h" +#include "ISOMediaBoxes.h" + +namespace mozilla { + +class ISOControl; + +// 14496-12 8.5.2.2 +#define resolution_72_dpi 0x00480000 +#define video_depth 0x0018 + +// 14496-15 5.3.4.1 'Sample description name and format' +// Box type: 'avcC' +class AVCConfigurationBox : public Box { +public: + // ISO BMFF members + + // avcConfig is CodecSpecificData from 14496-15 '5.3.4.1 Sample description + // name and format. + // These data are generated by encoder and we encapsulated the generated + // bitstream into box directly. + nsTArray<uint8_t> avcConfig; + + // MuxerOperation methods + nsresult Generate(uint32_t* aBoxSize) override; + nsresult Write() override; + + // AVCConfigurationBox methods + AVCConfigurationBox(ISOControl* aControl); + ~AVCConfigurationBox(); +}; + +// 14496-15 5.3.4.1 'Sample description name and format' +// Box type: 'avc1' +class AVCSampleEntry : public VisualSampleEntry { +public: + // ISO BMFF members + RefPtr<AVCConfigurationBox> avcConfigBox; + + // MuxerOperation methods + nsresult Generate(uint32_t* aBoxSize) override; + nsresult Write() override; + + // VisualSampleEntry methods + AVCSampleEntry(ISOControl* aControl); + ~AVCSampleEntry(); +}; + +} + +#endif // AVCBox_h_ diff --git a/dom/media/encoder/fmp4_muxer/EVRCBox.cpp b/dom/media/encoder/fmp4_muxer/EVRCBox.cpp new file mode 100644 index 000000000..096e4013d --- /dev/null +++ b/dom/media/encoder/fmp4_muxer/EVRCBox.cpp @@ -0,0 +1,84 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "ISOControl.h" +#include "ISOMediaBoxes.h" +#include "EVRCBox.h" +#include "ISOTrackMetadata.h" + +namespace mozilla { + +nsresult +EVRCSampleEntry::Generate(uint32_t* aBoxSize) +{ + uint32_t box_size; + nsresult rv = evrc_special_box->Generate(&box_size); + NS_ENSURE_SUCCESS(rv, rv); + size += box_size; + + *aBoxSize = size; + return NS_OK; +} + +nsresult +EVRCSampleEntry::Write() +{ + BoxSizeChecker checker(mControl, size); + nsresult rv; + rv = AudioSampleEntry::Write(); + NS_ENSURE_SUCCESS(rv, rv); + rv = evrc_special_box->Write(); + NS_ENSURE_SUCCESS(rv, rv); + + return NS_OK; +} + +EVRCSampleEntry::EVRCSampleEntry(ISOControl* aControl) + : AudioSampleEntry(NS_LITERAL_CSTRING("sevc"), aControl) +{ + evrc_special_box = new EVRCSpecificBox(aControl); + MOZ_COUNT_CTOR(EVRCSampleEntry); +} + +EVRCSampleEntry::~EVRCSampleEntry() +{ + MOZ_COUNT_DTOR(EVRCSampleEntry); +} + +nsresult +EVRCSpecificBox::Generate(uint32_t* aBoxSize) +{ + nsresult rv; + FragmentBuffer* frag = mControl->GetFragment(Audio_Track); + rv = frag->GetCSD(evrcDecSpecInfo); + NS_ENSURE_SUCCESS(rv, rv); + + size += evrcDecSpecInfo.Length(); + *aBoxSize = size; + + return NS_OK; +} + +nsresult +EVRCSpecificBox::Write() +{ + BoxSizeChecker checker(mControl, size); + Box::Write(); + mControl->Write(evrcDecSpecInfo.Elements(), evrcDecSpecInfo.Length()); + return NS_OK; +} + +EVRCSpecificBox::EVRCSpecificBox(ISOControl* aControl) + : Box(NS_LITERAL_CSTRING("devc"), aControl) +{ + MOZ_COUNT_CTOR(EVRCSpecificBox); +} + +EVRCSpecificBox::~EVRCSpecificBox() +{ + MOZ_COUNT_DTOR(EVRCSpecificBox); +} + +} diff --git a/dom/media/encoder/fmp4_muxer/EVRCBox.h b/dom/media/encoder/fmp4_muxer/EVRCBox.h new file mode 100644 index 000000000..31355849a --- /dev/null +++ b/dom/media/encoder/fmp4_muxer/EVRCBox.h @@ -0,0 +1,50 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef EVRCBOX_h_ +#define EVRCBOX_h_ + +#include "nsTArray.h" +#include "MuxerOperation.h" + +namespace mozilla { + +class ISOControl; + +// 3GPP TS 26.244 6.7 'EVRCSpecificBox field for EVRCSampleEntry box' +// Box type: 'devc' +class EVRCSpecificBox : public Box { +public: + // 3GPP members + nsTArray<uint8_t> evrcDecSpecInfo; + + // MuxerOperation methods + nsresult Generate(uint32_t* aBoxSize) override; + nsresult Write() override; + + // EVRCSpecificBox methods + EVRCSpecificBox(ISOControl* aControl); + ~EVRCSpecificBox(); +}; + +// 3GPP TS 26.244 6.5 'EVRCSampleEntry box' +// Box type: 'sevc' +class EVRCSampleEntry : public AudioSampleEntry { +public: + // 3GPP members + RefPtr<EVRCSpecificBox> evrc_special_box; + + // MuxerOperation methods + nsresult Generate(uint32_t* aBoxSize) override; + nsresult Write() override; + + // EVRCSampleEntry methods + EVRCSampleEntry(ISOControl* aControl); + ~EVRCSampleEntry(); +}; + +} + +#endif // EVRCBOX_h_ diff --git a/dom/media/encoder/fmp4_muxer/ISOControl.cpp b/dom/media/encoder/fmp4_muxer/ISOControl.cpp new file mode 100644 index 000000000..6addaeb30 --- /dev/null +++ b/dom/media/encoder/fmp4_muxer/ISOControl.cpp @@ -0,0 +1,415 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include <time.h> +#include "nsAutoPtr.h" +#include "ISOControl.h" +#include "ISOMediaBoxes.h" +#include "EncodedFrameContainer.h" + +namespace mozilla { + +// For MP4 creation_time and modification_time offset from January 1, 1904 to +// January 1, 1970. 
+#define iso_time_offset 2082844800 + +FragmentBuffer::FragmentBuffer(uint32_t aTrackType, uint32_t aFragDuration) + : mTrackType(aTrackType) + , mFragDuration(aFragDuration) + , mMediaStartTime(0) + , mFragmentNumber(0) + , mLastFrameTimeOfLastFragment(0) + , mEOS(false) +{ + mFragArray.AppendElement(); + MOZ_COUNT_CTOR(FragmentBuffer); +} + +FragmentBuffer::~FragmentBuffer() +{ + MOZ_COUNT_DTOR(FragmentBuffer); +} + +bool +FragmentBuffer::HasEnoughData() +{ + // Audio or video frame is enough to form a moof. + return (mFragArray.Length() > 1); +} + +nsresult +FragmentBuffer::GetCSD(nsTArray<uint8_t>& aCSD) +{ + if (!mCSDFrame) { + return NS_ERROR_FAILURE; + } + aCSD.AppendElements(mCSDFrame->GetFrameData().Elements(), + mCSDFrame->GetFrameData().Length()); + + return NS_OK; +} + +nsresult +FragmentBuffer::AddFrame(EncodedFrame* aFrame) +{ + // already EOS, it rejects all new data. + if (mEOS) { + MOZ_ASSERT(0); + return NS_OK; + } + + EncodedFrame::FrameType type = aFrame->GetFrameType(); + if (type == EncodedFrame::AAC_CSD || type == EncodedFrame::AVC_CSD || + type == EncodedFrame::AMR_AUDIO_CSD || type == EncodedFrame::EVRC_AUDIO_CSD) { + mCSDFrame = aFrame; + // Use CSD's timestamp as the start time. Encoder should send CSD frame first + // and then data frames. + mMediaStartTime = aFrame->GetTimeStamp(); + mFragmentNumber = 1; + return NS_OK; + } + + // if the timestamp is incorrect, abort it. + if (aFrame->GetTimeStamp() < mMediaStartTime) { + MOZ_ASSERT(false); + return NS_ERROR_FAILURE; + } + + mFragArray.LastElement().AppendElement(aFrame); + + // check if current fragment is reach the fragment duration. + if ((aFrame->GetTimeStamp() - mMediaStartTime) >= (mFragDuration * mFragmentNumber)) { + mFragArray.AppendElement(); + mFragmentNumber++; + } + + return NS_OK; +} + +nsresult +FragmentBuffer::GetFirstFragment(nsTArray<RefPtr<EncodedFrame>>& aFragment, + bool aFlush) +{ + // It should be called only if there is a complete fragment in mFragArray. 
+ if (mFragArray.Length() <= 1 && !mEOS) { + MOZ_ASSERT(false); + return NS_ERROR_FAILURE; + } + + if (aFlush) { + aFragment.SwapElements(mFragArray.ElementAt(0)); + mFragArray.RemoveElementAt(0); + } else { + aFragment.AppendElements(mFragArray.ElementAt(0)); + } + return NS_OK; +} + +uint32_t +FragmentBuffer::GetFirstFragmentSampleNumber() +{ + return mFragArray.ElementAt(0).Length(); +} + +uint32_t +FragmentBuffer::GetFirstFragmentSampleSize() +{ + uint32_t size = 0; + uint32_t len = mFragArray.ElementAt(0).Length(); + for (uint32_t i = 0; i < len; i++) { + size += mFragArray.ElementAt(0).ElementAt(i)->GetFrameData().Length(); + } + return size; +} + +ISOControl::ISOControl(uint32_t aMuxingType) + : mMuxingType(aMuxingType) + , mAudioFragmentBuffer(nullptr) + , mVideoFragmentBuffer(nullptr) + , mFragNum(0) + , mOutputSize(0) + , mBitCount(0) + , mBit(0) +{ + // Create a data array for first mp4 Box, ftyp. + mOutBuffers.SetLength(1); + MOZ_COUNT_CTOR(ISOControl); +} + +ISOControl::~ISOControl() +{ + MOZ_COUNT_DTOR(ISOControl); +} + +uint32_t +ISOControl::GetNextTrackID() +{ + return (mMetaArray.Length() + 1); +} + +uint32_t +ISOControl::GetTrackID(TrackMetadataBase::MetadataKind aKind) +{ + for (uint32_t i = 0; i < mMetaArray.Length(); i++) { + if (mMetaArray[i]->GetKind() == aKind) { + return (i + 1); + } + } + + // Track ID shouldn't be 0. It must be something wrong here. 
+ MOZ_ASSERT(0); + return 0; +} + +nsresult +ISOControl::SetMetadata(TrackMetadataBase* aTrackMeta) +{ + if (aTrackMeta->GetKind() == TrackMetadataBase::METADATA_AAC || + aTrackMeta->GetKind() == TrackMetadataBase::METADATA_AMR || + aTrackMeta->GetKind() == TrackMetadataBase::METADATA_AVC || + aTrackMeta->GetKind() == TrackMetadataBase::METADATA_EVRC) { + mMetaArray.AppendElement(aTrackMeta); + return NS_OK; + } + return NS_ERROR_FAILURE; +} + +nsresult +ISOControl::GetAudioMetadata(RefPtr<AudioTrackMetadata>& aAudMeta) +{ + for (uint32_t i = 0; i < mMetaArray.Length() ; i++) { + if (mMetaArray[i]->GetKind() == TrackMetadataBase::METADATA_AAC || + mMetaArray[i]->GetKind() == TrackMetadataBase::METADATA_AMR || + mMetaArray[i]->GetKind() == TrackMetadataBase::METADATA_EVRC) { + aAudMeta = static_cast<AudioTrackMetadata*>(mMetaArray[i].get()); + return NS_OK; + } + } + return NS_ERROR_FAILURE; +} + +nsresult +ISOControl::GetVideoMetadata(RefPtr<VideoTrackMetadata>& aVidMeta) +{ + for (uint32_t i = 0; i < mMetaArray.Length() ; i++) { + if (mMetaArray[i]->GetKind() == TrackMetadataBase::METADATA_AVC) { + aVidMeta = static_cast<VideoTrackMetadata*>(mMetaArray[i].get()); + return NS_OK; + } + } + return NS_ERROR_FAILURE; +} + +bool +ISOControl::HasAudioTrack() +{ + RefPtr<AudioTrackMetadata> audMeta; + GetAudioMetadata(audMeta); + return audMeta; +} + +bool +ISOControl::HasVideoTrack() +{ + RefPtr<VideoTrackMetadata> vidMeta; + GetVideoMetadata(vidMeta); + return vidMeta; +} + +nsresult +ISOControl::SetFragment(FragmentBuffer* aFragment) +{ + if (aFragment->GetType() == Audio_Track) { + mAudioFragmentBuffer = aFragment; + } else { + mVideoFragmentBuffer = aFragment; + } + return NS_OK; +} + +FragmentBuffer* +ISOControl::GetFragment(uint32_t aType) +{ + if (aType == Audio_Track) { + return mAudioFragmentBuffer; + } else if (aType == Video_Track){ + return mVideoFragmentBuffer; + } + MOZ_ASSERT(0); + return nullptr; +} + +nsresult 
+ISOControl::GetBufs(nsTArray<nsTArray<uint8_t>>* aOutputBufs) +{ + uint32_t len = mOutBuffers.Length(); + for (uint32_t i = 0; i < len; i++) { + mOutBuffers[i].SwapElements(*aOutputBufs->AppendElement()); + } + return FlushBuf(); +} + +nsresult +ISOControl::FlushBuf() +{ + mOutBuffers.SetLength(1); + return NS_OK; +} + +uint32_t +ISOControl::WriteAVData(nsTArray<uint8_t>& aArray) +{ + MOZ_ASSERT(!mBitCount); + + uint32_t len = aArray.Length(); + if (!len) { + return 0; + } + + mOutputSize += len; + + // The last element already has data, allocated a new element for pointer + // swapping. + if (mOutBuffers.LastElement().Length()) { + mOutBuffers.AppendElement(); + } + // Swap the video/audio data pointer. + mOutBuffers.LastElement().SwapElements(aArray); + // Following data could be boxes, so appending a new uint8_t array here. + mOutBuffers.AppendElement(); + + return len; +} + +uint32_t +ISOControl::WriteBits(uint64_t aBits, size_t aNumBits) +{ + uint8_t output_byte = 0; + + MOZ_ASSERT(aNumBits <= 64); + // TODO: rewritten following with bitset? + for (size_t i = aNumBits; i > 0; i--) { + mBit |= (((aBits >> (i - 1)) & 1) << (8 - ++mBitCount)); + if (mBitCount == 8) { + Write(&mBit, sizeof(uint8_t)); + mBit = 0; + mBitCount = 0; + output_byte++; + } + } + return output_byte; +} + +uint32_t +ISOControl::Write(uint8_t* aBuf, uint32_t aSize) +{ + mOutBuffers.LastElement().AppendElements(aBuf, aSize); + mOutputSize += aSize; + return aSize; +} + +uint32_t +ISOControl::Write(uint8_t aData) +{ + MOZ_ASSERT(!mBitCount); + Write((uint8_t*)&aData, sizeof(uint8_t)); + return sizeof(uint8_t); +} + +uint32_t +ISOControl::GetBufPos() +{ + uint32_t len = mOutBuffers.Length(); + uint32_t pos = 0; + for (uint32_t i = 0; i < len; i++) { + pos += mOutBuffers.ElementAt(i).Length(); + } + return pos; +} + +uint32_t +ISOControl::WriteFourCC(const char* aType) +{ + // Bit operation should be aligned to byte before writing any byte data. 
+ MOZ_ASSERT(!mBitCount); + + uint32_t size = strlen(aType); + if (size == 4) { + return Write((uint8_t*)aType, size); + } + + return 0; +} + +nsresult +ISOControl::GenerateFtyp() +{ + nsresult rv; + uint32_t size; + nsAutoPtr<FileTypeBox> type_box(new FileTypeBox(this)); + rv = type_box->Generate(&size); + NS_ENSURE_SUCCESS(rv, rv); + rv = type_box->Write(); + NS_ENSURE_SUCCESS(rv, rv); + return NS_OK; +} + +nsresult +ISOControl::GenerateMoov() +{ + nsresult rv; + uint32_t size; + nsAutoPtr<MovieBox> moov_box(new MovieBox(this)); + rv = moov_box->Generate(&size); + NS_ENSURE_SUCCESS(rv, rv); + rv = moov_box->Write(); + NS_ENSURE_SUCCESS(rv, rv); + return NS_OK; +} + +nsresult +ISOControl::GenerateMoof(uint32_t aTrackType) +{ + mFragNum++; + + nsresult rv; + uint32_t size; + uint64_t first_sample_offset = mOutputSize; + nsAutoPtr<MovieFragmentBox> moof_box(new MovieFragmentBox(aTrackType, this)); + nsAutoPtr<MediaDataBox> mdat_box(new MediaDataBox(aTrackType, this)); + + rv = moof_box->Generate(&size); + NS_ENSURE_SUCCESS(rv, rv); + first_sample_offset += size; + rv = mdat_box->Generate(&size); + NS_ENSURE_SUCCESS(rv, rv); + first_sample_offset += mdat_box->FirstSampleOffsetInMediaDataBox(); + + // correct offset info + nsTArray<RefPtr<MuxerOperation>> tfhds; + rv = moof_box->Find(NS_LITERAL_CSTRING("tfhd"), tfhds); + NS_ENSURE_SUCCESS(rv, rv); + uint32_t len = tfhds.Length(); + for (uint32_t i = 0; i < len; i++) { + TrackFragmentHeaderBox* tfhd = (TrackFragmentHeaderBox*) tfhds.ElementAt(i).get(); + rv = tfhd->UpdateBaseDataOffset(first_sample_offset); + NS_ENSURE_SUCCESS(rv, rv); + } + + rv = moof_box->Write(); + NS_ENSURE_SUCCESS(rv, rv); + rv = mdat_box->Write(); + NS_ENSURE_SUCCESS(rv, rv); + + return NS_OK; +} + +uint32_t +ISOControl::GetTime() +{ + return (uint64_t)time(nullptr) + iso_time_offset; +} + +} diff --git a/dom/media/encoder/fmp4_muxer/ISOControl.h b/dom/media/encoder/fmp4_muxer/ISOControl.h new file mode 100644 index 000000000..3c445caee --- 
/dev/null +++ b/dom/media/encoder/fmp4_muxer/ISOControl.h @@ -0,0 +1,250 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef ISOCOMPOSITOR_H_ +#define ISOCOMPOSITOR_H_ + +#include "mozilla/EndianUtils.h" +#include "nsTArray.h" +#include "ISOTrackMetadata.h" +#include "EncodedFrameContainer.h" + +namespace mozilla { + +class Box; +class ISOControl; + +/** + * This class collects elementary stream data to form a fragment. + * ISOMediaWriter will check if the data is enough; if yes, the corresponding + * moof will be created and write to ISOControl. + * Each audio and video has its own fragment and only one during the whole + * life cycle, when a fragment is formed in ISOControl, Flush() needs to + * be called to reset it. + */ +class FragmentBuffer { +public: + // aTrackType: it could be Audio_Track or Video_Track. + // aFragDuration: it is the fragment duration. (microsecond per unit) + // Audio and video have the same fragment duration. + FragmentBuffer(uint32_t aTrackType, uint32_t aFragDuration); + ~FragmentBuffer(); + + // Get samples of first fragment, that will swap all the elements in the + // mFragArray[0] when aFlush = true, and caller is responsible for drop + // EncodedFrame reference count. + nsresult GetFirstFragment(nsTArray<RefPtr<EncodedFrame>>& aFragment, + bool aFlush = false); + + // Add sample frame to the last element fragment of mFragArray. If sample + // number is enough, it will append a new fragment element. And the new + // sample will be added to the new fragment element of mFragArray. + nsresult AddFrame(EncodedFrame* aFrame); + + // Get total sample size of first complete fragment size. + uint32_t GetFirstFragmentSampleSize(); + + // Get sample number of first complete fragment. 
+ uint32_t GetFirstFragmentSampleNumber(); + + // Check if it accumulates enough frame data. + // It returns true when data is enough to form a fragment. + bool HasEnoughData(); + + // Called by ISOMediaWriter when TrackEncoder has sent the last frame. The + // remains frame data will form the last moof and move the state machine to + // in ISOMediaWriter to last phrase. + nsresult SetEndOfStream() { + mEOS = true; + return NS_OK; + } + bool EOS() { return mEOS; } + + // CSD (codec specific data), it is generated by encoder and the data depends + // on codec type. This data will be sent as a special frame from encoder to + // ISOMediaWriter and pass to this class via AddFrame(). + nsresult GetCSD(nsTArray<uint8_t>& aCSD); + + bool HasCSD() { return mCSDFrame; } + + uint32_t GetType() { return mTrackType; } + + void SetLastFragmentLastFrameTime(uint32_t aTime) { + mLastFrameTimeOfLastFragment = aTime; + } + + uint32_t GetLastFragmentLastFrameTime() { + return mLastFrameTimeOfLastFragment; + } + +private: + uint32_t mTrackType; + + // Fragment duration, microsecond per unit. + uint32_t mFragDuration; + + // Media start time, microsecond per unit. + // Together with mFragDuration, mFragmentNumber and EncodedFrame->GetTimeStamp(), + // when the difference between current frame time and mMediaStartTime is + // exceeded current fragment ceiling timeframe, that means current fragment has + // enough data and a new element in mFragArray will be added. + uint64_t mMediaStartTime; + + // Current fragment number. It will be increase when a new element of + // mFragArray is created. + // Note: + // It only means the fragment number of current accumulated frames, not + // the current 'creating' fragment mFragNum in ISOControl. + uint32_t mFragmentNumber; + + // The last frame time stamp of last fragment. It is for calculating the + // play duration of first frame in current fragment. The frame duration is + // defined as "current frame timestamp - last frame timestamp" here. 
So it + // needs to keep the last timestamp of last fragment. + uint32_t mLastFrameTimeOfLastFragment; + + // Array of fragments, each element has enough samples to form a + // complete fragment. + nsTArray<nsTArray<RefPtr<EncodedFrame>>> mFragArray; + + // Codec specific data frame, it will be generated by encoder and send to + // ISOMediaWriter through WriteEncodedTrack(). The data will be vary depends + // on codec type. + RefPtr<EncodedFrame> mCSDFrame; + + // END_OF_STREAM from ContainerWriter + bool mEOS; +}; + +/** + * ISOControl will be carried to each box when box is created. It is the main + * bridge for box to output stream to ContainerWriter and retrieve information. + * ISOControl acts 3 different roles: + * 1. Holds the pointer of audio metadata, video metadata, fragment and + * pass them to boxes. + * 2. Provide the functions to generate the base structure of MP4; they are + * GenerateFtyp, GenerateMoov, GenerateMoof, and GenerateMfra. + * 3. The actually writer used by MuxOperation::Write() in each box. It provides + * writing methods for different kind of data; they are Write, WriteArray, + * WriteBits...etc. + */ +class ISOControl { + +friend class Box; + +public: + ISOControl(uint32_t aMuxingType); + ~ISOControl(); + + nsresult GenerateFtyp(); + nsresult GenerateMoov(); + nsresult GenerateMoof(uint32_t aTrackType); + + // Swap elementary stream pointer to output buffers. 
+ uint32_t WriteAVData(nsTArray<uint8_t>& aArray); + + uint32_t Write(uint8_t* aBuf, uint32_t aSize); + + uint32_t Write(uint8_t aData); + + template <typename T> + uint32_t Write(T aData) { + MOZ_ASSERT(!mBitCount); + + aData = NativeEndian::swapToNetworkOrder(aData); + Write((uint8_t*)&aData, sizeof(T)); + return sizeof(T); + } + + template <typename T> + uint32_t WriteArray(const T &aArray, uint32_t aSize) { + MOZ_ASSERT(!mBitCount); + + uint32_t size = 0; + for (uint32_t i = 0; i < aSize; i++) { + size += Write(aArray[i]); + } + return size; + } + + uint32_t WriteFourCC(const char* aType); + + // Bit writing. Note: it needs to be byte-boundary before using + // others non-bit writing function. + uint32_t WriteBits(uint64_t aBits, size_t aNumBits); + + // This is called by GetContainerData and swap all the buffers to aOutputBuffers. + nsresult GetBufs(nsTArray<nsTArray<uint8_t>>* aOutputBufs); + + // Presentation time in seconds since midnight, Jan. 1, 1904, in UTC time. + uint32_t GetTime(); + + // current fragment number + uint32_t GetCurFragmentNumber() { return mFragNum; } + + nsresult SetFragment(FragmentBuffer* aFragment); + FragmentBuffer* GetFragment(uint32_t aType); + + uint32_t GetMuxingType() { return mMuxingType; } + + nsresult SetMetadata(TrackMetadataBase* aTrackMeta); + nsresult GetAudioMetadata(RefPtr<AudioTrackMetadata>& aAudMeta); + nsresult GetVideoMetadata(RefPtr<VideoTrackMetadata>& aVidMeta); + + // Track ID is the Metadata index in mMetaArray. It allows only 1 audio + // track and 1 video track in this muxer. In this muxer, it is prohibt to have + // mutiple audio track or video track in the same file. + uint32_t GetTrackID(TrackMetadataBase::MetadataKind aKind); + uint32_t GetNextTrackID(); + + bool HasAudioTrack(); + bool HasVideoTrack(); + +private: + uint32_t GetBufPos(); + nsresult FlushBuf(); + + // One of value in TYPE_XXX, defined in ISOMediaWriter. 
+ uint32_t mMuxingType; + + // Audio and video fragments are owned by ISOMediaWriter. + // They don't need to worry about pointer going stale because ISOMediaWriter's + // lifetime is longer than ISOControl. + FragmentBuffer* mAudioFragmentBuffer; + FragmentBuffer* mVideoFragmentBuffer; + + // Generated fragment number + uint32_t mFragNum; + + // The (index + 1) will be the track ID. + nsTArray<RefPtr<TrackMetadataBase>> mMetaArray; + + // Array of output buffers. + // To save memory usage, audio/video sample will be swapped into a new element + // of this array. + // + // For example, + // mOutBuffers[0] --> boxes (allocated by muxer) + // mOutBuffers[1] --> video raw data (allocated by encoder) + // mOutBuffers[2] --> video raw data (allocated by encoder) + // mOutBuffers[3] --> video raw data (allocated by encoder) + // mOutBuffers[4] --> boxes (allocated by muxer) + // mOutBuffers[5] --> audio raw data (allocated by encoder) + // ...etc. + // + nsTArray<nsTArray<uint8_t>> mOutBuffers; + + // Accumulate output size from Write(). + uint64_t mOutputSize; + + // Bit writing operation. Note: the mBitCount should be 0 before any + // byte-boundary writing method be called (Write(uint32_t), Write(uint16_t)...etc); + // otherwise, there will be assertion on these functions. + uint8_t mBitCount; + uint8_t mBit; +}; + +} +#endif diff --git a/dom/media/encoder/fmp4_muxer/ISOMediaBoxes.cpp b/dom/media/encoder/fmp4_muxer/ISOMediaBoxes.cpp new file mode 100644 index 000000000..32a0c577b --- /dev/null +++ b/dom/media/encoder/fmp4_muxer/ISOMediaBoxes.cpp @@ -0,0 +1,1550 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include <climits> +#include "TrackMetadataBase.h" +#include "ISOMediaBoxes.h" +#include "ISOControl.h" +#include "ISOMediaWriter.h" +#include "EncodedFrameContainer.h" +#include "ISOTrackMetadata.h" +#include "MP4ESDS.h" +#include "AMRBox.h" +#include "AVCBox.h" +#include "EVRCBox.h" +#include "VideoUtils.h" + +namespace mozilla { + +// 14496-12 6.2.2 'Data Types and fields' +const uint32_t iso_matrix[] = { 0x00010000, 0, 0, + 0, 0x00010000, 0, + 0, 0, 0x40000000 }; + +uint32_t +set_sample_flags(bool aSync) +{ + std::bitset<32> flags; + flags.set(16, !aSync); + return flags.to_ulong(); +} + +Box::BoxSizeChecker::BoxSizeChecker(ISOControl* aControl, uint32_t aSize) +{ + mControl = aControl; + ori_size = mControl->GetBufPos(); + box_size = aSize; + MOZ_COUNT_CTOR(BoxSizeChecker); +} + +Box::BoxSizeChecker::~BoxSizeChecker() +{ + uint32_t cur_size = mControl->GetBufPos(); + if ((cur_size - ori_size) != box_size) { + MOZ_ASSERT(false); + } + + MOZ_COUNT_DTOR(BoxSizeChecker); +} + +nsresult +MediaDataBox::Generate(uint32_t* aBoxSize) +{ + mFirstSampleOffset = size; + mAllSampleSize = 0; + + if (mTrackType & Audio_Track) { + FragmentBuffer* frag = mControl->GetFragment(Audio_Track); + mAllSampleSize += frag->GetFirstFragmentSampleSize(); + } + if (mTrackType & Video_Track) { + FragmentBuffer* frag = mControl->GetFragment(Video_Track); + mAllSampleSize += frag->GetFirstFragmentSampleSize(); + } + + size += mAllSampleSize; + *aBoxSize = size; + return NS_OK; +} + +nsresult +MediaDataBox::Write() +{ + nsresult rv; + BoxSizeChecker checker(mControl, size); + Box::Write(); + nsTArray<uint32_t> types; + types.AppendElement(Audio_Track); + types.AppendElement(Video_Track); + + for (uint32_t l = 0; l < types.Length(); l++) { + if (mTrackType & types[l]) { + FragmentBuffer* frag = mControl->GetFragment(types[l]); + nsTArray<RefPtr<EncodedFrame>> frames; + + // Here is the last time we get fragment frames, flush it! 
+ rv = frag->GetFirstFragment(frames, true); + NS_ENSURE_SUCCESS(rv, rv); + + uint32_t len = frames.Length(); + for (uint32_t i = 0; i < len; i++) { + nsTArray<uint8_t> frame_buffer; + frames.ElementAt(i)->SwapOutFrameData(frame_buffer); + mControl->WriteAVData(frame_buffer); + } + } + } + + return NS_OK; +} + +MediaDataBox::MediaDataBox(uint32_t aTrackType, ISOControl* aControl) + : Box(NS_LITERAL_CSTRING("mdat"), aControl) + , mAllSampleSize(0) + , mFirstSampleOffset(0) + , mTrackType(aTrackType) +{ + MOZ_COUNT_CTOR(MediaDataBox); +} + +MediaDataBox::~MediaDataBox() +{ + MOZ_COUNT_DTOR(MediaDataBox); +} + +uint32_t +TrackRunBox::fillSampleTable() +{ + uint32_t table_size = 0; + nsresult rv; + nsTArray<RefPtr<EncodedFrame>> frames; + FragmentBuffer* frag = mControl->GetFragment(mTrackType); + + rv = frag->GetFirstFragment(frames); + if (NS_FAILED(rv)) { + return 0; + } + uint32_t len = frames.Length(); + sample_info_table = MakeUnique<tbl[]>(len); + // Create sample table according to 14496-12 8.8.8.2. + for (uint32_t i = 0; i < len; i++) { + // Sample size. + sample_info_table[i].sample_size = 0; + if (flags.to_ulong() & flags_sample_size_present) { + sample_info_table[i].sample_size = frames.ElementAt(i)->GetFrameData().Length(); + mAllSampleSize += sample_info_table[i].sample_size; + table_size += sizeof(uint32_t); + } + + // Sample flags. + sample_info_table[i].sample_flags = 0; + if (flags.to_ulong() & flags_sample_flags_present) { + sample_info_table[i].sample_flags = + set_sample_flags( + (frames.ElementAt(i)->GetFrameType() == EncodedFrame::AVC_I_FRAME)); + table_size += sizeof(uint32_t); + } + + // Sample duration. + sample_info_table[i].sample_duration = 0; + if (flags.to_ulong() & flags_sample_duration_present) { + // Calculate each frame's duration, it is decided by "current frame + // timestamp - last frame timestamp". 
+ uint64_t frame_time = 0; + if (i == 0) { + frame_time = frames.ElementAt(i)->GetTimeStamp() - + frag->GetLastFragmentLastFrameTime(); + } else { + frame_time = frames.ElementAt(i)->GetTimeStamp() - + frames.ElementAt(i - 1)->GetTimeStamp(); + // Keep the last frame time of current fagment, it will be used to calculate + // the first frame duration of next fragment. + if ((len - 1) == i) { + frag->SetLastFragmentLastFrameTime(frames.ElementAt(i)->GetTimeStamp()); + } + } + + // In TrackRunBox, there should be exactly one type, either audio or video. + MOZ_ASSERT((mTrackType & Video_Track) ^ (mTrackType & Audio_Track)); + sample_info_table[i].sample_duration = (mTrackType & Video_Track ? + frame_time * mVideoMeta->GetVideoClockRate() / USECS_PER_S : + frame_time * mAudioMeta->GetAudioSampleRate() / USECS_PER_S); + + table_size += sizeof(uint32_t); + } + + sample_info_table[i].sample_composition_time_offset = 0; + } + return table_size; +} + +nsresult +TrackRunBox::Generate(uint32_t* aBoxSize) +{ + FragmentBuffer* frag = mControl->GetFragment(mTrackType); + sample_count = frag->GetFirstFragmentSampleNumber(); + size += sizeof(sample_count); + + // data_offset needs to be updated if there is other + // TrackRunBox before this one. 
+ if (flags.to_ulong() & flags_data_offset_present) { + data_offset = 0; + size += sizeof(data_offset); + } + size += fillSampleTable(); + + *aBoxSize = size; + + return NS_OK; +} + +nsresult +TrackRunBox::SetDataOffset(uint32_t aOffset) +{ + data_offset = aOffset; + return NS_OK; +} + +nsresult +TrackRunBox::Write() +{ + WRITE_FULLBOX(mControl, size) + mControl->Write(sample_count); + if (flags.to_ulong() & flags_data_offset_present) { + mControl->Write(data_offset); + } + for (uint32_t i = 0; i < sample_count; i++) { + if (flags.to_ulong() & flags_sample_duration_present) { + mControl->Write(sample_info_table[i].sample_duration); + } + if (flags.to_ulong() & flags_sample_size_present) { + mControl->Write(sample_info_table[i].sample_size); + } + if (flags.to_ulong() & flags_sample_flags_present) { + mControl->Write(sample_info_table[i].sample_flags); + } + } + + return NS_OK; +} + +TrackRunBox::TrackRunBox(uint32_t aType, uint32_t aFlags, ISOControl* aControl) + : FullBox(NS_LITERAL_CSTRING("trun"), 0, aFlags, aControl) + , sample_count(0) + , data_offset(0) + , first_sample_flags(0) + , mAllSampleSize(0) + , mTrackType(aType) +{ + MOZ_COUNT_CTOR(TrackRunBox); +} + +TrackRunBox::~TrackRunBox() +{ + MOZ_COUNT_DTOR(TrackRunBox); +} + +nsresult +TrackFragmentHeaderBox::UpdateBaseDataOffset(uint64_t aOffset) +{ + base_data_offset = aOffset; + return NS_OK; +} + +nsresult +TrackFragmentHeaderBox::Generate(uint32_t* aBoxSize) +{ + track_ID = (mTrackType == Audio_Track ? + mControl->GetTrackID(mAudioMeta->GetKind()) : + mControl->GetTrackID(mVideoMeta->GetKind())); + size += sizeof(track_ID); + + if (flags.to_ulong() & base_data_offset_present) { + // base_data_offset needs to add size of 'trun', 'tfhd' and + // header of 'mdat' later. 
+ base_data_offset = 0; + size += sizeof(base_data_offset); + } + if (flags.to_ulong() & default_sample_duration_present) { + if (mTrackType == Video_Track) { + if (!mVideoMeta->GetVideoFrameRate()) { + // 0 means frame rate is variant, so it is wrong to write + // default_sample_duration. + MOZ_ASSERT(0); + default_sample_duration = 0; + } else { + default_sample_duration = mVideoMeta->GetVideoClockRate() / mVideoMeta->GetVideoFrameRate(); + } + } else if (mTrackType == Audio_Track) { + default_sample_duration = mAudioMeta->GetAudioFrameDuration(); + } else { + MOZ_ASSERT(0); + return NS_ERROR_FAILURE; + } + size += sizeof(default_sample_duration); + } + *aBoxSize = size; + return NS_OK; +} + +nsresult +TrackFragmentHeaderBox::Write() +{ + WRITE_FULLBOX(mControl, size) + mControl->Write(track_ID); + if (flags.to_ulong() & base_data_offset_present) { + mControl->Write(base_data_offset); + } + if (flags.to_ulong() & default_sample_duration_present) { + mControl->Write(default_sample_duration); + } + return NS_OK; +} + +TrackFragmentHeaderBox::TrackFragmentHeaderBox(uint32_t aType, + uint32_t aFlags, + ISOControl* aControl) + : FullBox(NS_LITERAL_CSTRING("tfhd"), 0, aFlags, aControl) + , track_ID(0) + , base_data_offset(0) + , default_sample_duration(0) +{ + mTrackType = aType; + MOZ_COUNT_CTOR(TrackFragmentHeaderBox); +} + +TrackFragmentHeaderBox::~TrackFragmentHeaderBox() +{ + MOZ_COUNT_DTOR(TrackFragmentHeaderBox); +} + +TrackFragmentBox::TrackFragmentBox(uint32_t aType, ISOControl* aControl) + : DefaultContainerImpl(NS_LITERAL_CSTRING("traf"), aControl) + , mTrackType(aType) +{ + // Flags in TrackFragmentHeaderBox. + uint32_t tf_flags = base_data_offset_present; + + // Ideally, audio encoder generates audio frame in const rate. However, some + // audio encoders don't do it so the audio frame duration needs to be checked + // here. 
+ if ((mTrackType & Audio_Track) && mAudioMeta->GetAudioFrameDuration()) { + tf_flags |= default_sample_duration_present; + } + + boxes.AppendElement(new TrackFragmentHeaderBox(aType, tf_flags, aControl)); + + // Always adds flags_data_offset_present in each TrackRunBox, Android + // parser requires this flag to calculate the correct bitstream offset. + uint32_t tr_flags = flags_sample_size_present | flags_data_offset_present; + + // Flags in TrackRunBox. + // If there is no default sample duration exists, each frame duration needs to + // be recored in the TrackRunBox. + tr_flags |= (tf_flags & default_sample_duration_present ? 0 : flags_sample_duration_present); + + // For video, add sample_flags to record I frame. + tr_flags |= (mTrackType & Video_Track ? flags_sample_flags_present : 0); + + boxes.AppendElement(new TrackRunBox(mTrackType, tr_flags, aControl)); + MOZ_COUNT_CTOR(TrackFragmentBox); +} + +TrackFragmentBox::~TrackFragmentBox() +{ + MOZ_COUNT_DTOR(TrackFragmentBox); +} + +nsresult +MovieFragmentHeaderBox::Generate(uint32_t* aBoxSize) +{ + sequence_number = mControl->GetCurFragmentNumber(); + size += sizeof(sequence_number); + *aBoxSize = size; + return NS_OK; +} + +nsresult +MovieFragmentHeaderBox::Write() +{ + WRITE_FULLBOX(mControl, size) + mControl->Write(sequence_number); + return NS_OK; +} + +MovieFragmentHeaderBox::MovieFragmentHeaderBox(uint32_t aTrackType, + ISOControl* aControl) + : FullBox(NS_LITERAL_CSTRING("mfhd"), 0, 0, aControl) + , sequence_number(0) + , mTrackType(aTrackType) +{ + MOZ_COUNT_CTOR(MovieFragmentHeaderBox); +} + +MovieFragmentHeaderBox::~MovieFragmentHeaderBox() +{ + MOZ_COUNT_DTOR(MovieFragmentHeaderBox); +} + +MovieFragmentBox::MovieFragmentBox(uint32_t aType, ISOControl* aControl) + : DefaultContainerImpl(NS_LITERAL_CSTRING("moof"), aControl) + , mTrackType(aType) +{ + boxes.AppendElement(new MovieFragmentHeaderBox(mTrackType, aControl)); + + if (mTrackType & Audio_Track) { + boxes.AppendElement( + new 
TrackFragmentBox(Audio_Track, aControl)); + } + if (mTrackType & Video_Track) { + boxes.AppendElement( + new TrackFragmentBox(Video_Track, aControl)); + } + MOZ_COUNT_CTOR(MovieFragmentBox); +} + +MovieFragmentBox::~MovieFragmentBox() +{ + MOZ_COUNT_DTOR(MovieFragmentBox); +} + +nsresult +MovieFragmentBox::Generate(uint32_t* aBoxSize) +{ + nsresult rv = DefaultContainerImpl::Generate(aBoxSize); + NS_ENSURE_SUCCESS(rv, rv); + + // Correct data_offset if there are both audio and video track in + // this fragment. This offset means the offset in the MediaDataBox. + if (mTrackType & (Audio_Track | Video_Track)) { + nsTArray<RefPtr<MuxerOperation>> truns; + rv = Find(NS_LITERAL_CSTRING("trun"), truns); + NS_ENSURE_SUCCESS(rv, rv); + uint32_t len = truns.Length(); + uint32_t data_offset = 0; + for (uint32_t i = 0; i < len; i++) { + TrackRunBox* trun = (TrackRunBox*) truns.ElementAt(i).get(); + rv = trun->SetDataOffset(data_offset); + NS_ENSURE_SUCCESS(rv, rv); + data_offset += trun->GetAllSampleSize(); + } + } + + return NS_OK; +} + +nsresult +TrackExtendsBox::Generate(uint32_t* aBoxSize) +{ + track_ID = (mTrackType == Audio_Track ? + mControl->GetTrackID(mAudioMeta->GetKind()) : + mControl->GetTrackID(mVideoMeta->GetKind())); + + if (mTrackType == Audio_Track) { + default_sample_description_index = 1; + default_sample_duration = mAudioMeta->GetAudioFrameDuration(); + default_sample_size = mAudioMeta->GetAudioFrameSize(); + default_sample_flags = set_sample_flags(1); + } else if (mTrackType == Video_Track) { + default_sample_description_index = 1; + // Video meta data has assigned framerate, it implies that this video's + // frame rate should be fixed. 
+ if (mVideoMeta->GetVideoFrameRate()) { + default_sample_duration = + mVideoMeta->GetVideoClockRate() / mVideoMeta->GetVideoFrameRate(); + } + default_sample_size = 0; + default_sample_flags = set_sample_flags(0); + } else { + MOZ_ASSERT(0); + return NS_ERROR_FAILURE; + } + + size += sizeof(track_ID) + + sizeof(default_sample_description_index) + + sizeof(default_sample_duration) + + sizeof(default_sample_size) + + sizeof(default_sample_flags); + + *aBoxSize = size; + + return NS_OK; +} + +nsresult +TrackExtendsBox::Write() +{ + WRITE_FULLBOX(mControl, size) + mControl->Write(track_ID); + mControl->Write(default_sample_description_index); + mControl->Write(default_sample_duration); + mControl->Write(default_sample_size); + mControl->Write(default_sample_flags); + + return NS_OK; +} + +TrackExtendsBox::TrackExtendsBox(uint32_t aType, ISOControl* aControl) + : FullBox(NS_LITERAL_CSTRING("trex"), 0, 0, aControl) + , track_ID(0) + , default_sample_description_index(0) + , default_sample_duration(0) + , default_sample_size(0) + , default_sample_flags(0) + , mTrackType(aType) +{ + MOZ_COUNT_CTOR(TrackExtendsBox); +} + +TrackExtendsBox::~TrackExtendsBox() +{ + MOZ_COUNT_DTOR(TrackExtendsBox); +} + +MovieExtendsBox::MovieExtendsBox(ISOControl* aControl) + : DefaultContainerImpl(NS_LITERAL_CSTRING("mvex"), aControl) +{ + if (mAudioMeta) { + boxes.AppendElement(new TrackExtendsBox(Audio_Track, aControl)); + } + if (mVideoMeta) { + boxes.AppendElement(new TrackExtendsBox(Video_Track, aControl)); + } + MOZ_COUNT_CTOR(MovieExtendsBox); +} + +MovieExtendsBox::~MovieExtendsBox() +{ + MOZ_COUNT_DTOR(MovieExtendsBox); +} + +nsresult +ChunkOffsetBox::Generate(uint32_t* aBoxSize) +{ + // We don't need time to sample table in fragmented mp4. 
+ entry_count = 0; + size += sizeof(entry_count); + *aBoxSize = size; + return NS_OK; +} + +nsresult +ChunkOffsetBox::Write() +{ + WRITE_FULLBOX(mControl, size) + mControl->Write(entry_count); + return NS_OK; +} + +ChunkOffsetBox::ChunkOffsetBox(uint32_t aType, ISOControl* aControl) + : FullBox(NS_LITERAL_CSTRING("stco"), 0, 0, aControl) + , entry_count(0) +{ + MOZ_COUNT_CTOR(ChunkOffsetBox); +} + +ChunkOffsetBox::~ChunkOffsetBox() +{ + MOZ_COUNT_DTOR(ChunkOffsetBox); +} + +nsresult +SampleToChunkBox::Generate(uint32_t* aBoxSize) +{ + // We don't need time to sample table in fragmented mp4 + entry_count = 0; + size += sizeof(entry_count); + *aBoxSize = size; + return NS_OK; +} + +nsresult +SampleToChunkBox::Write() +{ + WRITE_FULLBOX(mControl, size) + mControl->Write(entry_count); + return NS_OK; +} + +SampleToChunkBox::SampleToChunkBox(uint32_t aType, ISOControl* aControl) + : FullBox(NS_LITERAL_CSTRING("stsc"), 0, 0, aControl) + , entry_count(0) +{ + MOZ_COUNT_CTOR(SampleToChunkBox); +} + +SampleToChunkBox::~SampleToChunkBox() +{ + MOZ_COUNT_DTOR(SampleToChunkBox); +} + +nsresult +TimeToSampleBox::Generate(uint32_t* aBoxSize) +{ + // We don't need time to sample table in fragmented mp4. 
+ entry_count = 0; + size += sizeof(entry_count); + *aBoxSize = size; + return NS_OK; +} + +nsresult +TimeToSampleBox::Write() +{ + WRITE_FULLBOX(mControl, size) + mControl->Write(entry_count); + return NS_OK; +} + +TimeToSampleBox::TimeToSampleBox(uint32_t aType, ISOControl* aControl) + : FullBox(NS_LITERAL_CSTRING("stts"), 0, 0, aControl) + , entry_count(0) +{ + MOZ_COUNT_CTOR(TimeToSampleBox); +} + +TimeToSampleBox::~TimeToSampleBox() +{ + MOZ_COUNT_DTOR(TimeToSampleBox); +} + +nsresult +SampleDescriptionBox::Generate(uint32_t* aBoxSize) +{ + entry_count = 1; + size += sizeof(entry_count); + + nsresult rv; + uint32_t box_size; + rv = sample_entry_box->Generate(&box_size); + NS_ENSURE_SUCCESS(rv, rv); + size += box_size; + *aBoxSize = size; + + return NS_OK; +} + +nsresult +SampleDescriptionBox::Write() +{ + WRITE_FULLBOX(mControl, size) + nsresult rv; + mControl->Write(entry_count); + rv = sample_entry_box->Write(); + NS_ENSURE_SUCCESS(rv, rv); + + return NS_OK; +} + +SampleDescriptionBox::SampleDescriptionBox(uint32_t aType, ISOControl* aControl) + : FullBox(NS_LITERAL_CSTRING("stsd"), 0, 0, aControl) + , entry_count(0) +{ + mTrackType = aType; + + switch (mTrackType) { + case Audio_Track: + { + CreateAudioSampleEntry(sample_entry_box); + } + break; + case Video_Track: + { + CreateVideoSampleEntry(sample_entry_box); + } + break; + } + MOZ_ASSERT(sample_entry_box); + MOZ_COUNT_CTOR(SampleDescriptionBox); +} + +nsresult +SampleDescriptionBox::CreateAudioSampleEntry(RefPtr<SampleEntryBox>& aSampleEntry) +{ + if (mAudioMeta->GetKind() == TrackMetadataBase::METADATA_AMR) { + aSampleEntry = new AMRSampleEntry(mControl); + } else if (mAudioMeta->GetKind() == TrackMetadataBase::METADATA_AAC) { + aSampleEntry = new MP4AudioSampleEntry(mControl); + } else if (mAudioMeta->GetKind() == TrackMetadataBase::METADATA_EVRC) { + aSampleEntry = new EVRCSampleEntry(mControl); + } else { + MOZ_ASSERT(0); + } + return NS_OK; +} + +nsresult 
+SampleDescriptionBox::CreateVideoSampleEntry(RefPtr<SampleEntryBox>& aSampleEntry) +{ + if (mVideoMeta->GetKind() == TrackMetadataBase::METADATA_AVC) { + aSampleEntry = new AVCSampleEntry(mControl); + } else { + MOZ_ASSERT(0); + } + return NS_OK; +} + +SampleDescriptionBox::~SampleDescriptionBox() +{ + MOZ_COUNT_DTOR(SampleDescriptionBox); +} + +nsresult +SampleSizeBox::Generate(uint32_t* aBoxSize) +{ + size += sizeof(sample_size) + + sizeof(sample_count); + *aBoxSize = size; + return NS_OK; +} + +nsresult +SampleSizeBox::Write() +{ + WRITE_FULLBOX(mControl, size) + mControl->Write(sample_size); + mControl->Write(sample_count); + return NS_OK; +} + +SampleSizeBox::SampleSizeBox(ISOControl* aControl) + : FullBox(NS_LITERAL_CSTRING("stsz"), 0, 0, aControl) + , sample_size(0) + , sample_count(0) +{ + MOZ_COUNT_CTOR(SampleSizeBox); +} + +SampleSizeBox::~SampleSizeBox() +{ + MOZ_COUNT_DTOR(SampleSizeBox); +} + +SampleTableBox::SampleTableBox(uint32_t aType, ISOControl* aControl) + : DefaultContainerImpl(NS_LITERAL_CSTRING("stbl"), aControl) +{ + boxes.AppendElement(new SampleDescriptionBox(aType, aControl)); + boxes.AppendElement(new TimeToSampleBox(aType, aControl)); + boxes.AppendElement(new SampleToChunkBox(aType, aControl)); + boxes.AppendElement(new SampleSizeBox(aControl)); + boxes.AppendElement(new ChunkOffsetBox(aType, aControl)); + MOZ_COUNT_CTOR(SampleTableBox); +} + +SampleTableBox::~SampleTableBox() +{ + MOZ_COUNT_DTOR(SampleTableBox); +} + +nsresult +DataEntryUrlBox::Generate(uint32_t* aBoxSize) +{ + // location is null here, do nothing + size += location.Length(); + *aBoxSize = size; + + return NS_OK; +} + +nsresult +DataEntryUrlBox::Write() +{ + WRITE_FULLBOX(mControl, size) + return NS_OK; +} + +DataEntryUrlBox::DataEntryUrlBox() + : FullBox(NS_LITERAL_CSTRING("url "), 0, 0, (ISOControl*) nullptr) +{ + MOZ_COUNT_CTOR(DataEntryUrlBox); +} + +DataEntryUrlBox::DataEntryUrlBox(ISOControl* aControl) + : FullBox(NS_LITERAL_CSTRING("url "), 0, 
flags_media_at_the_same_file, aControl) +{ + MOZ_COUNT_CTOR(DataEntryUrlBox); +} + +DataEntryUrlBox::DataEntryUrlBox(const DataEntryUrlBox& aBox) + : FullBox(aBox.boxType, aBox.version, aBox.flags.to_ulong(), aBox.mControl) +{ + location = aBox.location; + MOZ_COUNT_CTOR(DataEntryUrlBox); +} + +DataEntryUrlBox::~DataEntryUrlBox() +{ + MOZ_COUNT_DTOR(DataEntryUrlBox); +} + +nsresult DataReferenceBox::Generate(uint32_t* aBoxSize) +{ + entry_count = 1; // only allow on entry here + size += sizeof(uint32_t); + + for (uint32_t i = 0; i < entry_count; i++) { + uint32_t box_size = 0; + DataEntryUrlBox* url = new DataEntryUrlBox(mControl); + url->Generate(&box_size); + size += box_size; + urls.AppendElement(url); + } + + *aBoxSize = size; + + return NS_OK; +} + +nsresult DataReferenceBox::Write() +{ + WRITE_FULLBOX(mControl, size) + mControl->Write(entry_count); + + for (uint32_t i = 0; i < entry_count; i++) { + urls[i]->Write(); + } + + return NS_OK; +} + +DataReferenceBox::DataReferenceBox(ISOControl* aControl) + : FullBox(NS_LITERAL_CSTRING("dref"), 0, 0, aControl) + , entry_count(0) +{ + MOZ_COUNT_CTOR(DataReferenceBox); +} + +DataReferenceBox::~DataReferenceBox() +{ + MOZ_COUNT_DTOR(DataReferenceBox); +} + +DataInformationBox::DataInformationBox(ISOControl* aControl) + : DefaultContainerImpl(NS_LITERAL_CSTRING("dinf"), aControl) +{ + boxes.AppendElement(new DataReferenceBox(aControl)); + MOZ_COUNT_CTOR(DataInformationBox); +} + +DataInformationBox::~DataInformationBox() +{ + MOZ_COUNT_DTOR(DataInformationBox); +} + +nsresult +VideoMediaHeaderBox::Generate(uint32_t* aBoxSize) +{ + size += sizeof(graphicsmode) + + sizeof(opcolor); + + *aBoxSize = size; + + return NS_OK; +} + +nsresult +VideoMediaHeaderBox::Write() +{ + WRITE_FULLBOX(mControl, size) + mControl->Write(graphicsmode); + mControl->WriteArray(opcolor, 3); + return NS_OK; +} + +VideoMediaHeaderBox::VideoMediaHeaderBox(ISOControl* aControl) + : FullBox(NS_LITERAL_CSTRING("vmhd"), 0, 1, aControl) + , 
graphicsmode(0) +{ + memset(opcolor, 0 , sizeof(opcolor)); + MOZ_COUNT_CTOR(VideoMediaHeaderBox); +} + +VideoMediaHeaderBox::~VideoMediaHeaderBox() +{ + MOZ_COUNT_DTOR(VideoMediaHeaderBox); +} + +nsresult +SoundMediaHeaderBox::Generate(uint32_t* aBoxSize) +{ + balance = 0; + reserved = 0; + size += sizeof(balance) + + sizeof(reserved); + + *aBoxSize = size; + + return NS_OK; +} + +nsresult +SoundMediaHeaderBox::Write() +{ + WRITE_FULLBOX(mControl, size) + mControl->Write(balance); + mControl->Write(reserved); + + return NS_OK; +} + +SoundMediaHeaderBox::SoundMediaHeaderBox(ISOControl* aControl) + : FullBox(NS_LITERAL_CSTRING("smhd"), 0, 0, aControl) +{ + MOZ_COUNT_CTOR(SoundMediaHeaderBox); +} + +SoundMediaHeaderBox::~SoundMediaHeaderBox() +{ + MOZ_COUNT_DTOR(SoundMediaHeaderBox); +} + +MediaInformationBox::MediaInformationBox(uint32_t aType, ISOControl* aControl) + : DefaultContainerImpl(NS_LITERAL_CSTRING("minf"), aControl) +{ + mTrackType = aType; + + if (mTrackType == Audio_Track) { + boxes.AppendElement(new SoundMediaHeaderBox(aControl)); + } else if (mTrackType == Video_Track) { + boxes.AppendElement(new VideoMediaHeaderBox(aControl)); + } else { + MOZ_ASSERT(0); + } + + boxes.AppendElement(new DataInformationBox(aControl)); + boxes.AppendElement(new SampleTableBox(aType, aControl)); + MOZ_COUNT_CTOR(MediaInformationBox); +} + +MediaInformationBox::~MediaInformationBox() +{ + MOZ_COUNT_DTOR(MediaInformationBox); +} + +nsresult +HandlerBox::Generate(uint32_t* aBoxSize) +{ + pre_defined = 0; + if (mTrackType == Audio_Track) { + handler_type = FOURCC('s', 'o', 'u', 'n'); + } else if (mTrackType == Video_Track) { + handler_type = FOURCC('v', 'i', 'd', 'e'); + } + + size += sizeof(pre_defined) + + sizeof(handler_type) + + sizeof(reserved); + + *aBoxSize = size; + + return NS_OK; +} + +nsresult +HandlerBox::Write() +{ + WRITE_FULLBOX(mControl, size) + mControl->Write(pre_defined); + mControl->Write(handler_type); + mControl->WriteArray(reserved, 3); + + return 
NS_OK; +} + +HandlerBox::HandlerBox(uint32_t aType, ISOControl* aControl) + : FullBox(NS_LITERAL_CSTRING("hdlr"), 0, 0, aControl) + , pre_defined(0) + , handler_type(0) +{ + mTrackType = aType; + memset(reserved, 0 , sizeof(reserved)); + MOZ_COUNT_CTOR(HandlerBox); +} + +HandlerBox::~HandlerBox() +{ + MOZ_COUNT_DTOR(HandlerBox); +} + +MediaHeaderBox::MediaHeaderBox(uint32_t aType, ISOControl* aControl) + : FullBox(NS_LITERAL_CSTRING("mdhd"), 0, 0, aControl) + , creation_time(0) + , modification_time(0) + , timescale(0) + , duration(0) + , pad(0) + , lang1(0) + , lang2(0) + , lang3(0) + , pre_defined(0) +{ + mTrackType = aType; + MOZ_COUNT_CTOR(MediaHeaderBox); +} + +MediaHeaderBox::~MediaHeaderBox() +{ + MOZ_COUNT_DTOR(MediaHeaderBox); +} + +uint32_t +MediaHeaderBox::GetTimeScale() +{ + if (mTrackType == Audio_Track) { + return mAudioMeta->GetAudioSampleRate(); + } + + return mVideoMeta->GetVideoClockRate(); +} + +nsresult +MediaHeaderBox::Generate(uint32_t* aBoxSize) +{ + creation_time = mControl->GetTime(); + modification_time = mControl->GetTime(); + timescale = GetTimeScale(); + duration = 0; // fragmented mp4 + + pad = 0; + lang1 = 'u' - 0x60; // "und" underdetermined language + lang2 = 'n' - 0x60; + lang3 = 'd' - 0x60; + size += (pad.size() + lang1.size() + lang2.size() + lang3.size()) / CHAR_BIT; + + pre_defined = 0; + size += sizeof(creation_time) + + sizeof(modification_time) + + sizeof(timescale) + + sizeof(duration) + + sizeof(pre_defined); + + *aBoxSize = size; + + return NS_OK; +} + +nsresult +MediaHeaderBox::Write() +{ + WRITE_FULLBOX(mControl, size) + mControl->Write(creation_time); + mControl->Write(modification_time); + mControl->Write(timescale); + mControl->Write(duration); + mControl->WriteBits(pad.to_ulong(), pad.size()); + mControl->WriteBits(lang1.to_ulong(), lang1.size()); + mControl->WriteBits(lang2.to_ulong(), lang2.size()); + mControl->WriteBits(lang3.to_ulong(), lang3.size()); + mControl->Write(pre_defined); + + return NS_OK; +} + 
+MovieBox::MovieBox(ISOControl* aControl) + : DefaultContainerImpl(NS_LITERAL_CSTRING("moov"), aControl) +{ + boxes.AppendElement(new MovieHeaderBox(aControl)); + if (aControl->HasAudioTrack()) { + boxes.AppendElement(new TrackBox(Audio_Track, aControl)); + } + if (aControl->HasVideoTrack()) { + boxes.AppendElement(new TrackBox(Video_Track, aControl)); + } + boxes.AppendElement(new MovieExtendsBox(aControl)); + MOZ_COUNT_CTOR(MovieBox); +} + +MovieBox::~MovieBox() +{ + MOZ_COUNT_DTOR(MovieBox); +} + +nsresult +MovieHeaderBox::Generate(uint32_t* aBoxSize) +{ + creation_time = mControl->GetTime(); + modification_time = mControl->GetTime(); + timescale = GetTimeScale(); + duration = 0; // The duration is always 0 in fragmented mp4. + next_track_ID = mControl->GetNextTrackID(); + + size += sizeof(next_track_ID) + + sizeof(creation_time) + + sizeof(modification_time) + + sizeof(timescale) + + sizeof(duration) + + sizeof(rate) + + sizeof(volume) + + sizeof(reserved16) + + sizeof(reserved32) + + sizeof(matrix) + + sizeof(pre_defined); + + *aBoxSize = size; + + return NS_OK; +} + +nsresult +MovieHeaderBox::Write() +{ + WRITE_FULLBOX(mControl, size) + mControl->Write(creation_time); + mControl->Write(modification_time); + mControl->Write(timescale); + mControl->Write(duration); + mControl->Write(rate); + mControl->Write(volume); + mControl->Write(reserved16); + mControl->WriteArray(reserved32, 2); + mControl->WriteArray(matrix, 9); + mControl->WriteArray(pre_defined, 6); + mControl->Write(next_track_ID); + + return NS_OK; +} + +uint32_t +MovieHeaderBox::GetTimeScale() +{ + // Only audio track in container. 
+ if (mAudioMeta && !mVideoMeta) { + return mAudioMeta->GetAudioSampleRate(); + } + + // return video rate + return mVideoMeta->GetVideoClockRate(); +} + +MovieHeaderBox::~MovieHeaderBox() +{ + MOZ_COUNT_DTOR(MovieHeaderBox); +} + +MovieHeaderBox::MovieHeaderBox(ISOControl* aControl) + : FullBox(NS_LITERAL_CSTRING("mvhd"), 0, 0, aControl) + , creation_time(0) + , modification_time(0) + , timescale(90000) + , duration(0) + , rate(0x00010000) + , volume(0x0100) + , reserved16(0) + , next_track_ID(1) +{ + memcpy(matrix, iso_matrix, sizeof(matrix)); + memset(reserved32, 0, sizeof(reserved32)); + memset(pre_defined, 0, sizeof(pre_defined)); + MOZ_COUNT_CTOR(MovieHeaderBox); +} + +TrackHeaderBox::TrackHeaderBox(uint32_t aType, ISOControl* aControl) + : FullBox(NS_LITERAL_CSTRING("tkhd"), 0, + flags_track_enabled | flags_track_in_movie | flags_track_in_preview, + aControl) + , creation_time(0) + , modification_time(0) + , track_ID(0) + , reserved(0) + , duration(0) + , layer(0) + , alternate_group(0) + , volume(0) + , reserved3(0) + , width(0) + , height(0) +{ + mTrackType = aType; + memcpy(matrix, iso_matrix, sizeof(matrix)); + memset(reserved2, 0, sizeof(reserved2)); + MOZ_COUNT_CTOR(TrackHeaderBox); +} + +TrackHeaderBox::~TrackHeaderBox() +{ + MOZ_COUNT_DTOR(TrackHeaderBox); +} + +nsresult +TrackHeaderBox::Generate(uint32_t* aBoxSize) +{ + creation_time = mControl->GetTime(); + modification_time = mControl->GetTime(); + track_ID = (mTrackType == Audio_Track ? + mControl->GetTrackID(mAudioMeta->GetKind()) : + mControl->GetTrackID(mVideoMeta->GetKind())); + // fragmented mp4 + duration = 0; + + // volume, audiotrack is always 0x0100 in 14496-12 8.3.2.2 + volume = (mTrackType == Audio_Track ? 0x0100 : 0); + + if (mTrackType == Video_Track) { + width = mVideoMeta->GetVideoDisplayWidth() << 16; + height = mVideoMeta->GetVideoDisplayHeight() << 16; + // Check display size, using the pixel size if any of them is invalid. 
+ if (!width || !height) { + width = mVideoMeta->GetVideoWidth() << 16; + height = mVideoMeta->GetVideoHeight() << 16; + } + } + + size += sizeof(creation_time) + + sizeof(modification_time) + + sizeof(track_ID) + + sizeof(reserved) + + sizeof(duration) + + sizeof(reserved2) + + sizeof(layer) + + sizeof(alternate_group) + + sizeof(volume) + + sizeof(reserved3) + + sizeof(matrix) + + sizeof(width) + + sizeof(height); + + *aBoxSize = size; + + return NS_OK; +} + +nsresult +TrackHeaderBox::Write() +{ + WRITE_FULLBOX(mControl, size) + mControl->Write(creation_time); + mControl->Write(modification_time); + mControl->Write(track_ID); + mControl->Write(reserved); + mControl->Write(duration); + mControl->WriteArray(reserved2, 2); + mControl->Write(layer); + mControl->Write(alternate_group); + mControl->Write(volume); + mControl->Write(reserved3); + mControl->WriteArray(matrix, 9); + mControl->Write(width); + mControl->Write(height); + + return NS_OK; +} + +nsresult +FileTypeBox::Generate(uint32_t* aBoxSize) +{ + minor_version = 0; + + if (mControl->GetMuxingType() == ISOMediaWriter::TYPE_FRAG_MP4) { + if (!mControl->HasVideoTrack() && mControl->HasAudioTrack()) { + major_brand = "M4A "; + } else { + major_brand = "MP42"; + } + compatible_brands.AppendElement("mp42"); + compatible_brands.AppendElement("isom"); + } else if (mControl->GetMuxingType() == ISOMediaWriter::TYPE_FRAG_3GP) { + major_brand = "3gp9"; + // According to 3GPP TS 26.244 V12.2.0, section 5.3.4, it's recommended to + // list all compatible brands here. 3GP spec supports fragment from '3gp6'. 
+ compatible_brands.AppendElement("3gp9"); + compatible_brands.AppendElement("3gp8"); + compatible_brands.AppendElement("3gp7"); + compatible_brands.AppendElement("3gp6"); + compatible_brands.AppendElement("isom"); + } else if (mControl->GetMuxingType() == ISOMediaWriter::TYPE_FRAG_3G2) { + major_brand = "3g2a"; + // 3GPP2 Release 0 and A and 3GPP Release 6 allow movie fragmentation + compatible_brands.AppendElement("3gp9"); + compatible_brands.AppendElement("3gp8"); + compatible_brands.AppendElement("3gp7"); + compatible_brands.AppendElement("3gp6"); + compatible_brands.AppendElement("isom"); + compatible_brands.AppendElement("3g2c"); + compatible_brands.AppendElement("3g2b"); + compatible_brands.AppendElement("3g2a"); + } else { + MOZ_ASSERT(0); + } + + size += major_brand.Length() + + sizeof(minor_version) + + compatible_brands.Length() * 4; + + *aBoxSize = size; + + return NS_OK; +} + +nsresult +FileTypeBox::Write() +{ + BoxSizeChecker checker(mControl, size); + Box::Write(); + mControl->WriteFourCC(major_brand.get()); + mControl->Write(minor_version); + uint32_t len = compatible_brands.Length(); + for (uint32_t i = 0; i < len; i++) { + mControl->WriteFourCC(compatible_brands[i].get()); + } + + return NS_OK; +} + +FileTypeBox::FileTypeBox(ISOControl* aControl) + : Box(NS_LITERAL_CSTRING("ftyp"), aControl) + , minor_version(0) +{ + MOZ_COUNT_CTOR(FileTypeBox); +} + +FileTypeBox::~FileTypeBox() +{ + MOZ_COUNT_DTOR(FileTypeBox); +} + +MediaBox::MediaBox(uint32_t aType, ISOControl* aControl) + : DefaultContainerImpl(NS_LITERAL_CSTRING("mdia"), aControl) +{ + mTrackType = aType; + boxes.AppendElement(new MediaHeaderBox(aType, aControl)); + boxes.AppendElement(new HandlerBox(aType, aControl)); + boxes.AppendElement(new MediaInformationBox(aType, aControl)); + MOZ_COUNT_CTOR(MediaBox); +} + +MediaBox::~MediaBox() +{ + MOZ_COUNT_DTOR(MediaBox); +} + +nsresult +DefaultContainerImpl::Generate(uint32_t* aBoxSize) +{ + nsresult rv; + uint32_t box_size; + uint32_t len = 
boxes.Length(); + for (uint32_t i = 0; i < len; i++) { + rv = boxes.ElementAt(i)->Generate(&box_size); + NS_ENSURE_SUCCESS(rv, rv); + size += box_size; + } + *aBoxSize = size; + return NS_OK; +} + +nsresult +DefaultContainerImpl::Find(const nsACString& aType, + nsTArray<RefPtr<MuxerOperation>>& aOperations) +{ + nsresult rv = Box::Find(aType, aOperations); + NS_ENSURE_SUCCESS(rv, rv); + + uint32_t len = boxes.Length(); + for (uint32_t i = 0; i < len; i++) { + rv = boxes.ElementAt(i)->Find(aType, aOperations); + NS_ENSURE_SUCCESS(rv, rv); + } + return NS_OK; +} + +nsresult +DefaultContainerImpl::Write() +{ + BoxSizeChecker checker(mControl, size); + Box::Write(); + + nsresult rv; + uint32_t len = boxes.Length(); + for (uint32_t i = 0; i < len; i++) { + rv = boxes.ElementAt(i)->Write(); + NS_ENSURE_SUCCESS(rv, rv); + } + + return NS_OK; +} + +DefaultContainerImpl::DefaultContainerImpl(const nsACString& aType, + ISOControl* aControl) + : Box(aType, aControl) +{ +} + +nsresult +Box::Write() +{ + mControl->Write(size); + mControl->WriteFourCC(boxType.get()); + return NS_OK; +} + +nsresult +Box::Find(const nsACString& aType, nsTArray<RefPtr<MuxerOperation>>& aOperations) +{ + if (boxType == aType) { + aOperations.AppendElement(this); + } + return NS_OK; +} + +Box::Box(const nsACString& aType, ISOControl* aControl) + : size(8), mControl(aControl) +{ + MOZ_ASSERT(aType.Length() == 4); + boxType = aType; + aControl->GetAudioMetadata(mAudioMeta); + aControl->GetVideoMetadata(mVideoMeta); +} + +FullBox::FullBox(const nsACString& aType, uint8_t aVersion, uint32_t aFlags, + ISOControl* aControl) + : Box(aType, aControl) +{ + std::bitset<24> tmp_flags(aFlags); + version = aVersion; + flags = tmp_flags; + size += sizeof(version) + flags.size() / CHAR_BIT; +} + +nsresult +FullBox::Write() +{ + Box::Write(); + mControl->Write(version); + mControl->WriteBits(flags.to_ulong(), flags.size()); + return NS_OK; +} + +TrackBox::TrackBox(uint32_t aTrackType, ISOControl* aControl) + : 
DefaultContainerImpl(NS_LITERAL_CSTRING("trak"), aControl) +{ + boxes.AppendElement(new TrackHeaderBox(aTrackType, aControl)); + boxes.AppendElement(new MediaBox(aTrackType, aControl)); + MOZ_COUNT_CTOR(TrackBox); +} + +TrackBox::~TrackBox() +{ + MOZ_COUNT_DTOR(TrackBox); +} + +SampleEntryBox::SampleEntryBox(const nsACString& aFormat, ISOControl* aControl) + : Box(aFormat, aControl) + , data_reference_index(0) +{ + data_reference_index = 1; // There is only one data reference in each track. + size += sizeof(reserved) + + sizeof(data_reference_index); + memset(reserved, 0, sizeof(reserved)); +} + +nsresult +SampleEntryBox::Write() +{ + Box::Write(); + mControl->Write(reserved, sizeof(reserved)); + mControl->Write(data_reference_index); + return NS_OK; +} + +nsresult +AudioSampleEntry::Write() +{ + SampleEntryBox::Write(); + mControl->Write(sound_version); + mControl->Write(reserved2, sizeof(reserved2)); + mControl->Write(channels); + mControl->Write(sample_size); + mControl->Write(compressionId); + mControl->Write(packet_size); + mControl->Write(timeScale); + return NS_OK; +} + +AudioSampleEntry::AudioSampleEntry(const nsACString& aFormat, ISOControl* aControl) + : SampleEntryBox(aFormat, aControl) + , sound_version(0) + , channels(2) + , sample_size(16) + , compressionId(0) + , packet_size(0) + , timeScale(0) +{ + memset(reserved2, 0 , sizeof(reserved2)); + channels = mAudioMeta->GetAudioChannels(); + timeScale = mAudioMeta->GetAudioSampleRate() << 16; + + size += sizeof(sound_version) + + sizeof(reserved2) + + sizeof(sample_size) + + sizeof(channels) + + sizeof(packet_size) + + sizeof(compressionId) + + sizeof(timeScale); + + MOZ_COUNT_CTOR(AudioSampleEntry); +} + +AudioSampleEntry::~AudioSampleEntry() +{ + MOZ_COUNT_DTOR(AudioSampleEntry); +} + +nsresult +VisualSampleEntry::Write() +{ + SampleEntryBox::Write(); + + mControl->Write(reserved, sizeof(reserved)); + mControl->Write(width); + mControl->Write(height); + mControl->Write(horizresolution); + 
mControl->Write(vertresolution); + mControl->Write(reserved2); + mControl->Write(frame_count); + mControl->Write(compressorName, sizeof(compressorName)); + mControl->Write(depth); + mControl->Write(pre_defined); + + return NS_OK; +} + +VisualSampleEntry::VisualSampleEntry(const nsACString& aFormat, ISOControl* aControl) + : SampleEntryBox(aFormat, aControl) + , width(0) + , height(0) + , horizresolution(resolution_72_dpi) + , vertresolution(resolution_72_dpi) + , reserved2(0) + , frame_count(1) + , depth(video_depth) + , pre_defined(-1) +{ + memset(reserved, 0 , sizeof(reserved)); + memset(compressorName, 0 , sizeof(compressorName)); + + // both fields occupy 16 bits defined in 14496-2 6.2.3. + width = mVideoMeta->GetVideoWidth(); + height = mVideoMeta->GetVideoHeight(); + + size += sizeof(reserved) + + sizeof(width) + + sizeof(height) + + sizeof(horizresolution) + + sizeof(vertresolution) + + sizeof(reserved2) + + sizeof(frame_count) + + sizeof(compressorName) + + sizeof(depth) + + sizeof(pre_defined); + + MOZ_COUNT_CTOR(VisualSampleEntry); +} + +VisualSampleEntry::~VisualSampleEntry() +{ + MOZ_COUNT_DTOR(VisualSampleEntry); +} + +} diff --git a/dom/media/encoder/fmp4_muxer/ISOMediaBoxes.h b/dom/media/encoder/fmp4_muxer/ISOMediaBoxes.h new file mode 100644 index 000000000..a6dc1b046 --- /dev/null +++ b/dom/media/encoder/fmp4_muxer/ISOMediaBoxes.h @@ -0,0 +1,781 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef ISOMediaBoxes_h_ +#define ISOMediaBoxes_h_ + +#include <bitset> +#include "nsString.h" +#include "nsTArray.h" +#include "nsAutoPtr.h" +#include "MuxerOperation.h" +#include "mozilla/UniquePtr.h" + +#define WRITE_FULLBOX(_compositor, _size) \ + BoxSizeChecker checker(_compositor, _size); \ + FullBox::Write(); + +#define FOURCC(a, b, c, d) ( ((a) << 24) | ((b) << 16) | ((c) << 8) | (d) ) + +namespace mozilla { + +/** + * track type from spec 8.4.3.3 + */ +#define Audio_Track 0x01 +#define Video_Track 0x02 + +class AudioTrackMetadata; +class VideoTrackMetadata; +class ISOControl; + +/** + * This is the base class for all ISO media format boxes. + * It provides the fields of box type(four CC) and size. + * The data members in the beginning of a Box (or its descendants) + * are the 14496-12 defined member. Other members prefix with 'm' + * are private control data. + * + * This class is for inherited only, it shouldn't be instanced directly. + */ +class Box : public MuxerOperation { +protected: + // ISO BMFF members + uint32_t size; // 14496-12 4-2 'Object Structure'. Size of this box. + nsCString boxType; // four CC name, all table names are listed in + // 14496-12 table 1. + +public: + // MuxerOperation methods + nsresult Write() override; + nsresult Find(const nsACString& aType, + nsTArray<RefPtr<MuxerOperation>>& aOperations) override; + + // This helper class will compare the written size in Write() and the size in + // Generate(). If their are not equal, it will assert. + class BoxSizeChecker { + public: + BoxSizeChecker(ISOControl* aControl, uint32_t aSize); + ~BoxSizeChecker(); + + uint32_t ori_size; + uint32_t box_size; + ISOControl* mControl; + }; + +protected: + Box() = delete; + Box(const nsACString& aType, ISOControl* aControl); + + ISOControl* mControl; + RefPtr<AudioTrackMetadata> mAudioMeta; + RefPtr<VideoTrackMetadata> mVideoMeta; +}; + +/** + * FullBox (and its descendants) is the box which contains the 'real' data + * members. 
It is the edge in the ISO box structure and it doesn't contain + * any box. + * + * This class is for inherited only, it shouldn't be instanced directly. + */ +class FullBox : public Box { +public: + // ISO BMFF members + uint8_t version; // 14496-12 4.2 'Object Structure' + std::bitset<24> flags; // + + // MuxerOperation methods + nsresult Write() override; + +protected: + // FullBox methods + FullBox(const nsACString& aType, uint8_t aVersion, uint32_t aFlags, + ISOControl* aControl); + FullBox() = delete; +}; + +/** + * The default implementation of the container box. + * Basically, the container box inherits this class and overrides the + * constructor only. + * + * According to 14496-12 3.1.1 'container box', a container box is + * 'box whose sole purpose is to contain and group a set of related boxes' + * + * This class is for inherited only, it shouldn't be instanced directly. + */ +class DefaultContainerImpl : public Box { +public: + // MuxerOperation methods + nsresult Generate(uint32_t* aBoxSize) override; + nsresult Write() override; + nsresult Find(const nsACString& aType, + nsTArray<RefPtr<MuxerOperation>>& aOperations) override; + +protected: + // DefaultContainerImpl methods + DefaultContainerImpl(const nsACString& aType, ISOControl* aControl); + DefaultContainerImpl() = delete; + + nsTArray<RefPtr<MuxerOperation>> boxes; +}; + +// 14496-12 4.3 'File Type Box' +// Box type: 'ftyp' +class FileTypeBox : public Box { +public: + // ISO BMFF members + nsCString major_brand; // four chars + uint32_t minor_version; + nsTArray<nsCString> compatible_brands; + + // MuxerOperation methods + nsresult Generate(uint32_t* aBoxSize) override; + nsresult Write() override; + + // FileTypeBox methods + FileTypeBox(ISOControl* aControl); + ~FileTypeBox(); +}; + +// 14496-12 8.2.1 'Movie Box' +// Box type: 'moov' +// MovieBox contains MovieHeaderBox, TrackBox and MovieExtendsBox. 
class MovieBox : public DefaultContainerImpl {
public:
  // Only the constructor/destructor are declared; the MuxerOperation
  // behaviour is inherited from DefaultContainerImpl.
  MovieBox(ISOControl* aControl);
  ~MovieBox();
};

// 14496-12 8.2.2 'Movie Header Box'
// Box type: 'mvhd'
class MovieHeaderBox : public FullBox {
public:
  // ISO BMFF members
  // All time fields below are declared as 32-bit values.
  uint32_t creation_time;
  uint32_t modification_time;
  uint32_t timescale;
  uint32_t duration;
  uint32_t rate;
  uint16_t volume;
  uint16_t reserved16;
  uint32_t reserved32[2];
  uint32_t matrix[9];
  uint32_t pre_defined[6];
  uint32_t next_track_ID;

  // MuxerOperation methods
  nsresult Generate(uint32_t* aBoxSize) override;
  nsresult Write() override;

  // MovieHeaderBox methods
  MovieHeaderBox(ISOControl* aControl);
  ~MovieHeaderBox();
  // NOTE(review): presumably returns 'timescale'; the definition is not in
  // this header -- confirm in the .cpp.
  uint32_t GetTimeScale();
};

// 14496-12 8.4.2 'Media Header Box'
// Box type: 'mdhd'
class MediaHeaderBox : public FullBox {
public:
  // ISO BMFF members
  uint32_t creation_time;
  uint32_t modification_time;
  uint32_t timescale;
  uint32_t duration;
  // pad (1 bit) + lang1..lang3 (5 bits each) add up to 16 bits.
  std::bitset<1> pad;
  std::bitset<5> lang1;
  std::bitset<5> lang2;
  std::bitset<5> lang3;
  uint16_t pre_defined;

  // MuxerOperation methods
  nsresult Generate(uint32_t* aBoxSize) override;
  nsresult Write() override;

  // MediaHeaderBox methods
  MediaHeaderBox(uint32_t aType, ISOControl* aControl);
  ~MediaHeaderBox();
  uint32_t GetTimeScale();

protected:
  // NOTE(review): presumably Audio_Track or Video_Track, taken from the
  // constructor's aType -- confirm in the .cpp.
  uint32_t mTrackType;
};

// 14496-12 8.3.1 'Track Box'
// Box type: 'trak'
// TrackBox contains TrackHeaderBox and MediaBox.
class TrackBox : public DefaultContainerImpl {
public:
  TrackBox(uint32_t aTrackType, ISOControl* aControl);
  ~TrackBox();
};

// 14496-12 8.1.1 'Media Data Box'
// Box type: 'mdat'
class MediaDataBox : public Box {
public:
  // MuxerOperation methods
  nsresult Generate(uint32_t* aBoxSize) override;
  nsresult Write() override;

  // MediaDataBox methods
  // Plain accessors for the two bookkeeping members below.
  uint32_t GetAllSampleSize() { return mAllSampleSize; }
  uint32_t FirstSampleOffsetInMediaDataBox() { return mFirstSampleOffset; }
  MediaDataBox(uint32_t aTrackType, ISOControl* aControl);
  ~MediaDataBox();

protected:
  uint32_t mAllSampleSize;      // All audio and video sample size in this box.
  uint32_t mFirstSampleOffset;  // The offset of first sample in this box from
                                // the beginning of this mp4 file.
  uint32_t mTrackType;
};

// flags for TrackRunBox::flags, 14496-12 8.8.8.1.
#define flags_data_offset_present                     0x000001
#define flags_first_sample_flags_present              0x000002
#define flags_sample_duration_present                 0x000100
#define flags_sample_size_present                     0x000200
#define flags_sample_flags_present                    0x000400
#define flags_sample_composition_time_offsets_present 0x000800

// flag for TrackRunBox::tbl::sample_flags and TrackExtendsBox::default_sample_flags
// which is defined in 14496-12 8.8.3.1.
// Declaration only; the definition is not in this header.
uint32_t set_sample_flags(bool aSync);

// 14496-12 8.8.8 'Track Fragment Run Box'
// Box type: 'trun'
class TrackRunBox : public FullBox {
public:
  // ISO BMFF members
  // One entry of the per-sample table.
  typedef struct {
    uint32_t sample_duration;
    uint32_t sample_size;
    uint32_t sample_flags;
    uint32_t sample_composition_time_offset;
  } tbl;

  uint32_t sample_count;
  // the following are optional fields
  uint32_t data_offset; // data offset exists when audio/video are present in file.
  uint32_t first_sample_flags;
  UniquePtr<tbl[]> sample_info_table; // owning array of table entries

  // MuxerOperation methods
  nsresult Generate(uint32_t* aBoxSize) override;
  nsresult Write() override;

  // TrackRunBox methods
  uint32_t GetAllSampleSize() { return mAllSampleSize; }
  nsresult SetDataOffset(uint32_t aOffset);

  TrackRunBox(uint32_t aType, uint32_t aFlags, ISOControl* aControl);
  ~TrackRunBox();

protected:
  // NOTE(review): presumably populates sample_info_table and returns a size;
  // the definition is not in this header -- confirm in the .cpp.
  uint32_t fillSampleTable();

  uint32_t mAllSampleSize;
  uint32_t mTrackType;
};

// tf_flags in TrackFragmentHeaderBox, 14496-12 8.8.7.1.
#define base_data_offset_present         0x000001
#define sample_description_index_present 0x000002
#define default_sample_duration_present  0x000008
#define default_sample_size_present      0x000010
#define default_sample_flags_present     0x000020
#define duration_is_empty                0x010000
#define default_base_is_moof             0x020000

// 14496-12 8.8.7 'Track Fragment Header Box'
// Box type: 'tfhd'
class TrackFragmentHeaderBox : public FullBox {
public:
  // ISO BMFF members
  uint32_t track_ID;
  uint64_t base_data_offset;
  uint32_t default_sample_duration;

  // MuxerOperation methods
  nsresult Generate(uint32_t* aBoxSize) override;
  nsresult Write() override;

  // TrackFragmentHeaderBox methods
  nsresult UpdateBaseDataOffset(uint64_t aOffset); // The offset of the first
                                                   // sample in file.

  TrackFragmentHeaderBox(uint32_t aType, uint32_t aFlags, ISOControl* aControl);
  ~TrackFragmentHeaderBox();

protected:
  uint32_t mTrackType;
};

// 14496-12 8.8.6 'Track Fragment Box'
// Box type: 'traf'
// TrackFragmentBox contains TrackFragmentHeaderBox and TrackRunBox.
class TrackFragmentBox : public DefaultContainerImpl {
public:
  TrackFragmentBox(uint32_t aType, ISOControl* aControl);
  ~TrackFragmentBox();

protected:
  // NOTE(review): presumably Audio_Track or Video_Track from aType -- confirm
  // in the .cpp.
  uint32_t mTrackType;
};

// 14496-12 8.8.5 'Movie Fragment Header Box'
// Box type: 'mfhd'
class MovieFragmentHeaderBox : public FullBox {
public:
  // ISO BMFF members
  uint32_t sequence_number;

  // MuxerOperation methods
  nsresult Generate(uint32_t* aBoxSize) override;
  nsresult Write() override;

  // MovieFragmentHeaderBox methods
  MovieFragmentHeaderBox(uint32_t aType, ISOControl* aControl);
  ~MovieFragmentHeaderBox();

protected:
  uint32_t mTrackType;
};

// 14496-12 8.8.4 'Movie Fragment Box'
// Box type: 'moof'
// MovieFragmentBox contains MovieFragmentHeaderBox and TrackFragmentBox.
class MovieFragmentBox : public DefaultContainerImpl {
public:
  // MuxerOperation methods
  // Unlike most container boxes in this header, 'moof' overrides Generate()
  // itself instead of relying on DefaultContainerImpl::Generate().
  nsresult Generate(uint32_t* aBoxSize) override;

  // MovieFragmentBox methods
  MovieFragmentBox(uint32_t aType, ISOControl* aControl);
  ~MovieFragmentBox();

protected:
  uint32_t mTrackType;
};

// 14496-12 8.8.3 'Track Extends Box'
// Box type: 'trex'
class TrackExtendsBox : public FullBox {
public:
  // ISO BMFF members
  uint32_t track_ID;
  uint32_t default_sample_description_index;
  uint32_t default_sample_duration;
  uint32_t default_sample_size;
  uint32_t default_sample_flags;

  // MuxerOperation methods
  nsresult Generate(uint32_t* aBoxSize) override;
  nsresult Write() override;

  // TrackExtendsBox methods
  TrackExtendsBox(uint32_t aType, ISOControl* aControl);
  ~TrackExtendsBox();

protected:
  uint32_t mTrackType;
};

// 14496-12 8.8.1 'Movie Extends Box'
// Box type: 'mvex'
// MovieExtendsBox contains TrackExtendsBox.
class MovieExtendsBox : public DefaultContainerImpl {
public:
  MovieExtendsBox(ISOControl* aControl);
  ~MovieExtendsBox();
};

// 14496-12 8.7.5 'Chunk Offset Box'
// Box type: 'stco'
class ChunkOffsetBox : public FullBox {
public:
  // ISO BMFF members
  // One table entry.
  typedef struct {
    uint32_t chunk_offset;
  } tbl;

  uint32_t entry_count;
  UniquePtr<tbl[]> sample_tbl; // owning array of table entries

  // MuxerOperation methods
  nsresult Generate(uint32_t* aBoxSize) override;
  nsresult Write() override;

  // ChunkOffsetBox methods
  ChunkOffsetBox(uint32_t aType, ISOControl* aControl);
  ~ChunkOffsetBox();

protected:
  uint32_t mTrackType;
};

// 14496-12 8.7.4 'Sample To Chunk Box'
// Box type: 'stsc'
class SampleToChunkBox : public FullBox {
public:
  // ISO BMFF members
  // One table entry.
  typedef struct {
    uint32_t first_chunk;
    uint32_t sample_per_chunk;
    uint32_t sample_description_index;
  } tbl;

  uint32_t entry_count;
  UniquePtr<tbl[]> sample_tbl; // owning array of table entries

  // MuxerOperation methods
  nsresult Generate(uint32_t* aBoxSize) override;
  nsresult Write() override;

  // SampleToChunkBox methods
  SampleToChunkBox(uint32_t aType, ISOControl* aControl);
  ~SampleToChunkBox();

protected:
  uint32_t mTrackType;
};

// 14496-12 8.6.1.2 'Decoding Time to Sample Box'
// Box type: 'stts'
class TimeToSampleBox : public FullBox {
public:
  // ISO BMFF members
  // One table entry.
  typedef struct {
    uint32_t sample_count;
    uint32_t sample_delta;
  } tbl;

  uint32_t entry_count;
  UniquePtr<tbl[]> sample_tbl; // owning array of table entries

  // MuxerOperation methods
  nsresult Generate(uint32_t* aBoxSize) override;
  nsresult Write() override;

  // TimeToSampleBox methods
  TimeToSampleBox(uint32_t aType, ISOControl* aControl);
  ~TimeToSampleBox();

protected:
  uint32_t mTrackType;
};

/**
 * 14496-12 8.5.2 'Sample Description Box'
 * This is the base class for VisualSampleEntry and AudioSampleEntry.
 *
 * This class is for inherited only, it shouldn't be instanced directly.
 *
 * The inherited tree of a codec box should be:
 *
 *                                            +--> AVCSampleEntry
 *                  +--> VisualSampleEntryBox +
 *                  |                         +--> ...
 *   SampleEntryBox +
 *                  |                         +--> MP4AudioSampleEntry
 *                  +--> AudioSampleEntryBox  +
 *                                            +--> AMRSampleEntry
 *                                            +
 *                                            +--> ...
 *
 */
class SampleEntryBox : public Box {
public:
  // ISO BMFF members
  uint8_t reserved[6];
  uint16_t data_reference_index;

  // sampleentrybox methods
  SampleEntryBox(const nsACString& aFormat, ISOControl* aControl);

  // MuxerOperation methods
  nsresult Write() override;

protected:
  SampleEntryBox() = delete;
};

// 14496-12 8.5.2 'Sample Description Box'
// Box type: 'stsd'
class SampleDescriptionBox : public FullBox {
public:
  // ISO BMFF members
  uint32_t entry_count;
  // Only a single sample entry is held by this box.
  RefPtr<SampleEntryBox> sample_entry_box;

  // MuxerOperation methods
  nsresult Generate(uint32_t* aBoxSize) override;
  nsresult Write() override;

  // SampleDescriptionBox methods
  SampleDescriptionBox(uint32_t aType, ISOControl* aControl);
  ~SampleDescriptionBox();

protected:
  // Factory helpers; the created entry is handed back through aSampleEntry.
  nsresult CreateAudioSampleEntry(RefPtr<SampleEntryBox>& aSampleEntry);
  nsresult CreateVideoSampleEntry(RefPtr<SampleEntryBox>& aSampleEntry);

  uint32_t mTrackType;
};

// 14496-12 8.5.2.2
// The base class for audio codec box.
// This class is for inherited only, it shouldn't be instanced directly.
class AudioSampleEntry : public SampleEntryBox {
public:
  // ISO BMFF members
  uint16_t sound_version;
  uint8_t reserved2[6];
  uint16_t channels;
  uint16_t sample_size;
  uint16_t compressionId;
  uint16_t packet_size;
  uint32_t timeScale;  // (sample rate of media) <<16

  // MuxerOperation methods
  nsresult Write() override;

  ~AudioSampleEntry();

protected:
  // Protected constructor: only concrete codec entries may construct this.
  AudioSampleEntry(const nsACString& aFormat, ISOControl* aControl);
};

// 14496-12 8.5.2.2
// The base class for video codec box.
// This class is for inherited only, it shouldn't be instanced directly.
class VisualSampleEntry : public SampleEntryBox {
public:
  // ISO BMFF members
  // NOTE(review): this 'reserved' hides the base class member
  // SampleEntryBox::reserved (uint8_t[6]) -- confirm this is intentional.
  uint8_t reserved[16];
  uint16_t width;
  uint16_t height;

  uint32_t horizresolution; // 72 dpi
  uint32_t vertresolution;  // 72 dpi
  uint32_t reserved2;
  uint16_t frame_count;     // 1, defined in 14496-12 8.5.2.2

  uint8_t compressorName[32];
  uint16_t depth;           // 0x0018, defined in 14496-12 8.5.2.2;
  uint16_t pre_defined;     // -1, defined in 14496-12 8.5.2.2;

  // MuxerOperation methods
  nsresult Write() override;

  // VisualSampleEntry methods
  ~VisualSampleEntry();

protected:
  // Protected constructor: only concrete codec entries may construct this.
  VisualSampleEntry(const nsACString& aFormat, ISOControl* aControl);
};

// 14496-12 8.7.3.2 'Sample Size Box'
// Box type: 'stsz'
class SampleSizeBox : public FullBox {
public:
  // ISO BMFF members
  uint32_t sample_size;
  uint32_t sample_count;

  // MuxerOperation methods
  nsresult Generate(uint32_t* aBoxSize) override;
  nsresult Write() override;

  // SampleSizeBox methods
  SampleSizeBox(ISOControl* aControl);
  ~SampleSizeBox();
};

// 14496-12 8.5.1 'Sample Table Box'
// Box type: 'stbl'
//
// SampleTableBox contains SampleDescriptionBox,
//                         TimeToSampleBox,
//                         SampleToChunkBox,
//                         SampleSizeBox and
//                         ChunkOffsetBox.
class SampleTableBox : public DefaultContainerImpl {
public:
  SampleTableBox(uint32_t aType, ISOControl* aControl);
  ~SampleTableBox();
};

// 14496-12 8.7.2 'Data Reference Box'
// Box type: 'url '
class DataEntryUrlBox : public FullBox {
public:
  // ISO BMFF members
  // flags in DataEntryUrlBox::flags
  const static uint16_t flags_media_at_the_same_file = 0x0001;

  nsCString location;

  // MuxerOperation methods
  nsresult Generate(uint32_t* aBoxSize) override;
  nsresult Write() override;

  // DataEntryUrlBox methods
  // NOTE(review): FullBox deletes its default constructor, so this default
  // constructor has to pick explicit base arguments in its definition
  // (not visible here) -- confirm.
  DataEntryUrlBox();
  DataEntryUrlBox(ISOControl* aControl);
  DataEntryUrlBox(const DataEntryUrlBox& aBox);
  ~DataEntryUrlBox();
};

// 14496-12 8.7.2 'Data Reference Box'
// Box type: 'dref'
class DataReferenceBox : public FullBox {
public:
  // ISO BMFF members
  uint32_t entry_count;
  // Owning list of the url entries.
  nsTArray<nsAutoPtr<DataEntryUrlBox>> urls;

  // MuxerOperation methods
  nsresult Generate(uint32_t* aBoxSize) override;
  nsresult Write() override;

  // DataReferenceBox methods
  DataReferenceBox(ISOControl* aControl);
  ~DataReferenceBox();
};

// 14496-12 8.7.1 'Data Information Box'
// Box type: 'dinf'
// DataInformationBox contains DataReferenceBox.
class DataInformationBox : public DefaultContainerImpl {
public:
  DataInformationBox(ISOControl* aControl);
  ~DataInformationBox();
};

// 14496-12 8.4.5.2 'Video Media Header Box'
// Box type: 'vmhd'
class VideoMediaHeaderBox : public FullBox {
public:
  // ISO BMFF members
  uint16_t graphicsmode;
  uint16_t opcolor[3];

  // MuxerOperation methods
  nsresult Generate(uint32_t* aBoxSize) override;
  nsresult Write() override;

  // VideoMediaHeaderBox methods
  VideoMediaHeaderBox(ISOControl* aControl);
  ~VideoMediaHeaderBox();
};

// 14496-12 8.4.5.3 'Sound Media Header Box'
// Box type: 'smhd'
class SoundMediaHeaderBox : public FullBox {
public:
  // ISO BMFF members
  uint16_t balance;
  uint16_t reserved;

  // MuxerOperation methods
  nsresult Generate(uint32_t* aBoxSize) override;
  nsresult Write() override;

  // SoundMediaHeaderBox methods
  SoundMediaHeaderBox(ISOControl* aControl);
  ~SoundMediaHeaderBox();
};

// 14496-12 8.4.4 'Media Information Box'
// Box type: 'minf'
// MediaInformationBox contains SoundMediaHeaderBox, DataInformationBox and
// SampleTableBox.
// NOTE(review): for a video track the media header child is presumably a
// VideoMediaHeaderBox instead -- confirm in the .cpp.
class MediaInformationBox : public DefaultContainerImpl {
public:
  MediaInformationBox(uint32_t aType, ISOControl* aControl);
  ~MediaInformationBox();

protected:
  uint32_t mTrackType;
};

// flags for TrackHeaderBox::flags.
// Each flag occupies its own bit, so they can be OR-ed together.
#define flags_track_enabled    0x000001
#define flags_track_in_movie   0x000002
#define flags_track_in_preview 0x000004

// 14496-12 8.3.2 'Track Header Box'
// Box type: 'tkhd'
class TrackHeaderBox : public FullBox {
public:
  // ISO BMFF members
  // version = 0
  // (all time/duration fields below are declared as 32-bit values)
  uint32_t creation_time;
  uint32_t modification_time;
  uint32_t track_ID;
  uint32_t reserved;
  uint32_t duration;

  uint32_t reserved2[2];
  uint16_t layer;
  uint16_t alternate_group;
  uint16_t volume;
  uint16_t reserved3;
  uint32_t matrix[9];
  uint32_t width;
  uint32_t height;

  // MuxerOperation methods
  nsresult Generate(uint32_t* aBoxSize) override;
  nsresult Write() override;

  // TrackHeaderBox methods
  TrackHeaderBox(uint32_t aType, ISOControl* aControl);
  ~TrackHeaderBox();

protected:
  uint32_t mTrackType;
};

// 14496-12 8.4.3 'Handler Reference Box'
// Box type: 'hdlr'
class HandlerBox : public FullBox {
public:
  // ISO BMFF members
  uint32_t pre_defined;
  uint32_t handler_type;
  uint32_t reserved[3];
  nsCString name;

  // MuxerOperation methods
  nsresult Generate(uint32_t* aBoxSize) override;
  nsresult Write() override;

  // HandlerBox methods
  HandlerBox(uint32_t aType, ISOControl* aControl);
  ~HandlerBox();

protected:
  uint32_t mTrackType;
};

// 14496-12 8.4.1 'Media Box'
// Box type: 'mdia'
// MediaBox contains MediaHeaderBox, HandlerBox, and MediaInformationBox.
class MediaBox : public DefaultContainerImpl {
public:
  MediaBox(uint32_t aType, ISOControl* aControl);
  ~MediaBox();

protected:
  // NOTE(review): presumably Audio_Track or Video_Track from aType -- confirm
  // in the .cpp.
  uint32_t mTrackType;
};

} // closes namespace mozilla
#endif // ISOMediaBoxes_h_
diff --git a/dom/media/encoder/fmp4_muxer/ISOMediaWriter.cpp b/dom/media/encoder/fmp4_muxer/ISOMediaWriter.cpp new file mode 100644 index 000000000..fa23616e9 --- /dev/null +++ b/dom/media/encoder/fmp4_muxer/ISOMediaWriter.cpp @@ -0,0 +1,234 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
 * You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "ISOMediaWriter.h"
#include "ISOControl.h"
#include "ISOMediaBoxes.h"
#include "ISOTrackMetadata.h"
#include "nsThreadUtils.h"
#include "MediaEncoder.h"
#include "VideoUtils.h"
#include "GeckoProfiler.h"

// LOG forwards to the Android log on Gonk builds and expands to nothing on
// every other platform. Any previous definition is dropped first.
#undef LOG
#ifdef MOZ_WIDGET_GONK
#include <android/log.h>
#define LOG(args...) __android_log_print(ANDROID_LOG_INFO, "MediaEncoder", ## args);
#else
#define LOG(args, ...)
+#endif + +namespace mozilla { + +const static uint32_t FRAG_DURATION = 2 * USECS_PER_S; // microsecond per unit + +ISOMediaWriter::ISOMediaWriter(uint32_t aType, uint32_t aHint) + : ContainerWriter() + , mState(MUXING_HEAD) + , mBlobReady(false) + , mType(0) +{ + if (aType & CREATE_AUDIO_TRACK) { + mType |= Audio_Track; + } + if (aType & CREATE_VIDEO_TRACK) { + mType |= Video_Track; + } + mControl = new ISOControl(aHint); + MOZ_COUNT_CTOR(ISOMediaWriter); +} + +ISOMediaWriter::~ISOMediaWriter() +{ + MOZ_COUNT_DTOR(ISOMediaWriter); +} + +nsresult +ISOMediaWriter::RunState() +{ + nsresult rv; + switch (mState) { + case MUXING_HEAD: + { + rv = mControl->GenerateFtyp(); + NS_ENSURE_SUCCESS(rv, rv); + rv = mControl->GenerateMoov(); + NS_ENSURE_SUCCESS(rv, rv); + mState = MUXING_FRAG; + break; + } + case MUXING_FRAG: + { + rv = mControl->GenerateMoof(mType); + NS_ENSURE_SUCCESS(rv, rv); + + bool EOS; + if (ReadyToRunState(EOS) && EOS) { + mState = MUXING_DONE; + } + break; + } + case MUXING_DONE: + { + break; + } + } + mBlobReady = true; + return NS_OK; +} + +nsresult +ISOMediaWriter::WriteEncodedTrack(const EncodedFrameContainer& aData, + uint32_t aFlags) +{ + PROFILER_LABEL("ISOMediaWriter", "WriteEncodedTrack", + js::ProfileEntry::Category::OTHER); + // Muxing complete, it doesn't allowed to reentry again. + if (mState == MUXING_DONE) { + MOZ_ASSERT(false); + return NS_ERROR_FAILURE; + } + + FragmentBuffer* frag = nullptr; + uint32_t len = aData.GetEncodedFrames().Length(); + + if (!len) { + // no frame? 
why bother to WriteEncodedTrack + return NS_OK; + } + for (uint32_t i = 0; i < len; i++) { + RefPtr<EncodedFrame> frame(aData.GetEncodedFrames()[i]); + EncodedFrame::FrameType type = frame->GetFrameType(); + if (type == EncodedFrame::AAC_AUDIO_FRAME || + type == EncodedFrame::AAC_CSD || + type == EncodedFrame::AMR_AUDIO_FRAME || + type == EncodedFrame::AMR_AUDIO_CSD || + type == EncodedFrame::EVRC_AUDIO_FRAME || + type == EncodedFrame::EVRC_AUDIO_CSD) { + frag = mAudioFragmentBuffer; + } else if (type == EncodedFrame::AVC_I_FRAME || + type == EncodedFrame::AVC_P_FRAME || + type == EncodedFrame::AVC_B_FRAME || + type == EncodedFrame::AVC_CSD) { + frag = mVideoFragmentBuffer; + } else { + MOZ_ASSERT(0); + return NS_ERROR_FAILURE; + } + + frag->AddFrame(frame); + } + + // Encoder should send CSD (codec specific data) frame before sending the + // audio/video frames. When CSD data is ready, it is sufficient to generate a + // moov data. If encoder doesn't send CSD yet, muxer needs to wait before + // generating anything. + if (mType & Audio_Track && (!mAudioFragmentBuffer || + !mAudioFragmentBuffer->HasCSD())) { + return NS_OK; + } + if (mType & Video_Track && (!mVideoFragmentBuffer || + !mVideoFragmentBuffer->HasCSD())) { + return NS_OK; + } + + // Only one FrameType in EncodedFrameContainer so it doesn't need to be + // inside the for-loop. + if (frag && (aFlags & END_OF_STREAM)) { + frag->SetEndOfStream(); + } + + nsresult rv; + bool EOS; + if (ReadyToRunState(EOS)) { + // Because track encoder won't generate new data after EOS, it needs to make + // sure the state reaches MUXING_DONE when EOS is signaled. 
+ do { + rv = RunState(); + } while (EOS && mState != MUXING_DONE); + NS_ENSURE_SUCCESS(rv, rv); + } + + return NS_OK; +} + +bool +ISOMediaWriter::ReadyToRunState(bool& aEOS) +{ + aEOS = false; + bool bReadyToMux = true; + if ((mType & Audio_Track) && (mType & Video_Track)) { + if (!mAudioFragmentBuffer->HasEnoughData()) { + bReadyToMux = false; + } + if (!mVideoFragmentBuffer->HasEnoughData()) { + bReadyToMux = false; + } + + if (mAudioFragmentBuffer->EOS() && mVideoFragmentBuffer->EOS()) { + aEOS = true; + bReadyToMux = true; + } + } else if (mType == Audio_Track) { + if (!mAudioFragmentBuffer->HasEnoughData()) { + bReadyToMux = false; + } + if (mAudioFragmentBuffer->EOS()) { + aEOS = true; + bReadyToMux = true; + } + } else if (mType == Video_Track) { + if (!mVideoFragmentBuffer->HasEnoughData()) { + bReadyToMux = false; + } + if (mVideoFragmentBuffer->EOS()) { + aEOS = true; + bReadyToMux = true; + } + } + + return bReadyToMux; +} + +nsresult +ISOMediaWriter::GetContainerData(nsTArray<nsTArray<uint8_t>>* aOutputBufs, + uint32_t aFlags) +{ + PROFILER_LABEL("ISOMediaWriter", "GetContainerData", + js::ProfileEntry::Category::OTHER); + if (mBlobReady) { + if (mState == MUXING_DONE) { + mIsWritingComplete = true; + } + mBlobReady = false; + return mControl->GetBufs(aOutputBufs); + } + return NS_OK; +} + +nsresult +ISOMediaWriter::SetMetadata(TrackMetadataBase* aMetadata) +{ + PROFILER_LABEL("ISOMediaWriter", "SetMetadata", + js::ProfileEntry::Category::OTHER); + if (aMetadata->GetKind() == TrackMetadataBase::METADATA_AAC || + aMetadata->GetKind() == TrackMetadataBase::METADATA_AMR || + aMetadata->GetKind() == TrackMetadataBase::METADATA_EVRC) { + mControl->SetMetadata(aMetadata); + mAudioFragmentBuffer = new FragmentBuffer(Audio_Track, FRAG_DURATION); + mControl->SetFragment(mAudioFragmentBuffer); + return NS_OK; + } + if (aMetadata->GetKind() == TrackMetadataBase::METADATA_AVC) { + mControl->SetMetadata(aMetadata); + mVideoFragmentBuffer = new 
FragmentBuffer(Video_Track, FRAG_DURATION); + mControl->SetFragment(mVideoFragmentBuffer); + return NS_OK; + } + + return NS_ERROR_FAILURE; +} + +} // namespace mozilla diff --git a/dom/media/encoder/fmp4_muxer/ISOMediaWriter.h b/dom/media/encoder/fmp4_muxer/ISOMediaWriter.h new file mode 100644 index 000000000..cccbbe3cb --- /dev/null +++ b/dom/media/encoder/fmp4_muxer/ISOMediaWriter.h @@ -0,0 +1,108 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef ISOMediaWriter_h_ +#define ISOMediaWriter_h_ + +#include "ContainerWriter.h" +#include "nsAutoPtr.h" +#include "nsIRunnable.h" + +namespace mozilla { + +class ISOControl; +class FragmentBuffer; + +class ISOMediaWriter : public ContainerWriter +{ +public: + // Generate an fragmented MP4 stream, ISO/IEC 14496-12. + // Brand names in 'ftyp' box are 'isom' and 'mp42'. + const static uint32_t TYPE_FRAG_MP4 = 1 << 0; + + // Generate an fragmented 3GP stream, 3GPP TS 26.244, + // '5.4.3 Basic profile'. + // Brand names in 'ftyp' box are '3gp9' and 'isom'. + const static uint32_t TYPE_FRAG_3GP = 1 << 1; + + // Generate an fragmented 3G2 stream, 3GPP2 C.S0050-B + // Brand names in 'ftyp' box are '3g2c' and 'isom' + const static uint32_t TYPE_FRAG_3G2 = 1 << 2; + + // aType is the combination of CREATE_AUDIO_TRACK and CREATE_VIDEO_TRACK. + // It is a hint to muxer that the output streaming contains audio, video + // or both. + // + // aHint is one of the value in TYPE_XXXXXXXX. It is a hint to muxer what kind + // of ISO format should be generated. 
+ ISOMediaWriter(uint32_t aType, uint32_t aHint = TYPE_FRAG_MP4); + ~ISOMediaWriter(); + + // ContainerWriter methods + nsresult WriteEncodedTrack(const EncodedFrameContainer &aData, + uint32_t aFlags = 0) override; + + nsresult GetContainerData(nsTArray<nsTArray<uint8_t>>* aOutputBufs, + uint32_t aFlags = 0) override; + + nsresult SetMetadata(TrackMetadataBase* aMetadata) override; + +protected: + /** + * The state of each state will generate one or more blob. + * Each blob will be a moov, moof, moof... until receiving EOS. + * The generated sequence is: + * + * moov -> moof -> moof -> ... -> moof -> moof + * + * Following is the details of each state. + * MUXING_HEAD: + * It collects the metadata to generate a moov. The state transits to + * MUXING_HEAD after output moov blob. + * + * MUXING_FRAG: + * It collects enough audio/video data to generate a fragment blob. This + * will be repeated until END_OF_STREAM and then transiting to MUXING_DONE. + * + * MUXING_DONE: + * End of ISOMediaWriter life cycle. + */ + enum MuxState { + MUXING_HEAD, + MUXING_FRAG, + MUXING_DONE, + }; + +private: + nsresult RunState(); + + // True if one of following conditions hold: + // 1. Audio/Video accumulates enough data to generate a moof. + // 2. Get EOS signal. + // aEOS will be assigned to true if it gets EOS signal. + bool ReadyToRunState(bool& aEOS); + + // The main class to generate and iso box. Its life time is same as + // ISOMediaWriter and deleted only if ISOMediaWriter is destroyed. + nsAutoPtr<ISOControl> mControl; + + // Buffers to keep audio/video data frames, they are created when metadata is + // received. Only one instance for each media type is allowed and they will be + // deleted only if ISOMediaWriter is destroyed. + nsAutoPtr<FragmentBuffer> mAudioFragmentBuffer; + nsAutoPtr<FragmentBuffer> mVideoFragmentBuffer; + + MuxState mState; + + // A flag to indicate the output buffer is ready to blob out. 
+ bool mBlobReady; + + // Combination of Audio_Track or Video_Track. + uint32_t mType; +}; + +} // namespace mozilla + +#endif // ISOMediaWriter_h_ diff --git a/dom/media/encoder/fmp4_muxer/ISOTrackMetadata.h b/dom/media/encoder/fmp4_muxer/ISOTrackMetadata.h new file mode 100644 index 000000000..3613e1e9e --- /dev/null +++ b/dom/media/encoder/fmp4_muxer/ISOTrackMetadata.h @@ -0,0 +1,131 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef ISOTrackMetadata_h_ +#define ISOTrackMetadata_h_ + +#include "TrackMetadataBase.h" + +namespace mozilla { + +class AACTrackMetadata : public AudioTrackMetadata { +public: + // AudioTrackMetadata members + uint32_t GetAudioFrameDuration() override { return mFrameDuration; } + uint32_t GetAudioFrameSize() override { return mFrameSize; } + uint32_t GetAudioSampleRate() override { return mSampleRate; } + uint32_t GetAudioChannels() override { return mChannels; } + + // TrackMetadataBase member + MetadataKind GetKind() const override { return METADATA_AAC; } + + // AACTrackMetadata members + AACTrackMetadata() + : mSampleRate(0) + , mFrameDuration(0) + , mFrameSize(0) + , mChannels(0) { + MOZ_COUNT_CTOR(AACTrackMetadata); + } + ~AACTrackMetadata() { MOZ_COUNT_DTOR(AACTrackMetadata); } + + uint32_t mSampleRate; // From 14496-3 table 1.16, it could be 7350 ~ 96000. + uint32_t mFrameDuration; // Audio frame duration based on SampleRate. + uint32_t mFrameSize; // Audio frame size, 0 is variant size. + uint32_t mChannels; // Channel number, it should be 1 or 2. +}; + +// AVC clock rate is 90k Hz. 
+#define AVC_CLOCK_RATE 90000 + +class AVCTrackMetadata : public VideoTrackMetadata { +public: + // VideoTrackMetadata members + uint32_t GetVideoHeight() override { return mHeight; } + uint32_t GetVideoWidth() override {return mWidth; } + uint32_t GetVideoDisplayHeight() override { return mDisplayHeight; } + uint32_t GetVideoDisplayWidth() override { return mDisplayWidth; } + uint32_t GetVideoClockRate() override { return AVC_CLOCK_RATE; } + uint32_t GetVideoFrameRate() override { return mFrameRate; } + + // TrackMetadataBase member + MetadataKind GetKind() const override { return METADATA_AVC; } + + // AVCTrackMetadata + AVCTrackMetadata() + : mHeight(0) + , mWidth(0) + , mDisplayHeight(0) + , mDisplayWidth(0) + , mFrameRate(0) { + MOZ_COUNT_CTOR(AVCTrackMetadata); + } + ~AVCTrackMetadata() { MOZ_COUNT_DTOR(AVCTrackMetadata); } + + uint32_t mHeight; + uint32_t mWidth; + uint32_t mDisplayHeight; + uint32_t mDisplayWidth; + uint32_t mFrameRate; // frames per second +}; + + +// AMR sample rate is 8000 samples/s. +#define AMR_SAMPLE_RATE 8000 + +// Channel number is always 1. +#define AMR_CHANNELS 1 + +// AMR speech codec, 3GPP TS 26.071. Encoder and continer support AMR-NB only +// currently. +class AMRTrackMetadata : public AudioTrackMetadata { +public: + // AudioTrackMetadata members + // + // The number of sample sets generates by encoder is variant. So the + // frame duration and frame size are both 0. + uint32_t GetAudioFrameDuration() override { return 0; } + uint32_t GetAudioFrameSize() override { return 0; } + uint32_t GetAudioSampleRate() override { return AMR_SAMPLE_RATE; } + uint32_t GetAudioChannels() override { return AMR_CHANNELS; } + + // TrackMetadataBase member + MetadataKind GetKind() const override { return METADATA_AMR; } + + // AMRTrackMetadata members + AMRTrackMetadata() { MOZ_COUNT_CTOR(AMRTrackMetadata); } + ~AMRTrackMetadata() { MOZ_COUNT_DTOR(AMRTrackMetadata); } +}; + +// EVRC sample rate is 8000 samples/s. 
#define EVRC_SAMPLE_RATE 8000

// Metadata of an EVRC audio track. Only the channel count is stored; the
// sample rate is the fixed EVRC_SAMPLE_RATE.
class EVRCTrackMetadata : public AudioTrackMetadata {
public:
  // AudioTrackMetadata members
  //
  // The number of sample sets generates by encoder is variant. So the
  // frame duration and frame size are both 0.
  uint32_t GetAudioFrameDuration() override { return 0; }
  uint32_t GetAudioFrameSize() override { return 0; }
  uint32_t GetAudioSampleRate() override { return EVRC_SAMPLE_RATE; }
  uint32_t GetAudioChannels() override { return mChannels; }

  // TrackMetadataBase member
  MetadataKind GetKind() const override { return METADATA_EVRC; }

  // EVRCTrackMetadata members
  EVRCTrackMetadata()
    : mChannels(0) {
    MOZ_COUNT_CTOR(EVRCTrackMetadata);
  }
  ~EVRCTrackMetadata() { MOZ_COUNT_DTOR(EVRCTrackMetadata); }

  uint32_t mChannels;       // Channel number, it should be 1 or 2.
};

} // closes namespace mozilla

#endif // ISOTrackMetadata_h_
diff --git a/dom/media/encoder/fmp4_muxer/MP4ESDS.cpp b/dom/media/encoder/fmp4_muxer/MP4ESDS.cpp new file mode 100644 index 000000000..72880b5cb --- /dev/null +++ b/dom/media/encoder/fmp4_muxer/MP4ESDS.cpp @@ -0,0 +1,138 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
 * You can obtain one at http://mozilla.org/MPL/2.0/.
*/
+
+#include <climits>
+#include "ISOControl.h"
+#include "ISOMediaBoxes.h"
+#include "MP4ESDS.h"
+
+namespace mozilla {
+
+// Generates the nested 'esds' box, adds its size to |size| and reports the
+// accumulated |size| through aBoxSize. A failure from the nested box is
+// returned unchanged, leaving |size| and *aBoxSize untouched.
+nsresult
+MP4AudioSampleEntry::Generate(uint32_t* aBoxSize)
+{
+  uint32_t box_size = 0;
+  nsresult rv = es->Generate(&box_size);
+  NS_ENSURE_SUCCESS(rv, rv);
+  size += box_size;
+
+  *aBoxSize = size;
+  return NS_OK;
+}
+
+// Writes the AudioSampleEntry part first and the nested 'esds' box second,
+// stopping at the first failure.
+nsresult
+MP4AudioSampleEntry::Write()
+{
+  // NOTE(review): presumably checks that |size| bytes have been written by
+  // the time it goes out of scope -- confirm against BoxSizeChecker.
+  BoxSizeChecker checker(mControl, size);
+  nsresult rv;
+  rv = AudioSampleEntry::Write();
+  NS_ENSURE_SUCCESS(rv, rv);
+  rv = es->Write();
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  return NS_OK;
+}
+
+// Creates the 'mp4a' sample entry together with its nested 'esds' box.
+MP4AudioSampleEntry::MP4AudioSampleEntry(ISOControl* aControl)
+  : AudioSampleEntry(NS_LITERAL_CSTRING("mp4a"), aControl)
+{
+  es = new ESDBox(aControl);
+  MOZ_COUNT_CTOR(MP4AudioSampleEntry);
+}
+
+MP4AudioSampleEntry::~MP4AudioSampleEntry()
+{
+  MOZ_COUNT_DTOR(MP4AudioSampleEntry);
+}
+
+// Generates the ES_Descriptor, adds its size to |size| and reports the
+// accumulated |size| through aBoxSize.
+//
+// The descriptor's result is checked before its size is used:
+// ES_Descriptor::Generate() returns early on failure without writing the
+// out-parameter, so using |box_size| unconditionally would add an
+// indeterminate value to |size|.
+nsresult
+ESDBox::Generate(uint32_t* aBoxSize)
+{
+  uint32_t box_size = 0;
+  nsresult rv = es_descriptor->Generate(&box_size);
+  NS_ENSURE_SUCCESS(rv, rv);
+  size += box_size;
+  *aBoxSize = size;
+  return NS_OK;
+}
+
+// Writes the full-box part via WRITE_FULLBOX, then the ES_Descriptor.
+nsresult
+ESDBox::Write()
+{
+  WRITE_FULLBOX(mControl, size)
+  // Hand the descriptor's result to the caller instead of discarding it.
+  return es_descriptor->Write();
+}
+
+// Creates the 'esds' full box together with its ES_Descriptor.
+ESDBox::ESDBox(ISOControl* aControl)
+  : FullBox(NS_LITERAL_CSTRING("esds"), 0, 0, aControl)
+{
+  es_descriptor = new ES_Descriptor(aControl);
+  MOZ_COUNT_CTOR(ESDBox);
+}
+
+ESDBox::~ESDBox()
+{
+  MOZ_COUNT_DTOR(ESDBox);
+}
+
+nsresult
+ES_Descriptor::Find(const nsACString& aType,
+                    nsTArray<RefPtr<MuxerOperation>>& aOperations)
+{
+  // ES_Descriptor is not a real ISOMediaBox, so we return nothing here:
+  // aOperations is left untouched.
+  return NS_OK;
+}
+
+// Writes the descriptor in this order: tag, length, ES_ID, the four flag
+// fields (1 + 1 + 1 + 5 bits) and finally the bytes of DecodeSpecificInfo.
+nsresult
+ES_Descriptor::Write()
+{
+  mControl->Write(tag);
+  mControl->Write(length);
+  mControl->Write(ES_ID);
+  mControl->WriteBits(streamDependenceFlag.to_ulong(), streamDependenceFlag.size());
+  mControl->WriteBits(URL_Flag.to_ulong(), URL_Flag.size());
+  mControl->WriteBits(reserved.to_ulong(), reserved.size());
+  mControl->WriteBits(streamPriority.to_ulong(), streamPriority.size());
+  mControl->Write(DecodeSpecificInfo.Elements(), DecodeSpecificInfo.Length());
+
+  return NS_OK;
+}
+
+// Fetches the codec specific data of the audio track into
+// DecodeSpecificInfo, computes |length| and reports the descriptor's total
+// size (tag + length field + payload) through aBoxSize.
+//
+// Returns the failure code of GetCSD() unchanged, or NS_ERROR_FAILURE when
+// the payload does not fit into the one-byte |length| field.
+nsresult
+ES_Descriptor::Generate(uint32_t* aBoxSize)
+{
+  //   14496-1 '8.3.4 DecoderConfigDescriptor'
+  //   14496-1 '10.2.3 SL Packet Header Configuration'
+  // NOTE(review): |frag| is dereferenced without a null check -- confirm that
+  // GetFragment(Audio_Track) cannot return null on this path.
+  FragmentBuffer* frag = mControl->GetFragment(Audio_Track);
+  nsresult rv = frag->GetCSD(DecodeSpecificInfo);
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  // Payload = ES_ID + one byte holding the four flag fields + the CSD bytes.
+  // |length| is a uint8_t, so the sum is formed in a wider type first and
+  // rejected if it would wrap; a wrapped value would make the length field
+  // and the reported box size disagree with the bytes Write() emits.
+  // NOTE(review): 14496-1 encodes descriptor sizes 7 bits per byte with a
+  // continuation flag, so values above 127 may additionally need a
+  // multi-byte size field -- confirm against the specification.
+  const size_t payloadSize = sizeof(ES_ID) + 1 + DecodeSpecificInfo.Length();
+  if (payloadSize > UCHAR_MAX) {
+    return NS_ERROR_FAILURE;
+  }
+  length = static_cast<uint8_t>(payloadSize);
+
+  *aBoxSize = sizeof(tag) + sizeof(length) + length;
+  return NS_OK;
+}
+
+// Starts with the ESDescrTag tag and every other field zeroed; |length| and
+// DecodeSpecificInfo are filled in by Generate().
+ES_Descriptor::ES_Descriptor(ISOControl* aControl)
+  : tag(ESDescrTag)
+  , length(0)
+  , ES_ID(0)
+  , streamDependenceFlag(0)
+  , URL_Flag(0)
+  , reserved(0)
+  , streamPriority(0)
+  , mControl(aControl)
+{
+  MOZ_COUNT_CTOR(ES_Descriptor);
+}
+
+ES_Descriptor::~ES_Descriptor()
+{
+  MOZ_COUNT_DTOR(ES_Descriptor);
+}
+
+}
diff --git a/dom/media/encoder/fmp4_muxer/MP4ESDS.h b/dom/media/encoder/fmp4_muxer/MP4ESDS.h
new file mode 100644
index 000000000..ee91312c1
--- /dev/null
+++ b/dom/media/encoder/fmp4_muxer/MP4ESDS.h
@@ -0,0 +1,87 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/.
*/
+
+#ifndef MP4ESDS_h_
+#define MP4ESDS_h_
+
+// std::bitset is used for the flag fields of ES_Descriptor below.
+#include <bitset>
+
+#include "nsTArray.h"
+#include "MuxerOperation.h"
+
+// NOTE(review): FullBox and AudioSampleEntry are not declared by anything
+// included here. MP4ESDS.cpp includes ISOMediaBoxes.h before this header,
+// which presumably supplies them -- confirm before including this header
+// from any other file.
+
+namespace mozilla {
+
+class ISOControl;
+
+/**
+ * ESDS tag
+ */
+#define ESDescrTag 0x03
+
+/**
+ * 14496-1 '8.3.3 ES_Descriptor'.
+ * It gets the DecoderConfigDescriptor and the SLConfigDescriptor from the
+ * AAC CSD data.
+ */
+class ES_Descriptor : public MuxerOperation {
+public:
+  // ISO BMFF members
+  uint8_t tag;      // ESDescrTag
+  uint8_t length;   // Computed by Generate().
+  uint16_t ES_ID;
+  // The four flag fields together occupy 1 + 1 + 1 + 5 = 8 bits.
+  std::bitset<1> streamDependenceFlag;
+  std::bitset<1> URL_Flag;
+  std::bitset<1> reserved;
+  std::bitset<5> streamPriority;
+
+  // Codec specific data, filled in by Generate().
+  nsTArray<uint8_t> DecodeSpecificInfo;
+
+  // MuxerOperation methods
+  nsresult Generate(uint32_t* aBoxSize) override;
+  nsresult Write() override;
+  nsresult Find(const nsACString& aType,
+                nsTArray<RefPtr<MuxerOperation>>& aOperations) override;
+
+  // ES_Descriptor methods
+  ES_Descriptor(ISOControl* aControl);
+  ~ES_Descriptor();
+
+protected:
+  ISOControl* mControl;
+};
+
+// 14496-14 5.6 'Sample Description Boxes'
+// Box type: 'esds'
+class ESDBox : public FullBox {
+public:
+  // ISO BMFF members
+  RefPtr<ES_Descriptor> es_descriptor;
+
+  // MuxerOperation methods
+  nsresult Generate(uint32_t* aBoxSize) override;
+  nsresult Write() override;
+
+  // ESDBox methods
+  ESDBox(ISOControl* aControl);
+  ~ESDBox();
+};
+
+// 14496-14 5.6 'Sample Description Boxes'
+// Box type: 'mp4a'
+class MP4AudioSampleEntry : public AudioSampleEntry {
+public:
+  // ISO BMFF members
+  RefPtr<ESDBox> es;
+
+  // MuxerOperation methods
+  nsresult Generate(uint32_t* aBoxSize) override;
+  nsresult Write() override;
+
+  // MP4AudioSampleEntry methods
+  MP4AudioSampleEntry(ISOControl* aControl);
+  ~MP4AudioSampleEntry();
+};
+
+}
+
+#endif // MP4ESDS_h_
diff --git a/dom/media/encoder/fmp4_muxer/MuxerOperation.h b/dom/media/encoder/fmp4_muxer/MuxerOperation.h
new file mode 100644
index 000000000..0b83c89b0
--- /dev/null
+++ b/dom/media/encoder/fmp4_muxer/MuxerOperation.h
@@ -0,0 +1,57 @@
+/* -*- Mode:
C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef MuxerOperation_h_
+#define MuxerOperation_h_
+
+// Kept inside the include guard so that the guard covers the whole header.
+#include "nsString.h"
+#include "nsTArray.h"
+
+namespace mozilla {
+
+/**
+ * The interface for ISO boxes. All boxes inherit from this interface.
+ * Generate() and Write() both need to be called to produce a complete box.
+ *
+ * Generate() generates all the data structures and their sizes.
+ *
+ * Write() writes all data into the muxing output stream (ISOControl actually)
+ * and updates the data which can't be known at Generate() time (for example,
+ * the offset of the video data in the mp4 file).
+ *
+ * The ISO base media format is composed of several container boxes and the
+ * boxes they contain. A container box holds a list of MuxerOperation, which
+ * is implemented by the contained boxes. The contained boxes are called via
+ * that list.
+ * For example:
+ *   MovieBox (container) ---> boxes (array of MuxerOperation)
+ *                              |---> MovieHeaderBox (full box)
+ *                              |---> TrakBox (container)
+ *                              |---> MovieExtendsBox (container)
+ *
+ * The complete box structure can be found at 14496-12 E.2 "The 'isom' brand".
+ */
+class MuxerOperation {
+public:
+  NS_INLINE_DECL_THREADSAFE_REFCOUNTING(MuxerOperation)
+
+  // Generate the data of this box and of its contained boxes, and calculate
+  // the box size, which is reported through aBoxSize.
+  virtual nsresult Generate(uint32_t* aBoxSize) = 0;
+
+  // Write data to stream.
+  virtual nsresult Write() = 0;
+
+  // Find boxes by type name (the name is the box type defined in 14496-12;
+  // for example, 'moov' is the name of MovieBox).
+  // The search covers this box itself and the boxes in its boxes list, if it
+  // has one. It cannot look at parent boxes.
+  virtual nsresult Find(const nsACString& aType,
+                        nsTArray<RefPtr<MuxerOperation>>& aOperations) = 0;
+
+protected:
+  virtual ~MuxerOperation() {}
+};
+
+}
+#endif
diff --git a/dom/media/encoder/fmp4_muxer/moz.build b/dom/media/encoder/fmp4_muxer/moz.build
new file mode 100644
index 000000000..5ff274be5
--- /dev/null
+++ b/dom/media/encoder/fmp4_muxer/moz.build
@@ -0,0 +1,22 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+EXPORTS += [
+    'ISOMediaWriter.h',
+    'ISOTrackMetadata.h',
+]
+
+UNIFIED_SOURCES += [
+    'AMRBox.cpp',
+    'AVCBox.cpp',
+    'EVRCBox.cpp',
+    'ISOControl.cpp',
+    'ISOMediaBoxes.cpp',
+    'ISOMediaWriter.cpp',
+    'MP4ESDS.cpp',
+]
+
+FINAL_LIBRARY = 'xul'
diff --git a/dom/media/encoder/moz.build b/dom/media/encoder/moz.build
new file mode 100644
index 000000000..0d5cdc16f
--- /dev/null
+++ b/dom/media/encoder/moz.build
@@ -0,0 +1,56 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+with Files('*'):
+    BUG_COMPONENT = ('Core', 'Video/Audio: Recording')
+
+# The fragmented MP4 muxer is only built for gonk.
+if CONFIG['MOZ_WIDGET_TOOLKIT'] == 'gonk':
+    DIRS += ['fmp4_muxer']
+
+EXPORTS += [
+    'ContainerWriter.h',
+    'EncodedFrameContainer.h',
+    'MediaEncoder.h',
+    'OpusTrackEncoder.h',
+    'TrackEncoder.h',
+    'TrackMetadataBase.h',
+]
+
+UNIFIED_SOURCES += [
+    'MediaEncoder.cpp',
+    'OpusTrackEncoder.cpp',
+    'TrackEncoder.cpp',
+]
+
+# The VP8 track encoder is only built when the WebM encoder is enabled.
+if CONFIG['MOZ_WEBM_ENCODER']:
+    EXPORTS += [
+        'VP8TrackEncoder.h',
+    ]
+    UNIFIED_SOURCES += [
+        'VP8TrackEncoder.cpp',
+    ]
+    LOCAL_INCLUDES += ['/media/libyuv/include']
+
+FINAL_LIBRARY = 'xul'
+
+# These includes are from Android JB, for use of MediaCodec.
+LOCAL_INCLUDES += ['/ipc/chromium/src']
+
+# NOTE(review): this compares version strings lexicographically (for example
+# '9' > '15' is true) -- confirm ANDROID_VERSION always has two digits here.
+if CONFIG['MOZ_WIDGET_TOOLKIT'] == 'gonk' and CONFIG['ANDROID_VERSION'] > '15':
+    LOCAL_INCLUDES += [
+        '%' + '%s/%s' % (CONFIG['ANDROID_SOURCE'], d) for d in [
+            'frameworks/av/include/media',
+            'frameworks/native/include',
+            'frameworks/native/opengl/include',
+        ]
+    ]
+
+include('/ipc/chromium/chromium-config.mozbuild')
+
+# Suppress some GCC warnings being treated as errors:
+#  - about attributes on forward declarations for types that are already
+#    defined, which complains about an important MOZ_EXPORT for android::AString
+if CONFIG['GNU_CC']:
+    CXXFLAGS += ['-Wno-error=attributes']
|