diff options
Diffstat (limited to 'dom/media/webm')
-rw-r--r-- | dom/media/webm/EbmlComposer.cpp | 236 | ||||
-rw-r--r-- | dom/media/webm/EbmlComposer.h | 89 | ||||
-rw-r--r-- | dom/media/webm/NesteggPacketHolder.h | 124 | ||||
-rw-r--r-- | dom/media/webm/WebMBufferedParser.cpp | 514 | ||||
-rw-r--r-- | dom/media/webm/WebMBufferedParser.h | 322 | ||||
-rw-r--r-- | dom/media/webm/WebMDecoder.cpp | 99 | ||||
-rw-r--r-- | dom/media/webm/WebMDecoder.h | 46 | ||||
-rw-r--r-- | dom/media/webm/WebMDemuxer.cpp | 1155 | ||||
-rw-r--r-- | dom/media/webm/WebMDemuxer.h | 292 | ||||
-rw-r--r-- | dom/media/webm/WebMWriter.cpp | 87 | ||||
-rw-r--r-- | dom/media/webm/WebMWriter.h | 75 | ||||
-rw-r--r-- | dom/media/webm/moz.build | 31 |
12 files changed, 3070 insertions, 0 deletions
diff --git a/dom/media/webm/EbmlComposer.cpp b/dom/media/webm/EbmlComposer.cpp new file mode 100644 index 000000000..1b8008a1b --- /dev/null +++ b/dom/media/webm/EbmlComposer.cpp @@ -0,0 +1,236 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "EbmlComposer.h" +#include "mozilla/UniquePtr.h" +#include "mozilla/EndianUtils.h" +#include "libmkv/EbmlIDs.h" +#include "libmkv/EbmlWriter.h" +#include "libmkv/WebMElement.h" +#include "prtime.h" +#include "limits.h" + +namespace mozilla { + +// Timecode scale in nanoseconds +static const unsigned long TIME_CODE_SCALE = 1000000; +// The WebM header size without audio CodecPrivateData +static const int32_t DEFAULT_HEADER_SIZE = 1024; + +void EbmlComposer::GenerateHeader() +{ + // Write the EBML header. + EbmlGlobal ebml; + // The WEbM header default size usually smaller than 1k. + auto buffer = MakeUnique<uint8_t[]>(DEFAULT_HEADER_SIZE + + mCodecPrivateData.Length()); + ebml.buf = buffer.get(); + ebml.offset = 0; + writeHeader(&ebml); + { + EbmlLoc segEbmlLoc, ebmlLocseg, ebmlLoc; + Ebml_StartSubElement(&ebml, &segEbmlLoc, Segment); + { + Ebml_StartSubElement(&ebml, &ebmlLocseg, SeekHead); + // Todo: We don't know the exact sizes of encoded data and + // ignore this section. + Ebml_EndSubElement(&ebml, &ebmlLocseg); + writeSegmentInformation(&ebml, &ebmlLoc, TIME_CODE_SCALE, 0); + { + EbmlLoc trackLoc; + Ebml_StartSubElement(&ebml, &trackLoc, Tracks); + { + // Video + if (mWidth > 0 && mHeight > 0) { + writeVideoTrack(&ebml, 0x1, 0, "V_VP8", + mWidth, mHeight, + mDisplayWidth, mDisplayHeight, mFrameRate); + } + // Audio + if (mCodecPrivateData.Length() > 0) { + // Extract the pre-skip from mCodecPrivateData + // then convert it to nanoseconds. 
+ // Details in OpusTrackEncoder.cpp. + mCodecDelay = + (uint64_t)LittleEndian::readUint16(mCodecPrivateData.Elements() + 10) + * PR_NSEC_PER_SEC / 48000; + // Fixed 80ms, convert into nanoseconds. + uint64_t seekPreRoll = 80 * PR_NSEC_PER_MSEC; + writeAudioTrack(&ebml, 0x2, 0x0, "A_OPUS", mSampleFreq, + mChannels, mCodecDelay, seekPreRoll, + mCodecPrivateData.Elements(), + mCodecPrivateData.Length()); + } + } + Ebml_EndSubElement(&ebml, &trackLoc); + } + } + // The Recording length is unknown and + // ignore write the whole Segment element size + } + MOZ_ASSERT(ebml.offset <= DEFAULT_HEADER_SIZE + mCodecPrivateData.Length(), + "write more data > EBML_BUFFER_SIZE"); + auto block = mClusterBuffs.AppendElement(); + block->SetLength(ebml.offset); + memcpy(block->Elements(), ebml.buf, ebml.offset); + mFlushState |= FLUSH_METADATA; +} + +void EbmlComposer::FinishMetadata() +{ + if (mFlushState & FLUSH_METADATA) { + // We don't remove the first element of mClusterBuffs because the + // |mClusterHeaderIndex| may have value. + mClusterCanFlushBuffs.AppendElement()->SwapElements(mClusterBuffs[0]); + mFlushState &= ~FLUSH_METADATA; + } +} + +void EbmlComposer::FinishCluster() +{ + FinishMetadata(); + if (!(mFlushState & FLUSH_CLUSTER)) { + // No completed cluster available. + return; + } + + MOZ_ASSERT(mClusterLengthLoc > 0); + EbmlGlobal ebml; + EbmlLoc ebmlLoc; + ebmlLoc.offset = mClusterLengthLoc; + ebml.offset = 0; + for (uint32_t i = mClusterHeaderIndex; i < mClusterBuffs.Length(); i++) { + ebml.offset += mClusterBuffs[i].Length(); + } + ebml.buf = mClusterBuffs[mClusterHeaderIndex].Elements(); + Ebml_EndSubElement(&ebml, &ebmlLoc); + // Move the mClusterBuffs data from mClusterHeaderIndex that we can skip + // the metadata and the rest P-frames after ContainerWriter::FLUSH_NEEDED. 
+ for (uint32_t i = mClusterHeaderIndex; i < mClusterBuffs.Length(); i++) { + mClusterCanFlushBuffs.AppendElement()->SwapElements(mClusterBuffs[i]); + } + + mClusterHeaderIndex = 0; + mClusterLengthLoc = 0; + mClusterBuffs.Clear(); + mFlushState &= ~FLUSH_CLUSTER; +} + +void +EbmlComposer::WriteSimpleBlock(EncodedFrame* aFrame) +{ + EbmlGlobal ebml; + ebml.offset = 0; + + auto frameType = aFrame->GetFrameType(); + bool flush = false; + bool isVP8IFrame = (frameType == EncodedFrame::FrameType::VP8_I_FRAME); + if (isVP8IFrame) { + FinishCluster(); + flush = true; + } else { + // Force it to calculate timecode using signed math via cast + int64_t timeCode = (aFrame->GetTimeStamp() / ((int) PR_USEC_PER_MSEC) - mClusterTimecode) + + (mCodecDelay / PR_NSEC_PER_MSEC); + if (timeCode < SHRT_MIN || timeCode > SHRT_MAX ) { + // We're probably going to overflow (or underflow) the timeCode value later! + FinishCluster(); + flush = true; + } + } + + auto block = mClusterBuffs.AppendElement(); + block->SetLength(aFrame->GetFrameData().Length() + DEFAULT_HEADER_SIZE); + ebml.buf = block->Elements(); + + if (flush) { + EbmlLoc ebmlLoc; + Ebml_StartSubElement(&ebml, &ebmlLoc, Cluster); + MOZ_ASSERT(mClusterBuffs.Length() > 0); + // current cluster header array index + mClusterHeaderIndex = mClusterBuffs.Length() - 1; + mClusterLengthLoc = ebmlLoc.offset; + // if timeCode didn't under/overflow before, it shouldn't after this + mClusterTimecode = aFrame->GetTimeStamp() / PR_USEC_PER_MSEC; + Ebml_SerializeUnsigned(&ebml, Timecode, mClusterTimecode); + mFlushState |= FLUSH_CLUSTER; + } + + bool isOpus = (frameType == EncodedFrame::FrameType::OPUS_AUDIO_FRAME); + // Can't underflow/overflow now + int64_t timeCode = aFrame->GetTimeStamp() / ((int) PR_USEC_PER_MSEC) - mClusterTimecode; + if (isOpus) { + timeCode += mCodecDelay / PR_NSEC_PER_MSEC; + } + MOZ_ASSERT(timeCode >= SHRT_MIN && timeCode <= SHRT_MAX); + writeSimpleBlock(&ebml, isOpus ? 
0x2 : 0x1, static_cast<short>(timeCode), isVP8IFrame, + 0, 0, (unsigned char*)aFrame->GetFrameData().Elements(), + aFrame->GetFrameData().Length()); + MOZ_ASSERT(ebml.offset <= DEFAULT_HEADER_SIZE + + aFrame->GetFrameData().Length(), + "write more data > EBML_BUFFER_SIZE"); + block->SetLength(ebml.offset); +} + +void +EbmlComposer::SetVideoConfig(uint32_t aWidth, uint32_t aHeight, + uint32_t aDisplayWidth, uint32_t aDisplayHeight, + float aFrameRate) +{ + MOZ_ASSERT(aWidth > 0, "Width should > 0"); + MOZ_ASSERT(aHeight > 0, "Height should > 0"); + MOZ_ASSERT(aDisplayWidth > 0, "DisplayWidth should > 0"); + MOZ_ASSERT(aDisplayHeight > 0, "DisplayHeight should > 0"); + MOZ_ASSERT(aFrameRate > 0, "FrameRate should > 0"); + mWidth = aWidth; + mHeight = aHeight; + mDisplayWidth = aDisplayWidth; + mDisplayHeight = aDisplayHeight; + mFrameRate = aFrameRate; +} + +void +EbmlComposer::SetAudioConfig(uint32_t aSampleFreq, uint32_t aChannels) +{ + MOZ_ASSERT(aSampleFreq > 0, "SampleFreq should > 0"); + MOZ_ASSERT(aChannels > 0, "Channels should > 0"); + mSampleFreq = aSampleFreq; + mChannels = aChannels; +} + +void +EbmlComposer::ExtractBuffer(nsTArray<nsTArray<uint8_t> >* aDestBufs, + uint32_t aFlag) +{ + if ((aFlag & ContainerWriter::FLUSH_NEEDED) || + (aFlag & ContainerWriter::GET_HEADER)) + { + FinishMetadata(); + } + if (aFlag & ContainerWriter::FLUSH_NEEDED) + { + FinishCluster(); + } + // aDestBufs may have some element + for (uint32_t i = 0; i < mClusterCanFlushBuffs.Length(); i++) { + aDestBufs->AppendElement()->SwapElements(mClusterCanFlushBuffs[i]); + } + mClusterCanFlushBuffs.Clear(); +} + +EbmlComposer::EbmlComposer() + : mFlushState(FLUSH_NONE) + , mClusterHeaderIndex(0) + , mClusterLengthLoc(0) + , mCodecDelay(0) + , mClusterTimecode(0) + , mWidth(0) + , mHeight(0) + , mFrameRate(0) + , mSampleFreq(0) + , mChannels(0) +{} + +} // namespace mozilla diff --git a/dom/media/webm/EbmlComposer.h b/dom/media/webm/EbmlComposer.h new file mode 100644 index 
000000000..389d33528 --- /dev/null +++ b/dom/media/webm/EbmlComposer.h @@ -0,0 +1,89 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef EbmlComposer_h_ +#define EbmlComposer_h_ +#include "nsTArray.h" +#include "ContainerWriter.h" + +namespace mozilla { + +/* + * A WebM muxer helper for package the valid WebM format. + */ +class EbmlComposer { +public: + EbmlComposer(); + /* + * Assign the parameter which header required. + */ + void SetVideoConfig(uint32_t aWidth, uint32_t aHeight, uint32_t aDisplayWidth, + uint32_t aDisplayHeight, float aFrameRate); + + void SetAudioConfig(uint32_t aSampleFreq, uint32_t aChannels); + /* + * Set the CodecPrivateData for writing in header. + */ + void SetAudioCodecPrivateData(nsTArray<uint8_t>& aBufs) + { + mCodecPrivateData.AppendElements(aBufs); + } + /* + * Generate the whole WebM header and output to mBuff. + */ + void GenerateHeader(); + /* + * Insert media encoded buffer into muxer and it would be package + * into SimpleBlock. If no cluster is opened, new cluster will start for writing. + */ + void WriteSimpleBlock(EncodedFrame* aFrame); + /* + * Get valid cluster data. + */ + void ExtractBuffer(nsTArray<nsTArray<uint8_t> >* aDestBufs, + uint32_t aFlag = 0); +private: + // Move the metadata data to mClusterCanFlushBuffs. + void FinishMetadata(); + // Close current cluster and move data to mClusterCanFlushBuffs. + void FinishCluster(); + // The temporary storage for cluster data. + nsTArray<nsTArray<uint8_t> > mClusterBuffs; + // The storage which contain valid cluster data. + nsTArray<nsTArray<uint8_t> > mClusterCanFlushBuffs; + + // Indicate the data types in mClusterBuffs. 
+ enum { + FLUSH_NONE = 0, + FLUSH_METADATA = 1 << 0, + FLUSH_CLUSTER = 1 << 1 + }; + uint32_t mFlushState; + // Indicate the cluster header index in mClusterBuffs. + uint32_t mClusterHeaderIndex; + // The cluster length position. + uint64_t mClusterLengthLoc; + // Audio codec specific header data. + nsTArray<uint8_t> mCodecPrivateData; + // Codec delay in nanoseconds. + uint64_t mCodecDelay; + + // The timecode of the cluster. + uint64_t mClusterTimecode; + + // Video configuration + int mWidth; + int mHeight; + int mDisplayWidth; + int mDisplayHeight; + float mFrameRate; + // Audio configuration + float mSampleFreq; + int mChannels; +}; + +} // namespace mozilla + +#endif diff --git a/dom/media/webm/NesteggPacketHolder.h b/dom/media/webm/NesteggPacketHolder.h new file mode 100644 index 000000000..c1d0b646f --- /dev/null +++ b/dom/media/webm/NesteggPacketHolder.h @@ -0,0 +1,124 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +#if !defined(NesteggPacketHolder_h_) +#define NesteggPacketHolder_h_ + +#include <stdint.h> +#include "nsAutoRef.h" +#include "nestegg/nestegg.h" + +namespace mozilla { + +// Holds a nestegg_packet, and its file offset. This is needed so we +// know the offset in the file we've played up to, in order to calculate +// whether it's likely we can play through to the end without needing +// to stop to buffer, given the current download rate. 
+class NesteggPacketHolder { +public: + NS_INLINE_DECL_THREADSAFE_REFCOUNTING(NesteggPacketHolder) + NesteggPacketHolder() + : mPacket(nullptr) + , mOffset(-1) + , mTimestamp(-1) + , mDuration(-1) + , mIsKeyframe(false) {} + + bool Init(nestegg_packet* aPacket, int64_t aOffset, unsigned aTrack, bool aIsKeyframe) + { + uint64_t timestamp_ns; + if (nestegg_packet_tstamp(aPacket, ×tamp_ns) == -1) { + return false; + } + + // We store the timestamp as signed microseconds so that it's easily + // comparable to other timestamps we have in the system. + mTimestamp = timestamp_ns / 1000; + mPacket = aPacket; + mOffset = aOffset; + mTrack = aTrack; + mIsKeyframe = aIsKeyframe; + + uint64_t duration_ns; + if (!nestegg_packet_duration(aPacket, &duration_ns)) { + mDuration = duration_ns / 1000; + } + return true; + } + + nestegg_packet* Packet() { MOZ_ASSERT(IsInitialized()); return mPacket; } + int64_t Offset() { MOZ_ASSERT(IsInitialized()); return mOffset; } + int64_t Timestamp() { MOZ_ASSERT(IsInitialized()); return mTimestamp; } + int64_t Duration() { MOZ_ASSERT(IsInitialized()); return mDuration; } + unsigned Track() { MOZ_ASSERT(IsInitialized()); return mTrack; } + bool IsKeyframe() { MOZ_ASSERT(IsInitialized()); return mIsKeyframe; } + +private: + ~NesteggPacketHolder() + { + nestegg_free_packet(mPacket); + } + + bool IsInitialized() { return mOffset >= 0; } + + nestegg_packet* mPacket; + + // Offset in bytes. This is the offset of the end of the Block + // which contains the packet. + int64_t mOffset; + + // Packet presentation timestamp in microseconds. + int64_t mTimestamp; + + // Packet duration in microseconds; -1 if unknown or retrieval failed. + int64_t mDuration; + + // Track ID. + unsigned mTrack; + + // Does this packet contain a keyframe? + bool mIsKeyframe; + + // Copy constructor and assignment operator not implemented. Don't use them! 
+ NesteggPacketHolder(const NesteggPacketHolder &aOther); + NesteggPacketHolder& operator= (NesteggPacketHolder const& aOther); +}; + +// Queue for holding nestegg packets. +class WebMPacketQueue { + public: + int32_t GetSize() { + return mQueue.size(); + } + + void Push(NesteggPacketHolder* aItem) { + mQueue.push_back(aItem); + } + + void PushFront(NesteggPacketHolder* aItem) { + mQueue.push_front(Move(aItem)); + } + + already_AddRefed<NesteggPacketHolder> PopFront() { + RefPtr<NesteggPacketHolder> result = mQueue.front().forget(); + mQueue.pop_front(); + return result.forget(); + } + + void Reset() { + while (!mQueue.empty()) { + mQueue.pop_front(); + } + } + +private: + std::deque<RefPtr<NesteggPacketHolder>> mQueue; +}; + + +} // namespace mozilla + +#endif + diff --git a/dom/media/webm/WebMBufferedParser.cpp b/dom/media/webm/WebMBufferedParser.cpp new file mode 100644 index 000000000..21154ab4b --- /dev/null +++ b/dom/media/webm/WebMBufferedParser.cpp @@ -0,0 +1,514 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsAlgorithm.h" +#include "WebMBufferedParser.h" +#include "nsThreadUtils.h" +#include <algorithm> + +extern mozilla::LazyLogModule gMediaDemuxerLog; +#define WEBM_DEBUG(arg, ...) 
MOZ_LOG(gMediaDemuxerLog, mozilla::LogLevel::Debug, ("WebMBufferedParser(%p)::%s: " arg, this, __func__, ##__VA_ARGS__)) + +namespace mozilla { + +static uint32_t +VIntLength(unsigned char aFirstByte, uint32_t* aMask) +{ + uint32_t count = 1; + uint32_t mask = 1 << 7; + while (count < 8) { + if ((aFirstByte & mask) != 0) { + break; + } + mask >>= 1; + count += 1; + } + if (aMask) { + *aMask = mask; + } + NS_ASSERTION(count >= 1 && count <= 8, "Insane VInt length."); + return count; +} + +bool WebMBufferedParser::Append(const unsigned char* aBuffer, uint32_t aLength, + nsTArray<WebMTimeDataOffset>& aMapping, + ReentrantMonitor& aReentrantMonitor) +{ + static const uint32_t EBML_ID = 0x1a45dfa3; + static const uint32_t SEGMENT_ID = 0x18538067; + static const uint32_t SEGINFO_ID = 0x1549a966; + static const uint32_t TRACKS_ID = 0x1654AE6B; + static const uint32_t CLUSTER_ID = 0x1f43b675; + static const uint32_t TIMECODESCALE_ID = 0x2ad7b1; + static const unsigned char TIMECODE_ID = 0xe7; + static const unsigned char BLOCKGROUP_ID = 0xa0; + static const unsigned char BLOCK_ID = 0xa1; + static const unsigned char SIMPLEBLOCK_ID = 0xa3; + static const uint32_t BLOCK_TIMECODE_LENGTH = 2; + + static const unsigned char CLUSTER_SYNC_ID[] = { 0x1f, 0x43, 0xb6, 0x75 }; + + const unsigned char* p = aBuffer; + + // Parse each byte in aBuffer one-by-one, producing timecodes and updating + // aMapping as we go. Parser pauses at end of stream (which may be at any + // point within the parse) and resumes parsing the next time Append is + // called with new data. 
+ while (p < aBuffer + aLength) { + switch (mState) { + case READ_ELEMENT_ID: + mVIntRaw = true; + mState = READ_VINT; + mNextState = READ_ELEMENT_SIZE; + break; + case READ_ELEMENT_SIZE: + mVIntRaw = false; + mElement.mID = mVInt; + mState = READ_VINT; + mNextState = PARSE_ELEMENT; + break; + case FIND_CLUSTER_SYNC: + if (*p++ == CLUSTER_SYNC_ID[mClusterSyncPos]) { + mClusterSyncPos += 1; + } else { + mClusterSyncPos = 0; + } + if (mClusterSyncPos == sizeof(CLUSTER_SYNC_ID)) { + mVInt.mValue = CLUSTER_ID; + mVInt.mLength = sizeof(CLUSTER_SYNC_ID); + mState = READ_ELEMENT_SIZE; + } + break; + case PARSE_ELEMENT: + mElement.mSize = mVInt; + switch (mElement.mID.mValue) { + case SEGMENT_ID: + mState = READ_ELEMENT_ID; + break; + case SEGINFO_ID: + mGotTimecodeScale = true; + mState = READ_ELEMENT_ID; + break; + case TIMECODE_ID: + mVInt = VInt(); + mVIntLeft = mElement.mSize.mValue; + mState = READ_VINT_REST; + mNextState = READ_CLUSTER_TIMECODE; + break; + case TIMECODESCALE_ID: + mVInt = VInt(); + mVIntLeft = mElement.mSize.mValue; + mState = READ_VINT_REST; + mNextState = READ_TIMECODESCALE; + break; + case CLUSTER_ID: + mClusterOffset = mCurrentOffset + (p - aBuffer) - + (mElement.mID.mLength + mElement.mSize.mLength); + // Handle "unknown" length; + if (mElement.mSize.mValue + 1 != uint64_t(1) << (mElement.mSize.mLength * 7)) { + mClusterEndOffset = mClusterOffset + mElement.mID.mLength + mElement.mSize.mLength + mElement.mSize.mValue; + } else { + mClusterEndOffset = -1; + } + mState = READ_ELEMENT_ID; + break; + case BLOCKGROUP_ID: + mState = READ_ELEMENT_ID; + break; + case SIMPLEBLOCK_ID: + /* FALLTHROUGH */ + case BLOCK_ID: + mBlockSize = mElement.mSize.mValue; + mBlockTimecode = 0; + mBlockTimecodeLength = BLOCK_TIMECODE_LENGTH; + mBlockOffset = mCurrentOffset + (p - aBuffer) - + (mElement.mID.mLength + mElement.mSize.mLength); + mState = READ_VINT; + mNextState = READ_BLOCK_TIMECODE; + break; + case TRACKS_ID: + mSkipBytes = mElement.mSize.mValue; + 
mState = CHECK_INIT_FOUND; + break; + case EBML_ID: + mLastInitStartOffset = mCurrentOffset + (p - aBuffer) - + (mElement.mID.mLength + mElement.mSize.mLength); + MOZ_FALLTHROUGH; + default: + mSkipBytes = mElement.mSize.mValue; + mState = SKIP_DATA; + mNextState = READ_ELEMENT_ID; + break; + } + break; + case READ_VINT: { + unsigned char c = *p++; + uint32_t mask; + mVInt.mLength = VIntLength(c, &mask); + mVIntLeft = mVInt.mLength - 1; + mVInt.mValue = mVIntRaw ? c : c & ~mask; + mState = READ_VINT_REST; + break; + } + case READ_VINT_REST: + if (mVIntLeft) { + mVInt.mValue <<= 8; + mVInt.mValue |= *p++; + mVIntLeft -= 1; + } else { + mState = mNextState; + } + break; + case READ_TIMECODESCALE: + if (!mGotTimecodeScale) { + return false; + } + mTimecodeScale = mVInt.mValue; + mState = READ_ELEMENT_ID; + break; + case READ_CLUSTER_TIMECODE: + mClusterTimecode = mVInt.mValue; + mState = READ_ELEMENT_ID; + break; + case READ_BLOCK_TIMECODE: + if (mBlockTimecodeLength) { + mBlockTimecode <<= 8; + mBlockTimecode |= *p++; + mBlockTimecodeLength -= 1; + } else { + // It's possible we've parsed this data before, so avoid inserting + // duplicate WebMTimeDataOffset entries. + { + ReentrantMonitorAutoEnter mon(aReentrantMonitor); + int64_t endOffset = mBlockOffset + mBlockSize + + mElement.mID.mLength + mElement.mSize.mLength; + uint32_t idx = aMapping.IndexOfFirstElementGt(endOffset); + if (idx == 0 || aMapping[idx - 1] != endOffset) { + // Don't insert invalid negative timecodes. + if (mBlockTimecode >= 0 || mClusterTimecode >= uint16_t(abs(mBlockTimecode))) { + if (!mGotTimecodeScale) { + return false; + } + uint64_t absTimecode = mClusterTimecode + mBlockTimecode; + absTimecode *= mTimecodeScale; + // Avoid creating an entry if the timecode is out of order + // (invalid according to the WebM specification) so that + // ordering invariants of aMapping are not violated. 
+ if (idx == 0 || + aMapping[idx - 1].mTimecode <= absTimecode || + (idx + 1 < aMapping.Length() && + aMapping[idx + 1].mTimecode >= absTimecode)) { + WebMTimeDataOffset entry(endOffset, absTimecode, mLastInitStartOffset, + mClusterOffset, mClusterEndOffset); + aMapping.InsertElementAt(idx, entry); + } else { + WEBM_DEBUG("Out of order timecode %llu in Cluster at %lld ignored", + absTimecode, mClusterOffset); + } + } + } + } + + // Skip rest of block header and the block's payload. + mBlockSize -= mVInt.mLength; + mBlockSize -= BLOCK_TIMECODE_LENGTH; + mSkipBytes = uint32_t(mBlockSize); + mState = SKIP_DATA; + mNextState = READ_ELEMENT_ID; + } + break; + case SKIP_DATA: + if (mSkipBytes) { + uint32_t left = aLength - (p - aBuffer); + left = std::min(left, mSkipBytes); + p += left; + mSkipBytes -= left; + } + if (!mSkipBytes) { + mBlockEndOffset = mCurrentOffset + (p - aBuffer); + mState = mNextState; + } + break; + case CHECK_INIT_FOUND: + if (mSkipBytes) { + uint32_t left = aLength - (p - aBuffer); + left = std::min(left, mSkipBytes); + p += left; + mSkipBytes -= left; + } + if (!mSkipBytes) { + if (mInitEndOffset < 0) { + mInitEndOffset = mCurrentOffset + (p - aBuffer); + mBlockEndOffset = mCurrentOffset + (p - aBuffer); + } + mState = READ_ELEMENT_ID; + } + break; + } + } + + NS_ASSERTION(p == aBuffer + aLength, "Must have parsed to end of data."); + mCurrentOffset += aLength; + + return true; +} + +int64_t +WebMBufferedParser::EndSegmentOffset(int64_t aOffset) +{ + if (mLastInitStartOffset > aOffset || mClusterOffset > aOffset) { + return std::min(mLastInitStartOffset >= 0 ? mLastInitStartOffset : INT64_MAX, + mClusterOffset >= 0 ? mClusterOffset : INT64_MAX); + } + return mBlockEndOffset; +} + +// SyncOffsetComparator and TimeComparator are slightly confusing, in that +// the nsTArray they're used with (mTimeMapping) is sorted by mEndOffset and +// these comparators are used on the other fields of WebMTimeDataOffset. 
+// This is only valid because timecodes are required to be monotonically +// increasing within a file (thus establishing an ordering relationship with +// mTimecode), and mEndOffset is derived from mSyncOffset. +struct SyncOffsetComparator { + bool Equals(const WebMTimeDataOffset& a, const int64_t& b) const { + return a.mSyncOffset == b; + } + + bool LessThan(const WebMTimeDataOffset& a, const int64_t& b) const { + return a.mSyncOffset < b; + } +}; + +struct TimeComparator { + bool Equals(const WebMTimeDataOffset& a, const uint64_t& b) const { + return a.mTimecode == b; + } + + bool LessThan(const WebMTimeDataOffset& a, const uint64_t& b) const { + return a.mTimecode < b; + } +}; + +bool WebMBufferedState::CalculateBufferedForRange(int64_t aStartOffset, int64_t aEndOffset, + uint64_t* aStartTime, uint64_t* aEndTime) +{ + ReentrantMonitorAutoEnter mon(mReentrantMonitor); + + // Find the first WebMTimeDataOffset at or after aStartOffset. + uint32_t start = mTimeMapping.IndexOfFirstElementGt(aStartOffset - 1, SyncOffsetComparator()); + if (start == mTimeMapping.Length()) { + return false; + } + + // Find the first WebMTimeDataOffset at or before aEndOffset. + uint32_t end = mTimeMapping.IndexOfFirstElementGt(aEndOffset); + if (end > 0) { + end -= 1; + } + + // Range is empty. 
+ if (end <= start) { + return false; + } + + NS_ASSERTION(mTimeMapping[start].mSyncOffset >= aStartOffset && + mTimeMapping[end].mEndOffset <= aEndOffset, + "Computed time range must lie within data range."); + if (start > 0) { + NS_ASSERTION(mTimeMapping[start - 1].mSyncOffset < aStartOffset, + "Must have found least WebMTimeDataOffset for start"); + } + if (end < mTimeMapping.Length() - 1) { + NS_ASSERTION(mTimeMapping[end + 1].mEndOffset > aEndOffset, + "Must have found greatest WebMTimeDataOffset for end"); + } + + MOZ_ASSERT(mTimeMapping[end].mTimecode >= mTimeMapping[end - 1].mTimecode); + uint64_t frameDuration = mTimeMapping[end].mTimecode - mTimeMapping[end - 1].mTimecode; + *aStartTime = mTimeMapping[start].mTimecode; + *aEndTime = mTimeMapping[end].mTimecode + frameDuration; + return true; +} + +bool WebMBufferedState::GetOffsetForTime(uint64_t aTime, int64_t* aOffset) +{ + ReentrantMonitorAutoEnter mon(mReentrantMonitor); + + if(mTimeMapping.IsEmpty()) { + return false; + } + + uint64_t time = aTime; + if (time > 0) { + time = time - 1; + } + uint32_t idx = mTimeMapping.IndexOfFirstElementGt(time, TimeComparator()); + if (idx == mTimeMapping.Length()) { + // Clamp to end + *aOffset = mTimeMapping[mTimeMapping.Length() - 1].mSyncOffset; + } else { + // Idx is within array or has been clamped to start + *aOffset = mTimeMapping[idx].mSyncOffset; + } + return true; +} + +void WebMBufferedState::NotifyDataArrived(const unsigned char* aBuffer, uint32_t aLength, int64_t aOffset) +{ + uint32_t idx = mRangeParsers.IndexOfFirstElementGt(aOffset - 1); + if (idx == 0 || !(mRangeParsers[idx-1] == aOffset)) { + // If the incoming data overlaps an already parsed range, adjust the + // buffer so that we only reparse the new data. It's also possible to + // have an overlap where the end of the incoming data is within an + // already parsed range, but we don't bother handling that other than by + // avoiding storing duplicate timecodes when the parser runs. 
+ if (idx != mRangeParsers.Length() && mRangeParsers[idx].mStartOffset <= aOffset) { + // Complete overlap, skip parsing. + if (aOffset + aLength <= mRangeParsers[idx].mCurrentOffset) { + return; + } + + // Partial overlap, adjust the buffer to parse only the new data. + int64_t adjust = mRangeParsers[idx].mCurrentOffset - aOffset; + NS_ASSERTION(adjust >= 0, "Overlap detection bug."); + aBuffer += adjust; + aLength -= uint32_t(adjust); + } else { + mRangeParsers.InsertElementAt(idx, WebMBufferedParser(aOffset)); + if (idx != 0) { + mRangeParsers[idx].SetTimecodeScale(mRangeParsers[0].GetTimecodeScale()); + } + } + } + + mRangeParsers[idx].Append(aBuffer, + aLength, + mTimeMapping, + mReentrantMonitor); + + // Merge parsers with overlapping regions and clean up the remnants. + uint32_t i = 0; + while (i + 1 < mRangeParsers.Length()) { + if (mRangeParsers[i].mCurrentOffset >= mRangeParsers[i + 1].mStartOffset) { + mRangeParsers[i + 1].mStartOffset = mRangeParsers[i].mStartOffset; + mRangeParsers[i + 1].mInitEndOffset = mRangeParsers[i].mInitEndOffset; + mRangeParsers.RemoveElementAt(i); + } else { + i += 1; + } + } + + if (mRangeParsers.IsEmpty()) { + return; + } + + ReentrantMonitorAutoEnter mon(mReentrantMonitor); + mLastBlockOffset = mRangeParsers.LastElement().mBlockEndOffset; +} + +void WebMBufferedState::Reset() { + mRangeParsers.Clear(); + mTimeMapping.Clear(); +} + +void WebMBufferedState::UpdateIndex(const MediaByteRangeSet& aRanges, MediaResource* aResource) +{ + for (uint32_t index = 0; index < aRanges.Length(); index++) { + const MediaByteRange& range = aRanges[index]; + int64_t offset = range.mStart; + uint32_t length = range.mEnd - range.mStart; + + uint32_t idx = mRangeParsers.IndexOfFirstElementGt(offset - 1); + if (!idx || !(mRangeParsers[idx-1] == offset)) { + // If the incoming data overlaps an already parsed range, adjust the + // buffer so that we only reparse the new data. 
It's also possible to + // have an overlap where the end of the incoming data is within an + // already parsed range, but we don't bother handling that other than by + // avoiding storing duplicate timecodes when the parser runs. + if (idx != mRangeParsers.Length() && mRangeParsers[idx].mStartOffset <= offset) { + // Complete overlap, skip parsing. + if (offset + length <= mRangeParsers[idx].mCurrentOffset) { + continue; + } + + // Partial overlap, adjust the buffer to parse only the new data. + int64_t adjust = mRangeParsers[idx].mCurrentOffset - offset; + NS_ASSERTION(adjust >= 0, "Overlap detection bug."); + offset += adjust; + length -= uint32_t(adjust); + } else { + mRangeParsers.InsertElementAt(idx, WebMBufferedParser(offset)); + if (idx) { + mRangeParsers[idx].SetTimecodeScale(mRangeParsers[0].GetTimecodeScale()); + } + } + } + while (length > 0) { + static const uint32_t BLOCK_SIZE = 1048576; + uint32_t block = std::min(length, BLOCK_SIZE); + RefPtr<MediaByteBuffer> bytes = aResource->MediaReadAt(offset, block); + if (!bytes) { + break; + } + NotifyDataArrived(bytes->Elements(), bytes->Length(), offset); + length -= bytes->Length(); + offset += bytes->Length(); + } + } +} + +int64_t WebMBufferedState::GetInitEndOffset() +{ + if (mRangeParsers.IsEmpty()) { + return -1; + } + return mRangeParsers[0].mInitEndOffset; +} + +int64_t WebMBufferedState::GetLastBlockOffset() +{ + ReentrantMonitorAutoEnter mon(mReentrantMonitor); + + return mLastBlockOffset; +} + +bool WebMBufferedState::GetStartTime(uint64_t *aTime) +{ + ReentrantMonitorAutoEnter mon(mReentrantMonitor); + + if (mTimeMapping.IsEmpty()) { + return false; + } + + uint32_t idx = mTimeMapping.IndexOfFirstElementGt(0, SyncOffsetComparator()); + if (idx == mTimeMapping.Length()) { + return false; + } + + *aTime = mTimeMapping[idx].mTimecode; + return true; +} + +bool +WebMBufferedState::GetNextKeyframeTime(uint64_t aTime, uint64_t* aKeyframeTime) +{ + ReentrantMonitorAutoEnter mon(mReentrantMonitor); + 
int64_t offset = 0; + bool rv = GetOffsetForTime(aTime, &offset); + if (!rv) { + return false; + } + uint32_t idx = mTimeMapping.IndexOfFirstElementGt(offset, SyncOffsetComparator()); + if (idx == mTimeMapping.Length()) { + return false; + } + *aKeyframeTime = mTimeMapping[idx].mTimecode; + return true; +} +} // namespace mozilla + +#undef WEBM_DEBUG + diff --git a/dom/media/webm/WebMBufferedParser.h b/dom/media/webm/WebMBufferedParser.h new file mode 100644 index 000000000..bc3de4ba0 --- /dev/null +++ b/dom/media/webm/WebMBufferedParser.h @@ -0,0 +1,322 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +#if !defined(WebMBufferedParser_h_) +#define WebMBufferedParser_h_ + +#include "nsISupportsImpl.h" +#include "nsTArray.h" +#include "mozilla/ReentrantMonitor.h" +#include "MediaResource.h" + +namespace mozilla { + +// Stores a stream byte offset and the scaled timecode of the block at +// that offset. 
+struct WebMTimeDataOffset +{ + WebMTimeDataOffset(int64_t aEndOffset, uint64_t aTimecode, + int64_t aInitOffset, int64_t aSyncOffset, + int64_t aClusterEndOffset) + : mEndOffset(aEndOffset) + , mInitOffset(aInitOffset) + , mSyncOffset(aSyncOffset) + , mClusterEndOffset(aClusterEndOffset) + , mTimecode(aTimecode) + {} + + bool operator==(int64_t aEndOffset) const { + return mEndOffset == aEndOffset; + } + + bool operator!=(int64_t aEndOffset) const { + return mEndOffset != aEndOffset; + } + + bool operator<(int64_t aEndOffset) const { + return mEndOffset < aEndOffset; + } + + int64_t mEndOffset; + int64_t mInitOffset; + int64_t mSyncOffset; + int64_t mClusterEndOffset; + uint64_t mTimecode; +}; + +// A simple WebM parser that produces data offset to timecode pairs as it +// consumes blocks. A new parser is created for each distinct range of data +// received and begins parsing from the first WebM cluster within that +// range. Old parsers are destroyed when their range merges with a later +// parser or an already parsed range. The parser may start at any position +// within the stream. +struct WebMBufferedParser +{ + explicit WebMBufferedParser(int64_t aOffset) + : mStartOffset(aOffset) + , mCurrentOffset(aOffset) + , mInitEndOffset(-1) + , mBlockEndOffset(-1) + , mState(READ_ELEMENT_ID) + , mNextState(READ_ELEMENT_ID) + , mVIntRaw(false) + , mLastInitStartOffset(-1) + , mClusterSyncPos(0) + , mVIntLeft(0) + , mBlockSize(0) + , mClusterTimecode(0) + , mClusterOffset(0) + , mClusterEndOffset(-1) + , mBlockOffset(0) + , mBlockTimecode(0) + , mBlockTimecodeLength(0) + , mSkipBytes(0) + , mTimecodeScale(1000000) + , mGotTimecodeScale(false) + { + if (mStartOffset != 0) { + mState = FIND_CLUSTER_SYNC; + } + } + + uint32_t GetTimecodeScale() { + MOZ_ASSERT(mGotTimecodeScale); + return mTimecodeScale; + } + + // If this parser is not expected to parse a segment info, it must be told + // the appropriate timecode scale to use from elsewhere. 
+ void SetTimecodeScale(uint32_t aTimecodeScale) { + mTimecodeScale = aTimecodeScale; + mGotTimecodeScale = true; + } + + // Steps the parser through aLength bytes of data. Always consumes + // aLength bytes. Updates mCurrentOffset before returning. Acquires + // aReentrantMonitor before using aMapping. + // Returns false if an error was encountered. + bool Append(const unsigned char* aBuffer, uint32_t aLength, + nsTArray<WebMTimeDataOffset>& aMapping, + ReentrantMonitor& aReentrantMonitor); + + bool operator==(int64_t aOffset) const { + return mCurrentOffset == aOffset; + } + + bool operator<(int64_t aOffset) const { + return mCurrentOffset < aOffset; + } + + // Returns the start offset of the init (EBML) or media segment (Cluster) + // following the aOffset position. If none were found, returns mBlockEndOffset. + // This allows determining the end of the interval containing aOffset. + int64_t EndSegmentOffset(int64_t aOffset); + + // The offset at which this parser started parsing. Used to merge + // adjacent parsers, in which case the later parser adopts the earlier + // parser's mStartOffset. + int64_t mStartOffset; + + // Current offset within the stream. Updated in chunks as Append() consumes + // data. + int64_t mCurrentOffset; + + // Tracks element's end offset. This indicates the end of the first init + // segment. Will only be set if a Segment Information has been found. + int64_t mInitEndOffset; + + // End offset of the last block parsed. + // Will only be set if a complete block has been parsed. + int64_t mBlockEndOffset; + +private: + enum State { + // Parser start state. Expects to begin at a valid EBML element. Move + // to READ_VINT with mVIntRaw true, then return to READ_ELEMENT_SIZE. + READ_ELEMENT_ID, + + // Store element ID read into mVInt into mElement.mID. Move to + // READ_VINT with mVIntRaw false, then return to PARSE_ELEMENT. + READ_ELEMENT_SIZE, + + // Parser start state for parsers started at an arbitrary offset.
Scans + // forward for the first cluster, then move to READ_ELEMENT_ID. + FIND_CLUSTER_SYNC, + + // Simplistic core of the parser. Does not pay attention to nesting of + // elements. Checks mElement for an element ID of interest, then moves + // to the next state as determined by the element ID. + PARSE_ELEMENT, + + // Read the first byte of a variable length integer. The first byte + // encodes both the variable integer's length and part of the value. + // The value read so far is stored in mVInt.mValue and the length is + // stored in mVInt.mLength. The number of bytes left to read is stored + // in mVIntLeft. + READ_VINT, + + // Reads the remaining mVIntLeft bytes into mVInt.mValue. + READ_VINT_REST, + + // mVInt holds the parsed timecode scale, store it in mTimecodeScale, + // then return READ_ELEMENT_ID. + READ_TIMECODESCALE, + + // mVInt holds the parsed cluster timecode, store it in + // mClusterTimecode, then return to READ_ELEMENT_ID. + READ_CLUSTER_TIMECODE, + + // mBlockTimecodeLength holds the remaining length of the block timecode + // left to read. Read each byte of the timecode into mBlockTimecode. + // Once complete, calculate the scaled timecode from the cluster + // timecode, block timecode, and timecode scale, and insert a + // WebMTimeDataOffset entry into aMapping if one is not already present + // for this offset. + READ_BLOCK_TIMECODE, + + // Will skip the current tracks element and set mInitEndOffset if an init + // segment has been found. + // Currently, only assumes it's the end of the tracks element. + CHECK_INIT_FOUND, + + // Skip mSkipBytes of data before resuming parse at mNextState. + SKIP_DATA, + }; + + // Current state machine action. + State mState; + + // Next state machine action. SKIP_DATA and READ_VINT_REST advance to + // mNextState when the current action completes. 
+ State mNextState; + + struct VInt { + VInt() : mValue(0), mLength(0) {} + uint64_t mValue; + uint64_t mLength; + }; + + struct EBMLElement { + uint64_t Length() { return mID.mLength + mSize.mLength; } + VInt mID; + VInt mSize; + }; + + EBMLElement mElement; + + VInt mVInt; + + bool mVIntRaw; + + // EBML start offset. This indicates the start of the last init segment + // parsed. Will only be set if an EBML element has been found. + int64_t mLastInitStartOffset; + + // Current match position within CLUSTER_SYNC_ID. Used to find sync + // within arbitrary data. + uint32_t mClusterSyncPos; + + // Number of bytes of mVInt left to read. mVInt is complete once this + // reaches 0. + uint32_t mVIntLeft; + + // Size of the block currently being parsed. Any unused data within the + // block is skipped once the block timecode has been parsed. + uint64_t mBlockSize; + + // Cluster-level timecode. + uint64_t mClusterTimecode; + + // Start offset of the cluster currently being parsed. Used as the sync + // point offset for the offset-to-time mapping as each block timecode is + // parsed. + int64_t mClusterOffset; + + // End offset of the cluster currently being parsed. -1 if unknown. + int64_t mClusterEndOffset; + + // Start offset of the block currently being parsed. Used as the byte + // offset for the offset-to-time mapping once the block timecode has been + // parsed. + int64_t mBlockOffset; + + // Block-level timecode. This is summed with mClusterTimecode to produce + // an absolute timecode for the offset-to-time mapping. + int16_t mBlockTimecode; + + // Number of bytes of mBlockTimecode left to read. + uint32_t mBlockTimecodeLength; + + // Count of bytes left to skip before resuming parse at mNextState. + // Mostly used to skip block payload data after reading a block timecode. + uint32_t mSkipBytes; + + // Timecode scale read from the segment info and used to scale absolute + // timecodes.
+ uint32_t mTimecodeScale; + + // True if we read the timecode scale from the segment info or have + // confirmed that the default value is to be used. + bool mGotTimecodeScale; +}; + +class WebMBufferedState final +{ + NS_INLINE_DECL_THREADSAFE_REFCOUNTING(WebMBufferedState) + +public: + WebMBufferedState() + : mReentrantMonitor("WebMBufferedState") + , mLastBlockOffset(-1) + { + MOZ_COUNT_CTOR(WebMBufferedState); + } + + void NotifyDataArrived(const unsigned char* aBuffer, uint32_t aLength, int64_t aOffset); + void Reset(); + void UpdateIndex(const MediaByteRangeSet& aRanges, MediaResource* aResource); + bool CalculateBufferedForRange(int64_t aStartOffset, int64_t aEndOffset, + uint64_t* aStartTime, uint64_t* aEndTime); + + // Returns true if mTimeMapping is not empty and sets aOffset to + // the latest offset for which decoding can resume without data + // dependencies to arrive at aTime. aTime will be clamped to the start + // of mTimeMapping if it is earlier than the first element, and to the end + // if later than the last + bool GetOffsetForTime(uint64_t aTime, int64_t* aOffset); + + // Returns end offset of init segment or -1 if none found. + int64_t GetInitEndOffset(); + // Returns the end offset of the last complete block or -1 if none found. + int64_t GetLastBlockOffset(); + + // Returns start time + bool GetStartTime(uint64_t *aTime); + + // Returns keyframe for time + bool GetNextKeyframeTime(uint64_t aTime, uint64_t* aKeyframeTime); + +private: + // Private destructor, to discourage deletion outside of Release(): + ~WebMBufferedState() { + MOZ_COUNT_DTOR(WebMBufferedState); + } + + // Synchronizes access to the mTimeMapping array and mLastBlockOffset. + ReentrantMonitor mReentrantMonitor; + + // Sorted (by offset) map of data offsets to timecodes. Populated + // on the main thread as data is received and parsed by WebMBufferedParsers. + nsTArray<WebMTimeDataOffset> mTimeMapping; + // The last complete block parsed. -1 if not set. 
+ int64_t mLastBlockOffset; + + // Sorted (by offset) live parser instances. Main thread only. + nsTArray<WebMBufferedParser> mRangeParsers; +}; + +} // namespace mozilla + +#endif diff --git a/dom/media/webm/WebMDecoder.cpp b/dom/media/webm/WebMDecoder.cpp new file mode 100644 index 000000000..b41de6d40 --- /dev/null +++ b/dom/media/webm/WebMDecoder.cpp @@ -0,0 +1,99 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "mozilla/Preferences.h" +#include "MediaDecoderStateMachine.h" +#include "WebMDemuxer.h" +#include "WebMDecoder.h" +#include "VideoUtils.h" +#include "nsContentTypeParser.h" + +namespace mozilla { + +MediaDecoderStateMachine* WebMDecoder::CreateStateMachine() +{ + mReader = + new MediaFormatReader(this, new WebMDemuxer(GetResource()), GetVideoFrameContainer()); + return new MediaDecoderStateMachine(this, mReader); +} + +/* static */ +bool +WebMDecoder::IsEnabled() +{ + return Preferences::GetBool("media.webm.enabled"); +} + +/* static */ +bool +WebMDecoder::CanHandleMediaType(const nsACString& aMIMETypeExcludingCodecs, + const nsAString& aCodecs) +{ + if (!IsEnabled()) { + return false; + } + + const bool isWebMAudio = aMIMETypeExcludingCodecs.EqualsASCII("audio/webm"); + const bool isWebMVideo = aMIMETypeExcludingCodecs.EqualsASCII("video/webm"); + if (!isWebMAudio && !isWebMVideo) { + return false; + } + + nsTArray<nsCString> codecMimes; + if (aCodecs.IsEmpty()) { + // WebM guarantees that the only codecs it contained are vp8, vp9, opus or vorbis. + return true; + } + // Verify that all the codecs specified are ones that we expect that + // we can play. 
+ nsTArray<nsString> codecs; + if (!ParseCodecsString(aCodecs, codecs)) { + return false; + } + for (const nsString& codec : codecs) { + if (codec.EqualsLiteral("opus") || codec.EqualsLiteral("vorbis")) { + continue; + } + // Note: Only accept VP8/VP9 in a video content type, not in an audio + // content type. + if (isWebMVideo && + (codec.EqualsLiteral("vp8") || codec.EqualsLiteral("vp8.0") || + codec.EqualsLiteral("vp9") || codec.EqualsLiteral("vp9.0"))) { + + continue; + } + // Some unsupported codec. + return false; + } + return true; +} + +/* static */ bool +WebMDecoder::CanHandleMediaType(const nsAString& aContentType) +{ + nsContentTypeParser parser(aContentType); + nsAutoString mimeType; + nsresult rv = parser.GetType(mimeType); + if (NS_FAILED(rv)) { + return false; + } + nsString codecs; + parser.GetParameter("codecs", codecs); + + return CanHandleMediaType(NS_ConvertUTF16toUTF8(mimeType), + codecs); +} + +void +WebMDecoder::GetMozDebugReaderData(nsAString& aString) +{ + if (mReader) { + mReader->GetMozDebugReaderData(aString); + } +} + +} // namespace mozilla + diff --git a/dom/media/webm/WebMDecoder.h b/dom/media/webm/WebMDecoder.h new file mode 100644 index 000000000..0d6d747f6 --- /dev/null +++ b/dom/media/webm/WebMDecoder.h @@ -0,0 +1,46 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ +#if !defined(WebMDecoder_h_) +#define WebMDecoder_h_ + +#include "MediaDecoder.h" +#include "MediaFormatReader.h" + +namespace mozilla { + +class WebMDecoder : public MediaDecoder +{ +public: + explicit WebMDecoder(MediaDecoderOwner* aOwner) : MediaDecoder(aOwner) {} + MediaDecoder* Clone(MediaDecoderOwner* aOwner) override { + if (!IsWebMEnabled()) { + return nullptr; + } + return new WebMDecoder(aOwner); + } + MediaDecoderStateMachine* CreateStateMachine() override; + + // Returns true if the WebM backend is preffed on. + static bool IsEnabled(); + + // Returns true if aMIMEType is a type that we think we can render with + // a WebM platform decoder backend. If aCodecs is non-empty, it is filled + // with a comma-delimited list of codecs to check support for. Notes in + // out params whether the codecs string contains Opus/Vorbis or VP8/VP9. + static bool CanHandleMediaType(const nsACString& aMIMETypeExcludingCodecs, + const nsAString& aCodecs); + + static bool CanHandleMediaType(const nsAString& aContentType); + + void GetMozDebugReaderData(nsAString& aString) override; + +private: + RefPtr<MediaFormatReader> mReader; +}; + +} // namespace mozilla + +#endif diff --git a/dom/media/webm/WebMDemuxer.cpp b/dom/media/webm/WebMDemuxer.cpp new file mode 100644 index 000000000..20ed71581 --- /dev/null +++ b/dom/media/webm/WebMDemuxer.cpp @@ -0,0 +1,1155 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/ + +#include "nsError.h" +#include "MediaDecoderStateMachine.h" +#include "AbstractMediaDecoder.h" +#include "MediaResource.h" +#include "OpusDecoder.h" +#include "WebMDemuxer.h" +#include "WebMBufferedParser.h" +#include "gfx2DGlue.h" +#include "mozilla/Atomics.h" +#include "mozilla/EndianUtils.h" +#include "mozilla/SharedThreadPool.h" +#include "MediaDataDemuxer.h" +#include "nsAutoPtr.h" +#include "nsAutoRef.h" +#include "NesteggPacketHolder.h" +#include "XiphExtradata.h" +#include "prprf.h" // leaving it for PR_vsnprintf() +#include "mozilla/Sprintf.h" + +#include <algorithm> +#include <stdint.h> + +#define VPX_DONT_DEFINE_STDINT_TYPES +#include "vpx/vp8dx.h" +#include "vpx/vpx_decoder.h" + +#define WEBM_DEBUG(arg, ...) MOZ_LOG(gMediaDemuxerLog, mozilla::LogLevel::Debug, ("WebMDemuxer(%p)::%s: " arg, this, __func__, ##__VA_ARGS__)) +extern mozilla::LazyLogModule gMediaDemuxerLog; + +namespace mozilla { + +using namespace gfx; + +LazyLogModule gNesteggLog("Nestegg"); + +// How far ahead will we look when searching future keyframe. In microseconds. +// This value is based on what appears to be a reasonable value as most webm +// files encountered appear to have keyframes located < 4s. +#define MAX_LOOK_AHEAD 10000000 + +static Atomic<uint32_t> sStreamSourceID(0u); + +// Functions for reading and seeking using WebMDemuxer required for +// nestegg_io. The 'user data' passed to these functions is the +// demuxer. 
+static int webmdemux_read(void* aBuffer, size_t aLength, void* aUserData) +{ + MOZ_ASSERT(aUserData); + MOZ_ASSERT(aLength < UINT32_MAX); + WebMDemuxer::NestEggContext* context = + reinterpret_cast<WebMDemuxer::NestEggContext*>(aUserData); + uint32_t count = aLength; + if (context->IsMediaSource()) { + int64_t length = context->GetEndDataOffset(); + int64_t position = context->GetResource()->Tell(); + MOZ_ASSERT(position <= context->GetResource()->GetLength()); + MOZ_ASSERT(position <= length); + if (length >= 0 && count + position > length) { + count = length - position; + } + MOZ_ASSERT(count <= aLength); + } + uint32_t bytes = 0; + nsresult rv = + context->GetResource()->Read(static_cast<char*>(aBuffer), count, &bytes); + bool eof = bytes < aLength; + return NS_FAILED(rv) ? -1 : eof ? 0 : 1; +} + +static int webmdemux_seek(int64_t aOffset, int aWhence, void* aUserData) +{ + MOZ_ASSERT(aUserData); + WebMDemuxer::NestEggContext* context = reinterpret_cast<WebMDemuxer::NestEggContext*>(aUserData); + nsresult rv = context->GetResource()->Seek(aWhence, aOffset); + return NS_SUCCEEDED(rv) ? 0 : -1; +} + +static int64_t webmdemux_tell(void* aUserData) +{ + MOZ_ASSERT(aUserData); + WebMDemuxer::NestEggContext* context = reinterpret_cast<WebMDemuxer::NestEggContext*>(aUserData); + return context->GetResource()->Tell(); +} + +static void webmdemux_log(nestegg* aContext, + unsigned int aSeverity, + char const* aFormat, ...) 
+{ + if (!MOZ_LOG_TEST(gNesteggLog, LogLevel::Debug)) { + return; + } + + va_list args; + char msg[256]; + const char* sevStr; + + switch(aSeverity) { + case NESTEGG_LOG_DEBUG: + sevStr = "DBG"; + break; + case NESTEGG_LOG_INFO: + sevStr = "INF"; + break; + case NESTEGG_LOG_WARNING: + sevStr = "WRN"; + break; + case NESTEGG_LOG_ERROR: + sevStr = "ERR"; + break; + case NESTEGG_LOG_CRITICAL: + sevStr = "CRT"; + break; + default: + sevStr = "UNK"; + break; + } + + va_start(args, aFormat); + + SprintfLiteral(msg, "%p [Nestegg-%s] ", aContext, sevStr); + PR_vsnprintf(msg+strlen(msg), sizeof(msg)-strlen(msg), aFormat, args); + MOZ_LOG(gNesteggLog, LogLevel::Debug, (msg)); + + va_end(args); +} + +WebMDemuxer::NestEggContext::~NestEggContext() +{ + if (mContext) { + nestegg_destroy(mContext); + } +} + +int +WebMDemuxer::NestEggContext::Init() +{ + nestegg_io io; + io.read = webmdemux_read; + io.seek = webmdemux_seek; + io.tell = webmdemux_tell; + io.userdata = this; + + // While reading the metadata, we do not really care about which nestegg + // context is being used so long that they are both initialised. + // For reading the metadata however, we will use mVideoContext. + return nestegg_init(&mContext, io, &webmdemux_log, + mParent->IsMediaSource() ? 
mResource.GetLength() : -1); +} + +WebMDemuxer::WebMDemuxer(MediaResource* aResource) + : WebMDemuxer(aResource, false) +{ +} + +WebMDemuxer::WebMDemuxer(MediaResource* aResource, bool aIsMediaSource) + : mVideoContext(this, aResource) + , mAudioContext(this, aResource) + , mBufferedState(nullptr) + , mInitData(nullptr) + , mVideoTrack(0) + , mAudioTrack(0) + , mSeekPreroll(0) + , mAudioCodec(-1) + , mVideoCodec(-1) + , mHasVideo(false) + , mHasAudio(false) + , mNeedReIndex(true) + , mLastWebMBlockOffset(-1) + , mIsMediaSource(aIsMediaSource) +{ +} + +WebMDemuxer::~WebMDemuxer() +{ + Reset(TrackInfo::kVideoTrack); + Reset(TrackInfo::kAudioTrack); +} + +RefPtr<WebMDemuxer::InitPromise> +WebMDemuxer::Init() +{ + InitBufferedState(); + + if (NS_FAILED(ReadMetadata())) { + return InitPromise::CreateAndReject(NS_ERROR_DOM_MEDIA_METADATA_ERR, __func__); + } + + if (!GetNumberTracks(TrackInfo::kAudioTrack) && + !GetNumberTracks(TrackInfo::kVideoTrack)) { + return InitPromise::CreateAndReject(NS_ERROR_DOM_MEDIA_METADATA_ERR, __func__); + } + + return InitPromise::CreateAndResolve(NS_OK, __func__); +} + +void +WebMDemuxer::InitBufferedState() +{ + MOZ_ASSERT(!mBufferedState); + mBufferedState = new WebMBufferedState; +} + +bool +WebMDemuxer::HasTrackType(TrackInfo::TrackType aType) const +{ + return !!GetNumberTracks(aType); +} + +uint32_t +WebMDemuxer::GetNumberTracks(TrackInfo::TrackType aType) const +{ + switch(aType) { + case TrackInfo::kAudioTrack: + return mHasAudio ? 1 : 0; + case TrackInfo::kVideoTrack: + return mHasVideo ? 
1 : 0; + default: + return 0; + } +} + +UniquePtr<TrackInfo> +WebMDemuxer::GetTrackInfo(TrackInfo::TrackType aType, + size_t aTrackNumber) const +{ + switch(aType) { + case TrackInfo::kAudioTrack: + return mInfo.mAudio.Clone(); + case TrackInfo::kVideoTrack: + return mInfo.mVideo.Clone(); + default: + return nullptr; + } +} + +already_AddRefed<MediaTrackDemuxer> +WebMDemuxer::GetTrackDemuxer(TrackInfo::TrackType aType, uint32_t aTrackNumber) +{ + if (GetNumberTracks(aType) <= aTrackNumber) { + return nullptr; + } + RefPtr<WebMTrackDemuxer> e = + new WebMTrackDemuxer(this, aType, aTrackNumber); + mDemuxers.AppendElement(e); + + return e.forget(); +} + +nsresult +WebMDemuxer::Reset(TrackInfo::TrackType aType) +{ + if (aType == TrackInfo::kVideoTrack) { + mVideoPackets.Reset(); + } else { + mAudioPackets.Reset(); + } + return NS_OK; +} + +nsresult +WebMDemuxer::ReadMetadata() +{ + int r = mVideoContext.Init(); + if (r == -1) { + return NS_ERROR_FAILURE; + } + if (mAudioContext.Init() == -1) { + return NS_ERROR_FAILURE; + } + + // For reading the metadata we can only use the video resource/context. + MediaResourceIndex& resource = Resource(TrackInfo::kVideoTrack); + nestegg* context = Context(TrackInfo::kVideoTrack); + + { + // Check how much data nestegg read and force feed it to BufferedState. 
+ RefPtr<MediaByteBuffer> buffer = resource.MediaReadAt(0, resource.Tell()); + if (!buffer) { + return NS_ERROR_FAILURE; + } + mBufferedState->NotifyDataArrived(buffer->Elements(), buffer->Length(), 0); + if (mBufferedState->GetInitEndOffset() < 0) { + return NS_ERROR_FAILURE; + } + MOZ_ASSERT(mBufferedState->GetInitEndOffset() <= resource.Tell()); + } + mInitData = resource.MediaReadAt(0, mBufferedState->GetInitEndOffset()); + if (!mInitData || + mInitData->Length() != size_t(mBufferedState->GetInitEndOffset())) { + return NS_ERROR_FAILURE; + } + + unsigned int ntracks = 0; + r = nestegg_track_count(context, &ntracks); + if (r == -1) { + return NS_ERROR_FAILURE; + } + + for (unsigned int track = 0; track < ntracks; ++track) { + int id = nestegg_track_codec_id(context, track); + if (id == -1) { + return NS_ERROR_FAILURE; + } + int type = nestegg_track_type(context, track); + if (type == NESTEGG_TRACK_VIDEO && !mHasVideo) { + nestegg_video_params params; + r = nestegg_track_video_params(context, track, &params); + if (r == -1) { + return NS_ERROR_FAILURE; + } + mVideoCodec = nestegg_track_codec_id(context, track); + switch(mVideoCodec) { + case NESTEGG_CODEC_VP8: + mInfo.mVideo.mMimeType = "video/webm; codecs=vp8"; + break; + case NESTEGG_CODEC_VP9: + mInfo.mVideo.mMimeType = "video/webm; codecs=vp9"; + break; + default: + NS_WARNING("Unknown WebM video codec"); + return NS_ERROR_FAILURE; + } + // Picture region, taking into account cropping, before scaling + // to the display size.
+ unsigned int cropH = params.crop_right + params.crop_left; + unsigned int cropV = params.crop_bottom + params.crop_top; + nsIntRect pictureRect(params.crop_left, + params.crop_top, + params.width - cropH, + params.height - cropV); + + // If the cropping data appears invalid then use the frame data + if (pictureRect.width <= 0 || + pictureRect.height <= 0 || + pictureRect.x < 0 || + pictureRect.y < 0) { + pictureRect.x = 0; + pictureRect.y = 0; + pictureRect.width = params.width; + pictureRect.height = params.height; + } + + // Validate the container-reported frame and pictureRect sizes. This + // ensures that our video frame creation code doesn't overflow. + nsIntSize displaySize(params.display_width, params.display_height); + nsIntSize frameSize(params.width, params.height); + if (!IsValidVideoRegion(frameSize, pictureRect, displaySize)) { + // Video track's frame sizes will overflow. Ignore the video track. + continue; + } + + mVideoTrack = track; + mHasVideo = true; + + mInfo.mVideo.mDisplay = displaySize; + mInfo.mVideo.mImage = frameSize; + mInfo.mVideo.SetImageRect(pictureRect); + + switch (params.stereo_mode) { + case NESTEGG_VIDEO_MONO: + mInfo.mVideo.mStereoMode = StereoMode::MONO; + break; + case NESTEGG_VIDEO_STEREO_LEFT_RIGHT: + mInfo.mVideo.mStereoMode = StereoMode::LEFT_RIGHT; + break; + case NESTEGG_VIDEO_STEREO_BOTTOM_TOP: + mInfo.mVideo.mStereoMode = StereoMode::BOTTOM_TOP; + break; + case NESTEGG_VIDEO_STEREO_TOP_BOTTOM: + mInfo.mVideo.mStereoMode = StereoMode::TOP_BOTTOM; + break; + case NESTEGG_VIDEO_STEREO_RIGHT_LEFT: + mInfo.mVideo.mStereoMode = StereoMode::RIGHT_LEFT; + break; + } + uint64_t duration = 0; + r = nestegg_duration(context, &duration); + if (!r) { + mInfo.mVideo.mDuration = media::TimeUnit::FromNanoseconds(duration).ToMicroseconds(); + } + mInfo.mVideo.mCrypto = GetTrackCrypto(TrackInfo::kVideoTrack, track); + if (mInfo.mVideo.mCrypto.mValid) { + mCrypto.AddInitData(NS_LITERAL_STRING("webm"), mInfo.mVideo.mCrypto.mKeyId); + } + 
} else if (type == NESTEGG_TRACK_AUDIO && !mHasAudio) { + nestegg_audio_params params; + r = nestegg_track_audio_params(context, track, &params); + if (r == -1) { + return NS_ERROR_FAILURE; + } + + mAudioTrack = track; + mHasAudio = true; + mAudioCodec = nestegg_track_codec_id(context, track); + if (mAudioCodec == NESTEGG_CODEC_VORBIS) { + mInfo.mAudio.mMimeType = "audio/vorbis"; + } else if (mAudioCodec == NESTEGG_CODEC_OPUS) { + mInfo.mAudio.mMimeType = "audio/opus"; + OpusDataDecoder::AppendCodecDelay(mInfo.mAudio.mCodecSpecificConfig, + media::TimeUnit::FromNanoseconds(params.codec_delay).ToMicroseconds()); + } + mSeekPreroll = params.seek_preroll; + mInfo.mAudio.mRate = params.rate; + mInfo.mAudio.mChannels = params.channels; + + unsigned int nheaders = 0; + r = nestegg_track_codec_data_count(context, track, &nheaders); + if (r == -1) { + return NS_ERROR_FAILURE; + } + + AutoTArray<const unsigned char*,4> headers; + AutoTArray<size_t,4> headerLens; + for (uint32_t header = 0; header < nheaders; ++header) { + unsigned char* data = 0; + size_t length = 0; + r = nestegg_track_codec_data(context, track, header, &data, &length); + if (r == -1) { + return NS_ERROR_FAILURE; + } + headers.AppendElement(data); + headerLens.AppendElement(length); + } + + // Vorbis has 3 headers, convert to Xiph extradata format to send them to + // the demuxer. + // TODO: This is already the format WebM stores them in. Would be nice + // to avoid having libnestegg split them only for us to pack them again, + // but libnestegg does not give us an API to access this data directly.
+ if (nheaders > 1) { + if (!XiphHeadersToExtradata(mInfo.mAudio.mCodecSpecificConfig, + headers, headerLens)) { + return NS_ERROR_FAILURE; + } + } + else { + mInfo.mAudio.mCodecSpecificConfig->AppendElements(headers[0], + headerLens[0]); + } + uint64_t duration = 0; + r = nestegg_duration(context, &duration); + if (!r) { + mInfo.mAudio.mDuration = media::TimeUnit::FromNanoseconds(duration).ToMicroseconds(); + } + mInfo.mAudio.mCrypto = GetTrackCrypto(TrackInfo::kAudioTrack, track); + if (mInfo.mAudio.mCrypto.mValid) { + mCrypto.AddInitData(NS_LITERAL_STRING("webm"), mInfo.mAudio.mCrypto.mKeyId); + } + } + } + return NS_OK; +} + +bool +WebMDemuxer::IsSeekable() const +{ + return Context(TrackInfo::kVideoTrack) && + nestegg_has_cues(Context(TrackInfo::kVideoTrack)); +} + +bool +WebMDemuxer::IsSeekableOnlyInBufferedRanges() const +{ + return Context(TrackInfo::kVideoTrack) && + !nestegg_has_cues(Context(TrackInfo::kVideoTrack)); +} + +void +WebMDemuxer::EnsureUpToDateIndex() +{ + if (!mNeedReIndex || !mInitData) { + return; + } + AutoPinned<MediaResource> resource( + Resource(TrackInfo::kVideoTrack).GetResource()); + MediaByteRangeSet byteRanges; + nsresult rv = resource->GetCachedRanges(byteRanges); + if (NS_FAILED(rv) || !byteRanges.Length()) { + return; + } + mBufferedState->UpdateIndex(byteRanges, resource); + + mNeedReIndex = false; + + if (!mIsMediaSource) { + return; + } + mLastWebMBlockOffset = mBufferedState->GetLastBlockOffset(); + MOZ_ASSERT(mLastWebMBlockOffset <= resource->GetLength()); +} + +void +WebMDemuxer::NotifyDataArrived() +{ + WEBM_DEBUG(""); + mNeedReIndex = true; +} + +void +WebMDemuxer::NotifyDataRemoved() +{ + mBufferedState->Reset(); + if (mInitData) { + mBufferedState->NotifyDataArrived(mInitData->Elements(), mInitData->Length(), 0); + } + mNeedReIndex = true; +} + +UniquePtr<EncryptionInfo> +WebMDemuxer::GetCrypto() +{ + return mCrypto.IsEncrypted() ? 
MakeUnique<EncryptionInfo>(mCrypto) : nullptr; +} + +CryptoTrack +WebMDemuxer::GetTrackCrypto(TrackInfo::TrackType aType, size_t aTrackNumber) { + const int WEBM_IV_SIZE = 16; + const unsigned char * contentEncKeyId; + size_t contentEncKeyIdLength; + CryptoTrack crypto; + nestegg* context = Context(aType); + + int r = nestegg_track_content_enc_key_id(context, aTrackNumber, &contentEncKeyId, &contentEncKeyIdLength); + + if (r == -1) { + WEBM_DEBUG("nestegg_track_content_enc_key_id failed r=%d", r); + return crypto; + } + + uint32_t i; + nsTArray<uint8_t> initData; + for (i = 0; i < contentEncKeyIdLength; i++) { + initData.AppendElement(contentEncKeyId[i]); + } + + if (!initData.IsEmpty()) { + crypto.mValid = true; + // crypto.mMode is not used for WebMs + crypto.mIVSize = WEBM_IV_SIZE; + crypto.mKeyId = Move(initData); + } + + return crypto; +} + +bool +WebMDemuxer::GetNextPacket(TrackInfo::TrackType aType, MediaRawDataQueue *aSamples) +{ + if (mIsMediaSource) { + // To ensure mLastWebMBlockOffset is properly up to date. + EnsureUpToDateIndex(); + } + + RefPtr<NesteggPacketHolder> holder(NextPacket(aType)); + + if (!holder) { + return false; + } + + int r = 0; + unsigned int count = 0; + r = nestegg_packet_count(holder->Packet(), &count); + if (r == -1) { + return false; + } + int64_t tstamp = holder->Timestamp(); + int64_t duration = holder->Duration(); + + // The end time of this frame is the start time of the next frame. Fetch + // the timestamp of the next packet for this track. If we've reached the + // end of the resource, use the file's duration as the end time of this + // video frame. 
+ int64_t next_tstamp = INT64_MIN; + if (aType == TrackInfo::kAudioTrack) { + RefPtr<NesteggPacketHolder> next_holder(NextPacket(aType)); + if (next_holder) { + next_tstamp = next_holder->Timestamp(); + PushAudioPacket(next_holder); + } else if (duration >= 0) { + next_tstamp = tstamp + duration; + } else if (!mIsMediaSource || + (mIsMediaSource && mLastAudioFrameTime.isSome())) { + next_tstamp = tstamp; + next_tstamp += tstamp - mLastAudioFrameTime.refOr(0); + } else { + PushAudioPacket(holder); + } + mLastAudioFrameTime = Some(tstamp); + } else if (aType == TrackInfo::kVideoTrack) { + RefPtr<NesteggPacketHolder> next_holder(NextPacket(aType)); + if (next_holder) { + next_tstamp = next_holder->Timestamp(); + PushVideoPacket(next_holder); + } else if (duration >= 0) { + next_tstamp = tstamp + duration; + } else if (!mIsMediaSource || + (mIsMediaSource && mLastVideoFrameTime.isSome())) { + next_tstamp = tstamp; + next_tstamp += tstamp - mLastVideoFrameTime.refOr(0); + } else { + PushVideoPacket(holder); + } + mLastVideoFrameTime = Some(tstamp); + } + + if (mIsMediaSource && next_tstamp == INT64_MIN) { + return false; + } + + int64_t discardPadding = 0; + if (aType == TrackInfo::kAudioTrack) { + (void) nestegg_packet_discard_padding(holder->Packet(), &discardPadding); + } + + int packetEncryption = nestegg_packet_encryption(holder->Packet()); + + for (uint32_t i = 0; i < count; ++i) { + unsigned char* data; + size_t length; + r = nestegg_packet_data(holder->Packet(), i, &data, &length); + if (r == -1) { + WEBM_DEBUG("nestegg_packet_data failed r=%d", r); + return false; + } + bool isKeyframe = false; + if (aType == TrackInfo::kAudioTrack) { + isKeyframe = true; + } else if (aType == TrackInfo::kVideoTrack) { + if (packetEncryption == NESTEGG_PACKET_HAS_SIGNAL_BYTE_ENCRYPTED) { + // Packet is encrypted, can't peek, use packet info + isKeyframe = nestegg_packet_has_keyframe(holder->Packet()) == NESTEGG_PACKET_HAS_KEYFRAME_TRUE; + } else { + vpx_codec_stream_info_t si; 
+ PodZero(&si); + si.sz = sizeof(si); + switch (mVideoCodec) { + case NESTEGG_CODEC_VP8: + vpx_codec_peek_stream_info(vpx_codec_vp8_dx(), data, length, &si); + break; + case NESTEGG_CODEC_VP9: + vpx_codec_peek_stream_info(vpx_codec_vp9_dx(), data, length, &si); + break; + } + isKeyframe = si.is_kf; + if (isKeyframe) { + // We only look for resolution changes on keyframes for both VP8 and + // VP9. Other resolution changes are invalid. + if (mLastSeenFrameWidth.isSome() && mLastSeenFrameHeight.isSome() && + (si.w != mLastSeenFrameWidth.value() || + si.h != mLastSeenFrameHeight.value())) { + mInfo.mVideo.mDisplay = nsIntSize(si.w, si.h); + mSharedVideoTrackInfo = new SharedTrackInfo(mInfo.mVideo, ++sStreamSourceID); + } + mLastSeenFrameWidth = Some(si.w); + mLastSeenFrameHeight = Some(si.h); + } + } + } + + WEBM_DEBUG("push sample tstamp: %ld next_tstamp: %ld length: %ld kf: %d", + tstamp, next_tstamp, length, isKeyframe); + RefPtr<MediaRawData> sample = new MediaRawData(data, length); + if (length && !sample->Data()) { + // OOM. + return false; + } + sample->mTimecode = tstamp; + sample->mTime = tstamp; + sample->mDuration = next_tstamp - tstamp; + sample->mOffset = holder->Offset(); + sample->mKeyframe = isKeyframe; + if (discardPadding && i == count - 1) { + CheckedInt64 discardFrames; + if (discardPadding < 0) { + // This is an invalid value as discard padding should never be negative. + // Set to maximum value so that the decoder will reject it as it's + // greater than the number of frames available. 
+ discardFrames = INT32_MAX; + WEBM_DEBUG("Invalid negative discard padding"); + } else { + discardFrames = TimeUnitToFrames( + media::TimeUnit::FromNanoseconds(discardPadding), mInfo.mAudio.mRate); + } + if (discardFrames.isValid()) { + sample->mDiscardPadding = discardFrames.value(); + } + } + + if (packetEncryption == NESTEGG_PACKET_HAS_SIGNAL_BYTE_UNENCRYPTED || + packetEncryption == NESTEGG_PACKET_HAS_SIGNAL_BYTE_ENCRYPTED) { + nsAutoPtr<MediaRawDataWriter> writer(sample->CreateWriter()); + unsigned char const* iv; + size_t ivLength; + nestegg_packet_iv(holder->Packet(), &iv, &ivLength); + writer->mCrypto.mValid = true; + writer->mCrypto.mIVSize = ivLength; + if (ivLength == 0) { + // Frame is not encrypted + writer->mCrypto.mPlainSizes.AppendElement(length); + writer->mCrypto.mEncryptedSizes.AppendElement(0); + } else { + // Frame is encrypted + writer->mCrypto.mIV.AppendElements(iv, 8); + // Iv from a sample is 64 bits, must be padded with 64 bits more 0s + // in compliance with spec + for (uint32_t i = 0; i < 8; i++) { + writer->mCrypto.mIV.AppendElement(0); + } + writer->mCrypto.mPlainSizes.AppendElement(0); + writer->mCrypto.mEncryptedSizes.AppendElement(length); + } + } + if (aType == TrackInfo::kVideoTrack) { + sample->mTrackInfo = mSharedVideoTrackInfo; + } + aSamples->Push(sample); + } + return true; +} + +RefPtr<NesteggPacketHolder> +WebMDemuxer::NextPacket(TrackInfo::TrackType aType) +{ + bool isVideo = aType == TrackInfo::kVideoTrack; + + // Flag to indicate that we do need to playback these types of + // packets. + bool hasType = isVideo ? mHasVideo : mHasAudio; + + if (!hasType) { + return nullptr; + } + + // The packet queue for the type that we are interested in. + WebMPacketQueue &packets = isVideo ? mVideoPackets : mAudioPackets; + + if (packets.GetSize() > 0) { + return packets.PopFront(); + } + + // Track we are interested in + uint32_t ourTrack = isVideo ? 
mVideoTrack : mAudioTrack; + + do { + RefPtr<NesteggPacketHolder> holder = DemuxPacket(aType); + if (!holder) { + return nullptr; + } + + if (ourTrack == holder->Track()) { + return holder; + } + } while (true); +} + +RefPtr<NesteggPacketHolder> +WebMDemuxer::DemuxPacket(TrackInfo::TrackType aType) +{ + nestegg_packet* packet; + int r = nestegg_read_packet(Context(aType), &packet); + if (r == 0) { + nestegg_read_reset(Context(aType)); + return nullptr; + } else if (r < 0) { + return nullptr; + } + + unsigned int track = 0; + r = nestegg_packet_track(packet, &track); + if (r == -1) { + return nullptr; + } + + int64_t offset = Resource(aType).Tell(); + RefPtr<NesteggPacketHolder> holder = new NesteggPacketHolder(); + if (!holder->Init(packet, offset, track, false)) { + return nullptr; + } + + return holder; +} + +void +WebMDemuxer::PushAudioPacket(NesteggPacketHolder* aItem) +{ + mAudioPackets.PushFront(aItem); +} + +void +WebMDemuxer::PushVideoPacket(NesteggPacketHolder* aItem) +{ + mVideoPackets.PushFront(aItem); +} + +nsresult +WebMDemuxer::SeekInternal(TrackInfo::TrackType aType, + const media::TimeUnit& aTarget) +{ + EnsureUpToDateIndex(); + uint32_t trackToSeek = mHasVideo ? 
mVideoTrack : mAudioTrack; + uint64_t target = aTarget.ToNanoseconds(); + + if (NS_FAILED(Reset(aType))) { + return NS_ERROR_FAILURE; + } + + if (mSeekPreroll) { + uint64_t startTime = 0; + if (!mBufferedState->GetStartTime(&startTime)) { + startTime = 0; + } + WEBM_DEBUG("Seek Target: %f", + media::TimeUnit::FromNanoseconds(target).ToSeconds()); + if (target < mSeekPreroll || target - mSeekPreroll < startTime) { + target = startTime; + } else { + target -= mSeekPreroll; + } + WEBM_DEBUG("SeekPreroll: %f StartTime: %f Adjusted Target: %f", + media::TimeUnit::FromNanoseconds(mSeekPreroll).ToSeconds(), + media::TimeUnit::FromNanoseconds(startTime).ToSeconds(), + media::TimeUnit::FromNanoseconds(target).ToSeconds()); + } + int r = nestegg_track_seek(Context(aType), trackToSeek, target); + if (r == -1) { + WEBM_DEBUG("track_seek for track %u to %f failed, r=%d", trackToSeek, + media::TimeUnit::FromNanoseconds(target).ToSeconds(), r); + // Try seeking directly based on cluster information in memory. 
+ int64_t offset = 0; + bool rv = mBufferedState->GetOffsetForTime(target, &offset); + if (!rv) { + WEBM_DEBUG("mBufferedState->GetOffsetForTime failed too"); + return NS_ERROR_FAILURE; + } + + r = nestegg_offset_seek(Context(aType), offset); + if (r == -1) { + WEBM_DEBUG("and nestegg_offset_seek to %" PRIu64 " failed", offset); + return NS_ERROR_FAILURE; + } + WEBM_DEBUG("got offset from buffered state: %" PRIu64 "", offset); + } + + if (aType == TrackInfo::kAudioTrack) { + mLastAudioFrameTime.reset(); + } else { + mLastVideoFrameTime.reset(); + } + + return NS_OK; +} + +media::TimeIntervals +WebMDemuxer::GetBuffered() +{ + EnsureUpToDateIndex(); + AutoPinned<MediaResource> resource( + Resource(TrackInfo::kVideoTrack).GetResource()); + + media::TimeIntervals buffered; + + MediaByteRangeSet ranges; + nsresult rv = resource->GetCachedRanges(ranges); + if (NS_FAILED(rv)) { + return media::TimeIntervals(); + } + uint64_t duration = 0; + uint64_t startOffset = 0; + if (!nestegg_duration(Context(TrackInfo::kVideoTrack), &duration)) { + if(mBufferedState->GetStartTime(&startOffset)) { + duration += startOffset; + } + WEBM_DEBUG("Duration: %f StartTime: %f", + media::TimeUnit::FromNanoseconds(duration).ToSeconds(), + media::TimeUnit::FromNanoseconds(startOffset).ToSeconds()); + } + for (uint32_t index = 0; index < ranges.Length(); index++) { + uint64_t start, end; + bool rv = mBufferedState->CalculateBufferedForRange(ranges[index].mStart, + ranges[index].mEnd, + &start, &end); + if (rv) { + NS_ASSERTION(startOffset <= start, + "startOffset negative or larger than start time"); + + if (duration && end > duration) { + WEBM_DEBUG("limit range to duration, end: %f duration: %f", + media::TimeUnit::FromNanoseconds(end).ToSeconds(), + media::TimeUnit::FromNanoseconds(duration).ToSeconds()); + end = duration; + } + media::TimeUnit startTime = media::TimeUnit::FromNanoseconds(start); + media::TimeUnit endTime = media::TimeUnit::FromNanoseconds(end); + WEBM_DEBUG("add range 
%f-%f", startTime.ToSeconds(), endTime.ToSeconds()); + buffered += media::TimeInterval(startTime, endTime); + } + } + return buffered; +} + +bool WebMDemuxer::GetOffsetForTime(uint64_t aTime, int64_t* aOffset) +{ + EnsureUpToDateIndex(); + return mBufferedState && mBufferedState->GetOffsetForTime(aTime, aOffset); +} + + +//WebMTrackDemuxer +WebMTrackDemuxer::WebMTrackDemuxer(WebMDemuxer* aParent, + TrackInfo::TrackType aType, + uint32_t aTrackNumber) + : mParent(aParent) + , mType(aType) + , mNeedKeyframe(true) +{ + mInfo = mParent->GetTrackInfo(aType, aTrackNumber); + MOZ_ASSERT(mInfo); +} + +WebMTrackDemuxer::~WebMTrackDemuxer() +{ + mSamples.Reset(); +} + +UniquePtr<TrackInfo> +WebMTrackDemuxer::GetInfo() const +{ + return mInfo->Clone(); +} + +RefPtr<WebMTrackDemuxer::SeekPromise> +WebMTrackDemuxer::Seek(media::TimeUnit aTime) +{ + // Seeks to aTime. Upon success, SeekPromise will be resolved with the + // actual time seeked to. Typically the random access point time + + media::TimeUnit seekTime = aTime; + mSamples.Reset(); + mParent->SeekInternal(mType, aTime); + mParent->GetNextPacket(mType, &mSamples); + mNeedKeyframe = true; + + // Check what time we actually seeked to. 
+ if (mSamples.GetSize() > 0) { + const RefPtr<MediaRawData>& sample = mSamples.First(); + seekTime = media::TimeUnit::FromMicroseconds(sample->mTime); + } + SetNextKeyFrameTime(); + + return SeekPromise::CreateAndResolve(seekTime, __func__); +} + +RefPtr<MediaRawData> +WebMTrackDemuxer::NextSample() +{ + while (mSamples.GetSize() < 1 && mParent->GetNextPacket(mType, &mSamples)) { + } + if (mSamples.GetSize()) { + return mSamples.PopFront(); + } + return nullptr; +} + +RefPtr<WebMTrackDemuxer::SamplesPromise> +WebMTrackDemuxer::GetSamples(int32_t aNumSamples) +{ + RefPtr<SamplesHolder> samples = new SamplesHolder; + MOZ_ASSERT(aNumSamples); + + while (aNumSamples) { + RefPtr<MediaRawData> sample(NextSample()); + if (!sample) { + break; + } + if (mNeedKeyframe && !sample->mKeyframe) { + continue; + } + mNeedKeyframe = false; + samples->mSamples.AppendElement(sample); + aNumSamples--; + } + + if (samples->mSamples.IsEmpty()) { + return SamplesPromise::CreateAndReject(NS_ERROR_DOM_MEDIA_END_OF_STREAM, __func__); + } else { + UpdateSamples(samples->mSamples); + return SamplesPromise::CreateAndResolve(samples, __func__); + } +} + +void +WebMTrackDemuxer::SetNextKeyFrameTime() +{ + if (mType != TrackInfo::kVideoTrack || mParent->IsMediaSource()) { + return; + } + + int64_t frameTime = -1; + + mNextKeyframeTime.reset(); + + MediaRawDataQueue skipSamplesQueue; + bool foundKeyframe = false; + while (!foundKeyframe && mSamples.GetSize()) { + RefPtr<MediaRawData> sample = mSamples.PopFront(); + if (sample->mKeyframe) { + frameTime = sample->mTime; + foundKeyframe = true; + } + skipSamplesQueue.Push(sample.forget()); + } + Maybe<int64_t> startTime; + if (skipSamplesQueue.GetSize()) { + const RefPtr<MediaRawData>& sample = skipSamplesQueue.First(); + startTime.emplace(sample->mTimecode); + } + // Demux and buffer frames until we find a keyframe. 
+ RefPtr<MediaRawData> sample; + while (!foundKeyframe && (sample = NextSample())) { + if (sample->mKeyframe) { + frameTime = sample->mTime; + foundKeyframe = true; + } + int64_t sampleTimecode = sample->mTimecode; + skipSamplesQueue.Push(sample.forget()); + if (!startTime) { + startTime.emplace(sampleTimecode); + } else if (!foundKeyframe && + sampleTimecode > startTime.ref() + MAX_LOOK_AHEAD) { + WEBM_DEBUG("Couldn't find keyframe in a reasonable time, aborting"); + break; + } + } + // We may have demuxed more than intended, so ensure that all frames are kept + // in the right order. + mSamples.PushFront(Move(skipSamplesQueue)); + + if (frameTime != -1) { + mNextKeyframeTime.emplace(media::TimeUnit::FromMicroseconds(frameTime)); + WEBM_DEBUG("Next Keyframe %f (%u queued %.02fs)", + mNextKeyframeTime.value().ToSeconds(), + uint32_t(mSamples.GetSize()), + media::TimeUnit::FromMicroseconds(mSamples.Last()->mTimecode - mSamples.First()->mTimecode).ToSeconds()); + } else { + WEBM_DEBUG("Couldn't determine next keyframe time (%u queued)", + uint32_t(mSamples.GetSize())); + } +} + +void +WebMTrackDemuxer::Reset() +{ + mSamples.Reset(); + media::TimeIntervals buffered = GetBuffered(); + mNeedKeyframe = true; + if (buffered.Length()) { + WEBM_DEBUG("Seek to start point: %f", buffered.Start(0).ToSeconds()); + mParent->SeekInternal(mType, buffered.Start(0)); + SetNextKeyFrameTime(); + } else { + mNextKeyframeTime.reset(); + } +} + +void +WebMTrackDemuxer::UpdateSamples(nsTArray<RefPtr<MediaRawData>>& aSamples) +{ + for (const auto& sample : aSamples) { + if (sample->mCrypto.mValid) { + nsAutoPtr<MediaRawDataWriter> writer(sample->CreateWriter()); + writer->mCrypto.mMode = mInfo->mCrypto.mMode; + writer->mCrypto.mIVSize = mInfo->mCrypto.mIVSize; + writer->mCrypto.mKeyId.AppendElements(mInfo->mCrypto.mKeyId); + } + } + if (mNextKeyframeTime.isNothing() || + aSamples.LastElement()->mTime >= mNextKeyframeTime.value().ToMicroseconds()) { + SetNextKeyFrameTime(); + } +} + 
+nsresult +WebMTrackDemuxer::GetNextRandomAccessPoint(media::TimeUnit* aTime) +{ + if (mNextKeyframeTime.isNothing()) { + // There's no next key frame. + *aTime = + media::TimeUnit::FromMicroseconds(std::numeric_limits<int64_t>::max()); + } else { + *aTime = mNextKeyframeTime.ref(); + } + return NS_OK; +} + +RefPtr<WebMTrackDemuxer::SkipAccessPointPromise> +WebMTrackDemuxer::SkipToNextRandomAccessPoint(media::TimeUnit aTimeThreshold) +{ + uint32_t parsed = 0; + bool found = false; + RefPtr<MediaRawData> sample; + int64_t sampleTime; + + WEBM_DEBUG("TimeThreshold: %f", aTimeThreshold.ToSeconds()); + while (!found && (sample = NextSample())) { + parsed++; + sampleTime = sample->mTime; + if (sample->mKeyframe && sampleTime >= aTimeThreshold.ToMicroseconds()) { + found = true; + mSamples.Reset(); + mSamples.PushFront(sample.forget()); + } + } + SetNextKeyFrameTime(); + if (found) { + WEBM_DEBUG("next sample: %f (parsed: %d)", + media::TimeUnit::FromMicroseconds(sampleTime).ToSeconds(), + parsed); + return SkipAccessPointPromise::CreateAndResolve(parsed, __func__); + } else { + SkipFailureHolder failure(NS_ERROR_DOM_MEDIA_END_OF_STREAM, parsed); + return SkipAccessPointPromise::CreateAndReject(Move(failure), __func__); + } +} + +media::TimeIntervals +WebMTrackDemuxer::GetBuffered() +{ + return mParent->GetBuffered(); +} + +void +WebMTrackDemuxer::BreakCycles() +{ + mParent = nullptr; +} + +int64_t +WebMTrackDemuxer::GetEvictionOffset(const media::TimeUnit& aTime) +{ + int64_t offset; + if (!mParent->GetOffsetForTime(aTime.ToNanoseconds(), &offset)) { + return 0; + } + + return offset; +} + +#undef WEBM_DEBUG +} // namespace mozilla diff --git a/dom/media/webm/WebMDemuxer.h b/dom/media/webm/WebMDemuxer.h new file mode 100644 index 000000000..6fff38e7d --- /dev/null +++ b/dom/media/webm/WebMDemuxer.h @@ -0,0 +1,292 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is 
subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +#if !defined(WebMDemuxer_h_) +#define WebMDemuxer_h_ + +#include "nsTArray.h" +#include "MediaDataDemuxer.h" +#include "NesteggPacketHolder.h" +#include "mozilla/Move.h" + +typedef struct nestegg nestegg; + +namespace mozilla { + +class WebMBufferedState; + +// Queue for holding MediaRawData samples +class MediaRawDataQueue { + public: + uint32_t GetSize() { + return mQueue.size(); + } + + void Push(MediaRawData* aItem) { + mQueue.push_back(aItem); + } + + void Push(already_AddRefed<MediaRawData>&& aItem) { + mQueue.push_back(Move(aItem)); + } + + void PushFront(MediaRawData* aItem) { + mQueue.push_front(aItem); + } + + void PushFront(already_AddRefed<MediaRawData>&& aItem) { + mQueue.push_front(Move(aItem)); + } + + void PushFront(MediaRawDataQueue&& aOther) { + while (!aOther.mQueue.empty()) { + PushFront(aOther.Pop()); + } + } + + already_AddRefed<MediaRawData> PopFront() { + RefPtr<MediaRawData> result = mQueue.front().forget(); + mQueue.pop_front(); + return result.forget(); + } + + already_AddRefed<MediaRawData> Pop() { + RefPtr<MediaRawData> result = mQueue.back().forget(); + mQueue.pop_back(); + return result.forget(); + } + + void Reset() { + while (!mQueue.empty()) { + mQueue.pop_front(); + } + } + + MediaRawDataQueue& operator=(const MediaRawDataQueue& aOther) { + mQueue = aOther.mQueue; + return *this; + } + + const RefPtr<MediaRawData>& First() const { + return mQueue.front(); + } + + const RefPtr<MediaRawData>& Last() const { + return mQueue.back(); + } + +private: + std::deque<RefPtr<MediaRawData>> mQueue; +}; + +class WebMTrackDemuxer; + +class WebMDemuxer : public MediaDataDemuxer +{ +public: + explicit WebMDemuxer(MediaResource* aResource); + // Indicate if the WebMDemuxer is to be used with MediaSource. 
In which + // case the demuxer will stop reads to the last known complete block. + WebMDemuxer(MediaResource* aResource, bool aIsMediaSource); + + RefPtr<InitPromise> Init() override; + + bool HasTrackType(TrackInfo::TrackType aType) const override; + + uint32_t GetNumberTracks(TrackInfo::TrackType aType) const override; + + UniquePtr<TrackInfo> GetTrackInfo(TrackInfo::TrackType aType, size_t aTrackNumber) const; + + already_AddRefed<MediaTrackDemuxer> GetTrackDemuxer(TrackInfo::TrackType aType, + uint32_t aTrackNumber) override; + + bool IsSeekable() const override; + + bool IsSeekableOnlyInBufferedRanges() const override; + + UniquePtr<EncryptionInfo> GetCrypto() override; + + bool GetOffsetForTime(uint64_t aTime, int64_t* aOffset); + + // Demux next WebM packet and append samples to MediaRawDataQueue + bool GetNextPacket(TrackInfo::TrackType aType, MediaRawDataQueue *aSamples); + + nsresult Reset(TrackInfo::TrackType aType); + + // Pushes a packet to the front of the audio packet queue. + void PushAudioPacket(NesteggPacketHolder* aItem); + + // Pushes a packet to the front of the video packet queue. + void PushVideoPacket(NesteggPacketHolder* aItem); + + // Public accessor for nestegg callbacks + bool IsMediaSource() const + { + return mIsMediaSource; + } + + int64_t LastWebMBlockOffset() const + { + return mLastWebMBlockOffset; + } + + struct NestEggContext { + NestEggContext(WebMDemuxer* aParent, MediaResource* aResource) + : mParent(aParent) + , mResource(aResource) + , mContext(nullptr) {} + + ~NestEggContext(); + + int Init(); + + // Public accessor for nestegg callbacks + + bool IsMediaSource() const { return mParent->IsMediaSource(); } + MediaResourceIndex* GetResource() { return &mResource; } + + int64_t GetEndDataOffset() const + { + return (!mParent->IsMediaSource() || mParent->LastWebMBlockOffset() < 0) + ? 
mResource.GetLength() : mParent->LastWebMBlockOffset(); + } + + WebMDemuxer* mParent; + MediaResourceIndex mResource; + nestegg* mContext; + }; + +private: + friend class WebMTrackDemuxer; + + ~WebMDemuxer(); + void InitBufferedState(); + nsresult ReadMetadata(); + void NotifyDataArrived() override; + void NotifyDataRemoved() override; + void EnsureUpToDateIndex(); + media::TimeIntervals GetBuffered(); + nsresult SeekInternal(TrackInfo::TrackType aType, + const media::TimeUnit& aTarget); + CryptoTrack GetTrackCrypto(TrackInfo::TrackType aType, size_t aTrackNumber); + + // Read a packet from the nestegg file. Returns nullptr if all packets for + // the particular track have been read. Pass TrackInfo::kAudioTrack or + // TrackInfo::kVideoTrack to indicate the type of the packet we want to read. + RefPtr<NesteggPacketHolder> NextPacket(TrackInfo::TrackType aType); + + // Internal method that demuxes the next packet from the stream. The caller + // is responsible for making sure it doesn't get lost. + RefPtr<NesteggPacketHolder> DemuxPacket(TrackInfo::TrackType aType); + + // libnestegg audio and video context for webm container. + // Access on reader's thread only. + NestEggContext mVideoContext; + NestEggContext mAudioContext; + MediaResourceIndex& Resource(TrackInfo::TrackType aType) + { + return aType == TrackInfo::kVideoTrack + ? mVideoContext.mResource : mAudioContext.mResource; + } + nestegg* Context(TrackInfo::TrackType aType) const + { + return aType == TrackInfo::kVideoTrack + ? mVideoContext.mContext : mAudioContext.mContext; + } + + MediaInfo mInfo; + nsTArray<RefPtr<WebMTrackDemuxer>> mDemuxers; + + // Parser state and computed offset-time mappings. Shared by multiple + // readers when decoder has been cloned. Main thread only. + RefPtr<WebMBufferedState> mBufferedState; + RefPtr<MediaByteBuffer> mInitData; + + + // Queue of video and audio packets that have been read but not decoded. 
+ WebMPacketQueue mVideoPackets; + WebMPacketQueue mAudioPackets; + + // Index of video and audio track to play + uint32_t mVideoTrack; + uint32_t mAudioTrack; + + // Nanoseconds to discard after seeking. + uint64_t mSeekPreroll; + + // Calculate the frame duration from the last decodeable frame using the + // previous frame's timestamp. In NS. + Maybe<int64_t> mLastAudioFrameTime; + Maybe<int64_t> mLastVideoFrameTime; + + // Codec ID of audio track + int mAudioCodec; + // Codec ID of video track + int mVideoCodec; + + // Booleans to indicate if we have audio and/or video data + bool mHasVideo; + bool mHasAudio; + bool mNeedReIndex; + + // The last complete block parsed by the WebMBufferedState. -1 if not set. + // We cache those values rather than retrieving them for performance reasons + // as nestegg only performs 1-byte read at a time. + int64_t mLastWebMBlockOffset; + const bool mIsMediaSource; + + Maybe<uint32_t> mLastSeenFrameWidth; + Maybe<uint32_t> mLastSeenFrameHeight; + // This will be populated only if a resolution change occurs, otherwise it + // will be left as null so the original metadata is used + RefPtr<SharedTrackInfo> mSharedVideoTrackInfo; + + EncryptionInfo mCrypto; +}; + +class WebMTrackDemuxer : public MediaTrackDemuxer +{ +public: + WebMTrackDemuxer(WebMDemuxer* aParent, + TrackInfo::TrackType aType, + uint32_t aTrackNumber); + + UniquePtr<TrackInfo> GetInfo() const override; + + RefPtr<SeekPromise> Seek(media::TimeUnit aTime) override; + + RefPtr<SamplesPromise> GetSamples(int32_t aNumSamples = 1) override; + + void Reset() override; + + nsresult GetNextRandomAccessPoint(media::TimeUnit* aTime) override; + + RefPtr<SkipAccessPointPromise> SkipToNextRandomAccessPoint(media::TimeUnit aTimeThreshold) override; + + media::TimeIntervals GetBuffered() override; + + int64_t GetEvictionOffset(const media::TimeUnit& aTime) override; + + void BreakCycles() override; + +private: + friend class WebMDemuxer; + ~WebMTrackDemuxer(); + void 
UpdateSamples(nsTArray<RefPtr<MediaRawData>>& aSamples); + void SetNextKeyFrameTime(); + RefPtr<MediaRawData> NextSample (); + RefPtr<WebMDemuxer> mParent; + TrackInfo::TrackType mType; + UniquePtr<TrackInfo> mInfo; + Maybe<media::TimeUnit> mNextKeyframeTime; + bool mNeedKeyframe; + + // Queued samples extracted by the demuxer, but not yet returned. + MediaRawDataQueue mSamples; +}; + +} // namespace mozilla + +#endif diff --git a/dom/media/webm/WebMWriter.cpp b/dom/media/webm/WebMWriter.cpp new file mode 100644 index 000000000..a98a14f4e --- /dev/null +++ b/dom/media/webm/WebMWriter.cpp @@ -0,0 +1,87 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "WebMWriter.h" +#include "EbmlComposer.h" +#include "GeckoProfiler.h" +#include "OpusTrackEncoder.h" + +namespace mozilla { + +WebMWriter::WebMWriter(uint32_t aTrackTypes) : ContainerWriter() +{ + mMetadataRequiredFlag = aTrackTypes; + mEbmlComposer = new EbmlComposer(); +} + +WebMWriter::~WebMWriter() +{ + // Out-of-line dtor so mEbmlComposer nsAutoPtr can delete a complete type. 
+} + +nsresult +WebMWriter::WriteEncodedTrack(const EncodedFrameContainer& aData, + uint32_t aFlags) +{ + PROFILER_LABEL("WebMWriter", "SetMetadata", + js::ProfileEntry::Category::OTHER); + for (uint32_t i = 0 ; i < aData.GetEncodedFrames().Length(); i++) { + mEbmlComposer->WriteSimpleBlock(aData.GetEncodedFrames().ElementAt(i).get()); + } + return NS_OK; +} + +nsresult +WebMWriter::GetContainerData(nsTArray<nsTArray<uint8_t> >* aOutputBufs, + uint32_t aFlags) +{ + PROFILER_LABEL("WebMWriter", "GetContainerData", + js::ProfileEntry::Category::OTHER); + mEbmlComposer->ExtractBuffer(aOutputBufs, aFlags); + if (aFlags & ContainerWriter::FLUSH_NEEDED) { + mIsWritingComplete = true; + } + return NS_OK; +} + +nsresult +WebMWriter::SetMetadata(TrackMetadataBase* aMetadata) +{ + MOZ_ASSERT(aMetadata); + PROFILER_LABEL("WebMWriter", "SetMetadata", + js::ProfileEntry::Category::OTHER); + + if (aMetadata->GetKind() == TrackMetadataBase::METADATA_VP8) { + VP8Metadata* meta = static_cast<VP8Metadata*>(aMetadata); + MOZ_ASSERT(meta, "Cannot find vp8 encoder metadata"); + mEbmlComposer->SetVideoConfig(meta->mWidth, meta->mHeight, + meta->mDisplayWidth, meta->mDisplayHeight, + meta->mEncodedFrameRate); + mMetadataRequiredFlag = mMetadataRequiredFlag & ~ContainerWriter::CREATE_VIDEO_TRACK; + } + + if (aMetadata->GetKind() == TrackMetadataBase::METADATA_VORBIS) { + VorbisMetadata* meta = static_cast<VorbisMetadata*>(aMetadata); + MOZ_ASSERT(meta, "Cannot find vorbis encoder metadata"); + mEbmlComposer->SetAudioConfig(meta->mSamplingFrequency, meta->mChannels); + mEbmlComposer->SetAudioCodecPrivateData(meta->mData); + mMetadataRequiredFlag = mMetadataRequiredFlag & ~ContainerWriter::CREATE_AUDIO_TRACK; + } + + if (aMetadata->GetKind() == TrackMetadataBase::METADATA_OPUS) { + OpusMetadata* meta = static_cast<OpusMetadata*>(aMetadata); + MOZ_ASSERT(meta, "Cannot find Opus encoder metadata"); + mEbmlComposer->SetAudioConfig(meta->mSamplingFrequency, meta->mChannels); + 
mEbmlComposer->SetAudioCodecPrivateData(meta->mIdHeader); + mMetadataRequiredFlag = mMetadataRequiredFlag & ~ContainerWriter::CREATE_AUDIO_TRACK; + } + + if (!mMetadataRequiredFlag) { + mEbmlComposer->GenerateHeader(); + } + return NS_OK; +} + +} // namespace mozilla diff --git a/dom/media/webm/WebMWriter.h b/dom/media/webm/WebMWriter.h new file mode 100644 index 000000000..c4dfec8e5 --- /dev/null +++ b/dom/media/webm/WebMWriter.h @@ -0,0 +1,75 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef WebMWriter_h_ +#define WebMWriter_h_ + +#include "ContainerWriter.h" +#include "nsAutoPtr.h" + +namespace mozilla { + +class EbmlComposer; + +// Vorbis meta data structure +class VorbisMetadata : public TrackMetadataBase +{ +public: + nsTArray<uint8_t> mData; + int32_t mChannels; + float mSamplingFrequency; + MetadataKind GetKind() const override { return METADATA_VORBIS; } +}; + +// VP8 meta data structure +class VP8Metadata : public TrackMetadataBase +{ +public: + int32_t mWidth; + int32_t mHeight; + int32_t mDisplayWidth; + int32_t mDisplayHeight; + int32_t mEncodedFrameRate; + MetadataKind GetKind() const override { return METADATA_VP8; } +}; + +/** + * WebM writer helper + * This class accepts encoder to set audio or video meta data or + * encoded data to ebml Composer, and get muxing data through GetContainerData. + * The ctor/dtor run in the MediaRecorder thread, others run in MediaEncoder thread. + */ +class WebMWriter : public ContainerWriter +{ +public: + // aTrackTypes indicates whether this muxer should multiplex into Video only or A/V format. + // Run in MediaRecorder thread + explicit WebMWriter(uint32_t aTrackTypes); + virtual ~WebMWriter(); + + // WriteEncodedTrack inserts raw packets into WebM stream. 
+ nsresult WriteEncodedTrack(const EncodedFrameContainer &aData, + uint32_t aFlags = 0) override; + + // GetContainerData outputs multiplexing data. + // aFlags indicates the muxer should enter into finished stage and flush out + // queue data. + nsresult GetContainerData(nsTArray<nsTArray<uint8_t> >* aOutputBufs, + uint32_t aFlags = 0) override; + + // Assign metadata into muxer + nsresult SetMetadata(TrackMetadataBase* aMetadata) override; + +private: + nsAutoPtr<EbmlComposer> mEbmlComposer; + + // Indicate what kind of meta data needed in the writer. + // If this value become 0, it means writer can start to generate header. + uint8_t mMetadataRequiredFlag; +}; + +} // namespace mozilla + +#endif diff --git a/dom/media/webm/moz.build b/dom/media/webm/moz.build new file mode 100644 index 000000000..f2898fafd --- /dev/null +++ b/dom/media/webm/moz.build @@ -0,0 +1,31 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +EXPORTS += [ + 'NesteggPacketHolder.h', + 'WebMBufferedParser.h', + 'WebMDecoder.h', + 'WebMDemuxer.h', +] + +UNIFIED_SOURCES += [ + 'WebMBufferedParser.cpp', + 'WebMDecoder.cpp', + 'WebMDemuxer.cpp', +] + +if CONFIG['MOZ_WEBM_ENCODER']: + EXPORTS += ['WebMWriter.h'] + UNIFIED_SOURCES += ['EbmlComposer.cpp', + 'WebMWriter.cpp', + ] + +CXXFLAGS += CONFIG['MOZ_LIBVPX_CFLAGS'] + +FINAL_LIBRARY = 'xul' + +if CONFIG['GNU_CXX']: + CXXFLAGS += ['-Wno-error=shadow'] |