summaryrefslogtreecommitdiffstats
path: root/dom/media/MP3FrameParser.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'dom/media/MP3FrameParser.cpp')
-rw-r--r--dom/media/MP3FrameParser.cpp591
1 files changed, 591 insertions, 0 deletions
diff --git a/dom/media/MP3FrameParser.cpp b/dom/media/MP3FrameParser.cpp
new file mode 100644
index 000000000..242e3df00
--- /dev/null
+++ b/dom/media/MP3FrameParser.cpp
@@ -0,0 +1,591 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <algorithm>
+
+#include "nsMemory.h"
+#include "MP3FrameParser.h"
+#include "VideoUtils.h"
+
+
+// Read the four consecutive bytes starting at X as one big-endian 32-bit
+// value. X may be any expression that can be indexed with [0]..[3] (a
+// pointer or a string literal). Each byte is widened to uint32_t before it is
+// shifted, so a leading byte >= 0x80 is never shifted into the sign bit of a
+// promoted int.
+#define FROM_BIG_ENDIAN(X) ((uint32_t)(((uint32_t)(uint8_t)(X)[0] << 24) | \
+                                       ((uint32_t)(uint8_t)(X)[1] << 16) | \
+                                       ((uint32_t)(uint8_t)(X)[2] << 8)  | \
+                                       ((uint32_t)(uint8_t)(X)[3])))
+
+
+namespace mozilla {
+
+/*
+ * Following code taken from http://www.hydrogenaudio.org/forums/index.php?showtopic=85125
+ * with permission from the author, Nick Wallette <sirnickity@gmail.com>.
+ */
+
+/* BEGIN shameless copy and paste */
+
+// Bitrates - use [version][layer][bitrate]
+// The three indices are the raw mVersion, mLayer and mBitrate header fields
+// (see MP3Frame::CalculateLength(), which multiplies the entry by 1000).
+// Rows for reserved/invalid field values are all zero, as is the entry for
+// bitrate index 0 and index 15 in every row.
+const uint16_t mpeg_bitrates[4][4][16] = {
+ { // Version 2.5
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // Reserved
+ { 0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160, 0 }, // Layer 3
+ { 0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160, 0 }, // Layer 2
+ { 0, 32, 48, 56, 64, 80, 96, 112, 128, 144, 160, 176, 192, 224, 256, 0 } // Layer 1
+ },
+ { // Reserved
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // Invalid
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // Invalid
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // Invalid
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } // Invalid
+ },
+ { // Version 2
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // Reserved
+ { 0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160, 0 }, // Layer 3
+ { 0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160, 0 }, // Layer 2
+ { 0, 32, 48, 56, 64, 80, 96, 112, 128, 144, 160, 176, 192, 224, 256, 0 } // Layer 1
+ },
+ { // Version 1
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // Reserved
+ { 0, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 0 }, // Layer 3
+ { 0, 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384, 0 }, // Layer 2
+ { 0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, 448, 0 }, // Layer 1
+ }
+};
+
+// Sample rates - use [version][srate]
+// Indexed with the raw mVersion and mSampleRate header fields. The last
+// column and the reserved-version row are 0, so callers that divide by the
+// looked-up rate must cope with a zero entry.
+const uint16_t mpeg_srates[4][4] = {
+ { 11025, 12000, 8000, 0 }, // MPEG 2.5
+ { 0, 0, 0, 0 }, // Reserved
+ { 22050, 24000, 16000, 0 }, // MPEG 2
+ { 44100, 48000, 32000, 0 } // MPEG 1
+};
+
+// Samples per frame - use [version][layer]
+// Indexed with the raw mVersion and mLayer header fields; entries for the
+// reserved version and the reserved layer are 0.
+const uint16_t mpeg_frame_samples[4][4] = {
+// Rsvd 3 2 1 < Layer v Version
+ { 0, 576, 1152, 384 }, // 2.5
+ { 0, 0, 0, 0 }, // Reserved
+ { 0, 576, 1152, 384 }, // 2
+ { 0, 1152, 1152, 384 } // 1
+};
+
+// Slot size (MPEG unit of measurement) - use [layer]
+// MP3Frame::CalculateLength() adds one slot to the frame length when the
+// header's padding flag is set.
+const uint8_t mpeg_slot_size[4] = { 0, 1, 1, 4 }; // Rsvd, 3, 2, 1
+
+// Compute the length of this frame from its header fields.
+//
+// Returns the frame length truncated to an integer, or 0 when the header
+// does not describe a frame whose length can be computed:
+//  * the sample-rate lookup yields 0 (reserved index), which would otherwise
+//    divide by zero and convert an infinite/NaN float to uint16_t;
+//  * the bitrate lookup yields 0, which would otherwise produce just the
+//    padding slot, i.e. a "frame" shorter than its own header.
+uint16_t
+MP3Frame::CalculateLength()
+{
+  // Lookup real values of these fields
+  const uint32_t bitrate = mpeg_bitrates[mVersion][mLayer][mBitrate] * 1000;
+  const uint32_t samprate = mpeg_srates[mVersion][mSampleRate];
+  const uint16_t samples = mpeg_frame_samples[mVersion][mLayer];
+  const uint8_t slot_size = mpeg_slot_size[mLayer];
+
+  // Nothing sensible can be computed from a zero bitrate or sample rate.
+  if (!bitrate || !samprate) {
+    return 0;
+  }
+
+  // In-between calculations
+  const float bps = (float)samples / 8.0;
+  const float fsize = ( (bps * (float)bitrate) / (float)samprate )
+                      + ( (mPad) ? slot_size : 0 );
+
+  // Frame sizes are truncated integers
+  return (uint16_t)fsize;
+}
+
+/* END shameless copy and paste */
+
+
+/** MP3Parser methods **/
+
+// Start with an empty header: the next byte fed to ParseFrameLength() is
+// stored at position 0.
+MP3Parser::MP3Parser()
+ : mCurrentChar(0)
+{ }
+
+// Discard any partially collected header; the next byte is treated as the
+// first byte of a header again. The raw header bytes are not cleared.
+void
+MP3Parser::Reset()
+{
+ mCurrentChar = 0;
+}
+
+// Feed one byte of the stream to the frame-header parser.
+//
+// Bytes are collected into mData and validated as they arrive. Returns the
+// length computed from the header once a complete, plausible header has been
+// collected; returns 0 while more bytes are needed or when the byte was
+// rejected.
+uint16_t
+MP3Parser::ParseFrameLength(uint8_t ch)
+{
+  mData.mRaw[mCurrentChar] = ch;
+
+  MP3Frame &header = mData.mFrame;
+
+  // An all-high byte is only valid as the sync byte that opens a header, so
+  // seeing one always puts us back at the start of a header. This is what
+  // keeps us from mistaking the middle of a frame for its beginning.
+  if (ch == 0xff) {
+    mCurrentChar = 0;
+  }
+
+  // Check that the byte just stored is valid for its position.
+  switch (mCurrentChar) {
+    case 0:
+      // A header has to open with the sync byte. The parser is already at
+      // position 0, so there is nothing to reset.
+      if (header.mSync1 != 0xff) {
+        return 0;
+      }
+      break;
+
+    case 1: {
+      const bool plausible = header.mSync2 == 0x07
+                          && header.mVersion != 0x01
+                          && header.mLayer != 0x00;
+      if (!plausible) {
+        Reset();
+        return 0;
+      }
+      break;
+    }
+
+    case 2:
+      if (header.mBitrate == 0x0f) {
+        Reset();
+        return 0;
+      }
+      break;
+
+    default:
+      break;
+  }
+
+  mCurrentChar++;
+  MOZ_ASSERT(mCurrentChar <= sizeof(MP3Frame));
+
+  // Still waiting for the rest of the header.
+  if (mCurrentChar < sizeof(MP3Frame)) {
+    return 0;
+  }
+
+  // Complete header: get ready for the next one and report the length.
+  mCurrentChar = 0;
+  return header.CalculateLength();
+}
+
+// Sample rate looked up from the version and sample-rate fields of the
+// header currently held in mData.
+uint32_t
+MP3Parser::GetSampleRate()
+{
+  const MP3Frame &header = mData.mFrame;
+  const uint16_t rate = mpeg_srates[header.mVersion][header.mSampleRate];
+  return rate;
+}
+
+// Samples per frame looked up from the version and layer fields of the
+// header currently held in mData.
+uint32_t
+MP3Parser::GetSamplesPerFrame()
+{
+  const MP3Frame &header = mData.mFrame;
+  const uint16_t samples = mpeg_frame_samples[header.mVersion][header.mLayer];
+  return samples;
+}
+
+
+/** ID3Parser methods **/
+
+// Magic bytes that open an ID3v2 header (not NUL-terminated).
+const char sID3Head[3] = { 'I', 'D', '3' };
+// Number of header bytes consumed by ID3Parser::ParseChar().
+const uint32_t ID3_HEADER_LENGTH = 10;
+// Added to the tag length when the footer flag is set.
+const uint32_t ID3_FOOTER_LENGTH = 10;
+// Bit of the flags byte that signals a footer.
+const uint8_t ID3_FOOTER_PRESENT = 0x10;
+
+// Start with no header bytes consumed and no header fields recorded.
+ID3Parser::ID3Parser()
+ : mCurrentChar(0)
+ , mVersion(0)
+ , mFlags(0)
+ , mHeaderLength(0)
+{ }
+
+// Forget everything parsed so far so that a new header can be parsed from
+// its first byte.
+void
+ID3Parser::Reset()
+{
+ mCurrentChar = mVersion = mFlags = mHeaderLength = 0;
+}
+
+// Feed one byte of the stream to the ID3v2 header parser.
+//
+// Returns true once the tenth and last header byte has been accepted; the
+// total tag length is then available from GetHeaderLength(). A byte that
+// does not fit the current position discards the partial header and, unless
+// it was already being tried as the first byte, is tried again as the start
+// of a new header. Calling this again after it has returned true without
+// calling Reset() first crashes deliberately.
+bool
+ID3Parser::ParseChar(char ch)
+{
+  bool accepted = false;
+
+  switch (mCurrentChar) {
+    // Bytes one to three must spell "ID3".
+    case 0: case 1: case 2:
+      accepted = (ch == sID3Head[mCurrentChar]);
+      break;
+
+    // Byte four is the major version, which must be between 2 and 4.
+    case 3:
+      accepted = (ch >= '\2' && ch <= '\4');
+      if (accepted) {
+        mVersion = uint8_t(ch);
+      }
+      break;
+
+    // Byte five is the revision, which must be zero.
+    case 4:
+      accepted = (ch == '\0');
+      break;
+
+    // Byte six holds the flags. Which bits may be set depends on the
+    // version: the low (8 - version) bits must be clear.
+    case 5:
+      accepted = ((ch & (0xff >> mVersion)) == '\0');
+      if (accepted) {
+        mFlags = uint8_t(ch);
+      }
+      break;
+
+    // Bytes seven to ten form a 28-bit length, seven bits per byte with the
+    // high bit of each byte unset. It covers the extended header, the
+    // padding and the frames after unsynchronisation.
+    case 6: case 7: case 8: case 9:
+      accepted = !(ch & 0x80);
+      if (accepted) {
+        mHeaderLength <<= 7;
+        mHeaderLength |= ch;
+        if (mCurrentChar == 9) {
+          // The stored length excludes the header itself and the footer.
+          mHeaderLength += ID3_HEADER_LENGTH;
+          if (mFlags & ID3_FOOTER_PRESENT) {
+            mHeaderLength += ID3_FOOTER_LENGTH;
+          }
+        }
+      }
+      break;
+
+    default:
+      MOZ_CRASH("Header already fully parsed!");
+  }
+
+  if (accepted) {
+    mCurrentChar++;
+    return IsParsed();
+  }
+
+  // The byte was rejected. If it interrupted a partial header it may still
+  // be the first byte of a new one, so give it one more chance from a clean
+  // state.
+  const bool hadPartialHeader = mCurrentChar != 0;
+  Reset();
+  return hadPartialHeader ? ParseChar(ch) : false;
+}
+
+// True once all ID3_HEADER_LENGTH header bytes have been accepted by
+// ParseChar() and the parser has not been reset since.
+bool
+ID3Parser::IsParsed() const
+{
+ return mCurrentChar >= ID3_HEADER_LENGTH;
+}
+
+// Total length recorded by ParseChar(): the size stored in the header plus
+// ID3_HEADER_LENGTH, plus ID3_FOOTER_LENGTH if the footer flag is set.
+// Only meaningful once IsParsed() is true (asserted).
+uint32_t
+ID3Parser::GetHeaderLength() const
+{
+ MOZ_ASSERT(IsParsed(),
+ "Queried length of ID3 header before parsing finished.");
+ return mHeaderLength;
+}
+
+
+/** VBR header helper stuff **/
+
+// Helper function to find a VBR header in an MP3 frame.
+// Based on information from
+// http://www.codeproject.com/Articles/8295/MPEG-Audio-Frame-Header
+
+// "VBRI" as a big-endian 32-bit value, for comparison with FROM_BIG_ENDIAN().
+const uint32_t VBRI_TAG = FROM_BIG_ENDIAN("VBRI");
+// Offsets below are relative to the buffered first-frame data, which
+// MP3FrameParser::ParseBuffer() starts collecting after the frame header;
+// hence the sizeof(MP3Frame) correction.
+const uint32_t VBRI_OFFSET = 32 - sizeof(MP3Frame);
+// Position of the 32-bit frame count read by FindNumVBRFrames().
+const uint32_t VBRI_FRAME_COUNT_OFFSET = VBRI_OFFSET + 14;
+// FindNumVBRFrames() only looks for a VBRI header in frames longer than this.
+const uint32_t VBRI_MIN_FRAME_SIZE = VBRI_OFFSET + 26;
+
+// "Xing" as a big-endian 32-bit value, for comparison with FROM_BIG_ENDIAN().
+const uint32_t XING_TAG = FROM_BIG_ENDIAN("Xing");
+// Bits of the 32-bit flags word that follows the Xing tag. Only
+// XING_HAS_NUM_FRAMES is examined in this file (see ParseXing()).
+enum XingFlags {
+ XING_HAS_NUM_FRAMES = 0x01,
+ XING_HAS_NUM_BYTES = 0x02,
+ XING_HAS_TOC = 0x04,
+ XING_HAS_VBR_SCALE = 0x08
+};
+
+// Extract the frame count from a Xing header.
+//
+// aBuffer must point at the 4-byte Xing tag. The flags word is read from the
+// 4 bytes after the tag and the frame count from the 4 bytes after that, so
+// the caller has to make sure at least 12 bytes are readable.
+// Returns the frame count, or -1 if the flags say none is stored.
+static int64_t
+ParseXing(const char *aBuffer)
+{
+  const uint32_t xingFlags = FROM_BIG_ENDIAN(aBuffer + 4);
+
+  if (xingFlags & XING_HAS_NUM_FRAMES) {
+    const int64_t frameCount = FROM_BIG_ENDIAN(aBuffer + 8);
+    return frameCount;
+  }
+
+  NS_WARNING("VBR file without frame count. Duration estimation likely to "
+             "be totally wrong.");
+  return -1;
+}
+
+// Look for a VBR header (VBRI or Xing) in the buffered first frame.
+//
+// aFrame holds the bytes of the first MP3 frame as collected by
+// MP3FrameParser::ParseBuffer(). Returns the number of frames announced by
+// the VBR header, or -1 if no usable header was found.
+static int64_t
+FindNumVBRFrames(const nsCString& aFrame)
+{
+  const char *buffer = aFrame.get();
+  const char *bufferEnd = aFrame.get() + aFrame.Length();
+
+  // VBRI header is nice and well-defined; let's try to find that first.
+  if (aFrame.Length() > VBRI_MIN_FRAME_SIZE &&
+      FROM_BIG_ENDIAN(buffer + VBRI_OFFSET) == VBRI_TAG) {
+    return FROM_BIG_ENDIAN(buffer + VBRI_FRAME_COUNT_OFFSET);
+  }
+
+  // ParseXing() reads the tag, the flags word and the frame count, i.e.
+  // three 32-bit fields. Only accept a tag when all of them lie inside the
+  // frame; a tag cut off by the end of the frame cannot provide a frame
+  // count and must not be read past the end of the buffer.
+  const int64_t xingMinSize = 3 * sizeof(uint32_t);
+
+  // We have to search for the Xing header as its position can change.
+  // Compare remaining sizes rather than advanced pointers so that no pointer
+  // beyond the end of the buffer is ever formed.
+  for (; bufferEnd - buffer >= xingMinSize; buffer++) {
+    if (FROM_BIG_ENDIAN(buffer) == XING_TAG) {
+      return ParseXing(buffer);
+    }
+  }
+
+  return -1;
+}
+
+
+/** MP3FrameParser methods **/
+
+// Number of bytes we are willing to scan without finding an MP3 frame before
+// we give up and assume we're not parsing an MP3 stream (see
+// MP3FrameParser::Parse()). Bytes belonging to ID3v2 tags are not counted
+// against this limit there (mTotalID3Size is subtracted), because some MP3s
+// have large ID3v2 tags, up to 150KB.
+static const uint32_t MAX_SKIPPED_BYTES = 4096;
+
+// NOTE(review): not referenced anywhere in the visible part of this file;
+// the code uses sizeof(MP3Frame) instead.
+enum {
+ MP3_HEADER_LENGTH = 4,
+};
+
+// Create a parser for a stream of aLength bytes. aLength is only used by
+// GetDuration() to estimate the number of frames when no VBR header gives
+// an exact count.
+MP3FrameParser::MP3FrameParser(int64_t aLength)
+: mLock("MP3FrameParser.mLock"),
+  mTotalID3Size(0),
+  mTotalFrameSize(0),
+  mFrameCount(0),
+  mOffset(0),
+  mLength(aLength),
+  mMP3Offset(-1),
+  mSamplesPerSecond(0),
+  mFirstFrameEnd(-1),
+  mIsMP3(MAYBE_MP3)
+{
+  // These two are otherwise only written by ParseBuffer(). mNumFrames in
+  // particular is read by HasExactDuration() and GetDuration() as soon as
+  // ParsedHeaders() is true, which Parse() can bring about (by resetting
+  // mFirstFrameEnd after a discontinuity) before any frame count has been
+  // stored. -1 means "no exact frame count known".
+  // They are assigned here rather than in the initializer list because the
+  // member declaration order is not visible from this file.
+  mSamplesPerFrame = 0;
+  mNumFrames = -1;
+}
+
+// Scan aBuffer (aLength bytes located at aStreamOffset in the stream) for
+// ID3 tags and MP3 frame headers and update the parser state accordingly.
+//
+// On success *aOutBytesRead receives the number of bytes consumed relative
+// to aBuffer. This can exceed aLength when an ID3 tag or an MP3 frame that
+// is being skipped extends beyond the end of the buffer.
+// Returns NS_ERROR_FAILURE if skipping an ID3 tag would not advance the read
+// position, NS_OK otherwise.
+nsresult MP3FrameParser::ParseBuffer(const uint8_t* aBuffer,
+                                     uint32_t aLength,
+                                     int64_t aStreamOffset,
+                                     uint32_t* aOutBytesRead)
+{
+  // Iterate forwards over the buffer, looking for ID3 tag, or MP3
+  // Frame headers.
+  const uint8_t *buffer = aBuffer;
+  const uint8_t *bufferEnd = aBuffer + aLength;
+
+  // If we haven't found any MP3 frame data yet, there might be ID3 headers
+  // we can skip over.
+  if (mMP3Offset < 0) {
+    for (const uint8_t *ch = buffer; ch < bufferEnd; ch++) {
+      if (mID3Parser.ParseChar(*ch)) {
+        // Found an ID3 header. We don't care about the body of the header, so
+        // just skip past. |ch| is on the last of the ID3_HEADER_LENGTH header
+        // bytes, and GetHeaderLength() counts from the first one.
+        buffer = ch + mID3Parser.GetHeaderLength() - (ID3_HEADER_LENGTH - 1);
+
+        if (buffer <= ch) {
+          return NS_ERROR_FAILURE;
+        }
+
+        ch = buffer;
+
+        mTotalID3Size += mID3Parser.GetHeaderLength();
+
+        // Yes, this is an MP3!
+        mIsMP3 = DEFINITELY_MP3;
+
+        mID3Parser.Reset();
+      }
+    }
+  }
+
+  // The first MP3 frame in a variable bitrate stream can contain metadata
+  // for duration estimation and seeking, so we buffer that first frame here.
+  if (aStreamOffset < mFirstFrameEnd) {
+    uint64_t copyLen = std::min((int64_t)aLength, mFirstFrameEnd - aStreamOffset);
+    mFirstFrame.Append((const char *)buffer, copyLen);
+    buffer += copyLen;
+  }
+
+  while (buffer < bufferEnd) {
+    uint16_t frameLen = mMP3Parser.ParseFrameLength(*buffer);
+
+    if (frameLen) {
+      // We've found an MP3 frame!
+
+      // We're now sure this is an MP3 stream.
+      mIsMP3 = DEFINITELY_MP3;
+
+      // We need to know these to convert the number of frames in the stream
+      // to the length of the stream in seconds.
+      mSamplesPerSecond = mMP3Parser.GetSampleRate();
+      mSamplesPerFrame = mMP3Parser.GetSamplesPerFrame();
+
+      // If the stream has a constant bitrate, we should only need the length
+      // of the first frame and the length (in bytes) of the stream to
+      // estimate the length (in seconds).
+      mTotalFrameSize += frameLen;
+      mFrameCount++;
+
+      // If |mMP3Offset| is already set then this is not the first MP3 frame
+      // in the stream; all we do is skip over its payload.
+      if (mMP3Offset > -1) {
+        // Always advance by at least one byte. A reported length that does
+        // not exceed the header size must not be allowed to wrap around to
+        // a huge unsigned skip.
+        const uint16_t headerLen = sizeof(MP3Frame);
+        buffer += frameLen > headerLen ? frameLen - headerLen : 1;
+        continue;
+      }
+
+      // This is the first frame, so:
+      //  * store the offset at which the MP3 data started; and
+      //  * start buffering the frame, as it might contain handy metadata.
+
+      // Remember the offset of the MP3 stream.
+      // We're at the last byte of an MP3Frame, so MP3 data started
+      // sizeof(MP3Frame) - 1 bytes ago.
+      mMP3Offset = aStreamOffset
+        + (buffer - aBuffer)
+        - (sizeof(MP3Frame) - 1);
+
+      buffer++;
+
+      // If the stream has a variable bitrate, the first frame has metadata
+      // we need for duration estimation and seeking. Start buffering it so we
+      // can parse it later.
+      mFirstFrameEnd = mMP3Offset + frameLen;
+      uint64_t currOffset = buffer - aBuffer + aStreamOffset;
+      uint64_t copyLen = std::min(mFirstFrameEnd - currOffset,
+                                  (uint64_t)(bufferEnd - buffer));
+      mFirstFrame.Append((const char *)buffer, copyLen);
+
+      buffer += copyLen;
+
+    } else {
+      // Nothing to see here. Move along.
+      buffer++;
+    }
+  }
+
+  *aOutBytesRead = buffer - aBuffer;
+
+  // Stream offset just past the last byte consumed. The pointer difference
+  // is taken first so that the (possibly large) stream offset is never added
+  // to a pointer.
+  const int64_t endOffset = aStreamOffset + (buffer - aBuffer);
+
+  if (mFirstFrameEnd > -1 && mFirstFrameEnd <= endOffset) {
+    // We have our whole first frame. Try to find a VBR header.
+    mNumFrames = FindNumVBRFrames(mFirstFrame);
+    mFirstFrameEnd = -1;
+  }
+
+  return NS_OK;
+}
+
+// Feed a chunk of the stream to the parser.
+//
+// aBuffer holds aLength bytes that start at stream offset aOffset. Data that
+// has been seen before is skipped, a gap since the previously parsed data
+// resets the header parsers, and the rest is handed to ParseBuffer(). Does
+// nothing once the exact duration is known. Takes mLock.
+void MP3FrameParser::Parse(const uint8_t* aBuffer, uint32_t aLength, uint64_t aOffset)
+{
+  MutexAutoLock mon(mLock);
+
+  if (HasExactDuration()) {
+    // We know the duration; nothing to do here.
+    return;
+  }
+
+  const uint8_t* buffer = aBuffer;
+  uint32_t length = aLength;
+  uint64_t offset = aOffset;
+
+  // Got some data we have seen already. Skip forward to what we need.
+  if (aOffset < mOffset) {
+    const uint64_t alreadySeen = mOffset - aOffset;
+
+    // Check before touching the pointer: if everything in this buffer has
+    // been seen there is nothing to do, and advancing |buffer| would move it
+    // beyond the end of the caller's data.
+    if (alreadySeen >= aLength) {
+      return;
+    }
+
+    buffer += alreadySeen;
+    length -= static_cast<uint32_t>(alreadySeen);
+    offset = mOffset;
+  }
+
+  // If there is a discontinuity in the input stream, reset the state of the
+  // parsers so we don't get any partial headers.
+  if (mOffset < aOffset) {
+    if (!mID3Parser.IsParsed()) {
+      // Only reset this if it hasn't finished yet.
+      mID3Parser.Reset();
+    }
+
+    if (mFirstFrameEnd > -1) {
+      NS_WARNING("Discontinuity in input while buffering first frame.");
+      mFirstFrameEnd = -1;
+    }
+
+    mMP3Parser.Reset();
+  }
+
+  uint32_t bytesRead = 0;
+  if (NS_FAILED(ParseBuffer(buffer,
+                            length,
+                            offset,
+                            &bytesRead))) {
+    return;
+  }
+
+  MOZ_ASSERT(length <= bytesRead, "All bytes should have been consumed");
+
+  // Update next data offset
+  mOffset = offset + bytesRead;
+
+  // If we've parsed lots of data and we still have nothing, just give up.
+  // We don't count ID3 headers towards the skipped bytes count, as MP3 files
+  // can have massive ID3 sections.
+  if (!mID3Parser.IsParsed() && mMP3Offset < 0 &&
+      mOffset - mTotalID3Size > MAX_SKIPPED_BYTES) {
+    mIsMP3 = NOT_MP3;
+  }
+}
+
+// Duration of the stream in microseconds, or -1 if it cannot be determined
+// yet. Uses the exact frame count from a VBR header when one was found and
+// otherwise estimates the frame count from the average frame size seen so
+// far and the stream length. Takes mLock.
+int64_t MP3FrameParser::GetDuration()
+{
+  MutexAutoLock mon(mLock);
+
+  if (!ParsedHeaders() || !mSamplesPerSecond) {
+    // Not a single frame decoded yet.
+    return -1;
+  }
+
+  MOZ_ASSERT(mFrameCount > 0 && mTotalFrameSize > 0,
+             "Frame parser should have seen at least one MP3 frame of positive length.");
+
+  if (!mFrameCount || !mTotalFrameSize) {
+    // This should never happen.
+    return -1;
+  }
+
+  double frames;
+  if (mNumFrames < 0) {
+    // Estimate the number of frames in the stream based on the average frame
+    // size and the length of the MP3 file.
+    double frameSize = (double)mTotalFrameSize / mFrameCount;
+    frames = (double)(mLength - mMP3Offset) / frameSize;
+  } else {
+    // We know the exact number of frames from the VBR header.
+    frames = mNumFrames;
+  }
+
+  // The duration of each frame is constant over a given stream. Compute it
+  // in floating point: with integer operands the division would drop the
+  // fractional microseconds of every frame, and that error would then be
+  // multiplied by the frame count.
+  double usPerFrame =
+    (double)USECS_PER_S * mSamplesPerFrame / mSamplesPerSecond;
+
+  return frames * usPerFrame;
+}
+
+// Stream offset at which the first MP3 frame starts, or -1 if no frame has
+// been found yet. Takes mLock.
+int64_t MP3FrameParser::GetMP3Offset()
+{
+ MutexAutoLock mon(mLock);
+ return mMP3Offset;
+}
+
+// Does not take mLock itself; the callers visible in this file (Parse() and
+// GetDuration()) hold it.
+bool MP3FrameParser::ParsedHeaders()
+{
+ // We have seen both the beginning and the end of the first MP3 frame in the
+ // stream.
+ // NOTE: mFirstFrameEnd is also reset to -1 by Parse() on a discontinuity,
+ // so this can be true without the first frame having been fully buffered.
+ return mMP3Offset > -1 && mFirstFrameEnd < 0;
+}
+
+// True when the first frame has been dealt with and a frame count was
+// obtained from it (mNumFrames is the result of FindNumVBRFrames(), which
+// returns -1 when there is no usable VBR header).
+bool MP3FrameParser::HasExactDuration()
+{
+ return ParsedHeaders() && mNumFrames > -1;
+}
+
+// Whether feeding more data to Parse() can still be useful.
+// NOTE(review): no lock is taken here or, as far as this file shows, in
+// HasExactDuration(); confirm that IsMP3() or the callers provide the
+// necessary synchronisation.
+bool MP3FrameParser::NeedsData()
+{
+ // If we don't know the duration exactly then either:
+ // - we're still waiting for a VBR header; or
+ // - we look at all frames to constantly update our duration estimate.
+ return IsMP3() && !HasExactDuration();
+}
+
+} // namespace mozilla