diff options
Diffstat (limited to 'dom/media/MP3FrameParser.cpp')
-rw-r--r-- | dom/media/MP3FrameParser.cpp | 591 |
1 files changed, 591 insertions, 0 deletions
diff --git a/dom/media/MP3FrameParser.cpp b/dom/media/MP3FrameParser.cpp new file mode 100644 index 000000000..242e3df00 --- /dev/null +++ b/dom/media/MP3FrameParser.cpp @@ -0,0 +1,591 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include <algorithm> + +#include "nsMemory.h" +#include "MP3FrameParser.h" +#include "VideoUtils.h" + + +#define FROM_BIG_ENDIAN(X) ((uint32_t)((uint8_t)(X)[0] << 24 | (uint8_t)(X)[1] << 16 | \ + (uint8_t)(X)[2] << 8 | (uint8_t)(X)[3])) + + +namespace mozilla { + +/* + * Following code taken from http://www.hydrogenaudio.org/forums/index.php?showtopic=85125 + * with permission from the author, Nick Wallette <sirnickity@gmail.com>. + */ + +/* BEGIN shameless copy and paste */ + +// Bitrates - use [version][layer][bitrate] +const uint16_t mpeg_bitrates[4][4][16] = { + { // Version 2.5 + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // Reserved + { 0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160, 0 }, // Layer 3 + { 0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160, 0 }, // Layer 2 + { 0, 32, 48, 56, 64, 80, 96, 112, 128, 144, 160, 176, 192, 224, 256, 0 } // Layer 1 + }, + { // Reserved + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // Invalid + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // Invalid + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // Invalid + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } // Invalid + }, + { // Version 2 + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // Reserved + { 0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160, 0 }, // Layer 3 + { 0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160, 0 }, // Layer 2 + { 0, 32, 48, 56, 64, 80, 96, 112, 128, 144, 160, 176, 192, 224, 256, 0 } // Layer 1 + }, + { // Version 1 + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // Reserved + { 0, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 0 }, // Layer 3 + { 0, 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384, 0 }, // Layer 2 + { 0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, 448, 0 }, // Layer 1 + } +}; + +// Sample rates - use [version][srate] +const uint16_t mpeg_srates[4][4] = { + { 11025, 12000, 8000, 0 }, // MPEG 2.5 + { 0, 0, 0, 0 }, // Reserved + { 22050, 24000, 16000, 0 }, // MPEG 2 + { 44100, 48000, 32000, 0 } // MPEG 1 +}; + +// Samples per frame - use [version][layer] +const uint16_t mpeg_frame_samples[4][4] = { +// Rsvd 3 2 1 < Layer v Version + { 0, 576, 1152, 384 }, // 2.5 + { 0, 0, 0, 0 }, // Reserved + { 0, 576, 1152, 384 }, // 2 + { 0, 1152, 1152, 384 } // 1 +}; + +// Slot size (MPEG unit of measurement) - use [layer] +const uint8_t mpeg_slot_size[4] = { 0, 1, 1, 4 }; // Rsvd, 3, 2, 1 + +uint16_t +MP3Frame::CalculateLength() +{ + // Lookup real values of these fields + uint32_t bitrate = mpeg_bitrates[mVersion][mLayer][mBitrate] * 1000; + uint32_t samprate = mpeg_srates[mVersion][mSampleRate]; + uint16_t samples = mpeg_frame_samples[mVersion][mLayer]; + uint8_t slot_size = mpeg_slot_size[mLayer]; + + // In-between calculations + float bps = (float)samples / 8.0; + float fsize = ( (bps * (float)bitrate) / (float)samprate ) + + ( (mPad) ? slot_size : 0 ); + + // Frame sizes are truncated integers + return (uint16_t)fsize; +} + +/* END shameless copy and paste */ + + +/** MP3Parser methods **/ + +MP3Parser::MP3Parser() + : mCurrentChar(0) +{ } + +void +MP3Parser::Reset() +{ + mCurrentChar = 0; +} + +uint16_t +MP3Parser::ParseFrameLength(uint8_t ch) +{ + mData.mRaw[mCurrentChar] = ch; + + MP3Frame &frame = mData.mFrame; + + // Validate MP3 header as we read. We can't mistake the start of an MP3 frame + // for the middle of another frame due to the sync byte at the beginning + // of the frame. + + // The only valid position for an all-high byte is the sync byte at the + // beginning of the frame. + if (ch == 0xff) { + mCurrentChar = 0; + } + + // Make sure the current byte is valid in context. If not, reset the parser. + if (mCurrentChar == 2) { + if (frame.mBitrate == 0x0f) { + goto fail; + } + } else if (mCurrentChar == 1) { + if (frame.mSync2 != 0x07 + || frame.mVersion == 0x01 + || frame.mLayer == 0x00) { + goto fail; + } + } + + // The only valid character at the beginning of the header is 0xff. Fail if + // it's different. + if (mCurrentChar == 0 && frame.mSync1 != 0xff) { + // Couldn't find the sync byte. Fail. + return 0; + } + + mCurrentChar++; + MOZ_ASSERT(mCurrentChar <= sizeof(MP3Frame)); + + // Don't have a full header yet. + if (mCurrentChar < sizeof(MP3Frame)) { + return 0; + } + + // Woo, valid header. Return the length. + mCurrentChar = 0; + return frame.CalculateLength(); + +fail: + Reset(); + return 0; +} + +uint32_t +MP3Parser::GetSampleRate() +{ + MP3Frame &frame = mData.mFrame; + return mpeg_srates[frame.mVersion][frame.mSampleRate]; +} + +uint32_t +MP3Parser::GetSamplesPerFrame() +{ + MP3Frame &frame = mData.mFrame; + return mpeg_frame_samples[frame.mVersion][frame.mLayer]; +} + + +/** ID3Parser methods **/ + +const char sID3Head[3] = { 'I', 'D', '3' }; +const uint32_t ID3_HEADER_LENGTH = 10; +const uint32_t ID3_FOOTER_LENGTH = 10; +const uint8_t ID3_FOOTER_PRESENT = 0x10; + +ID3Parser::ID3Parser() + : mCurrentChar(0) + , mVersion(0) + , mFlags(0) + , mHeaderLength(0) +{ } + +void +ID3Parser::Reset() +{ + mCurrentChar = mVersion = mFlags = mHeaderLength = 0; +} + +bool +ID3Parser::ParseChar(char ch) +{ + switch (mCurrentChar) { + // The first three bytes of an ID3v2 header must match the string "ID3". + case 0: case 1: case 2: + if (ch != sID3Head[mCurrentChar]) { + goto fail; + } + break; + // The fourth and fifth bytes give the version, between 2 and 4. + case 3: + if (ch < '\2' || ch > '\4') { + goto fail; + } + mVersion = uint8_t(ch); + break; + case 4: + if (ch != '\0') { + goto fail; + } + break; + // The sixth byte gives the flags; valid flags depend on the version. + case 5: + if ((ch & (0xff >> mVersion)) != '\0') { + goto fail; + } + mFlags = uint8_t(ch); + break; + // Bytes seven through ten give the sum of the byte length of the extended + // header, the padding and the frames after unsynchronisation. + // These bytes form a 28-bit integer, with the high bit of each byte unset. + case 6: case 7: case 8: case 9: + if (ch & 0x80) { + goto fail; + } + mHeaderLength <<= 7; + mHeaderLength |= ch; + if (mCurrentChar == 9) { + mHeaderLength += ID3_HEADER_LENGTH; + mHeaderLength += (mFlags & ID3_FOOTER_PRESENT) ? ID3_FOOTER_LENGTH : 0; + } + break; + default: + MOZ_CRASH("Header already fully parsed!"); + } + + mCurrentChar++; + + return IsParsed(); + +fail: + if (mCurrentChar) { + Reset(); + return ParseChar(ch); + } + Reset(); + return false; +} + +bool +ID3Parser::IsParsed() const +{ + return mCurrentChar >= ID3_HEADER_LENGTH; +} + +uint32_t +ID3Parser::GetHeaderLength() const +{ + MOZ_ASSERT(IsParsed(), + "Queried length of ID3 header before parsing finished."); + return mHeaderLength; +} + + +/** VBR header helper stuff **/ + +// Helper function to find a VBR header in an MP3 frame. +// Based on information from +// http://www.codeproject.com/Articles/8295/MPEG-Audio-Frame-Header + +const uint32_t VBRI_TAG = FROM_BIG_ENDIAN("VBRI"); +const uint32_t VBRI_OFFSET = 32 - sizeof(MP3Frame); +const uint32_t VBRI_FRAME_COUNT_OFFSET = VBRI_OFFSET + 14; +const uint32_t VBRI_MIN_FRAME_SIZE = VBRI_OFFSET + 26; + +const uint32_t XING_TAG = FROM_BIG_ENDIAN("Xing"); +enum XingFlags { + XING_HAS_NUM_FRAMES = 0x01, + XING_HAS_NUM_BYTES = 0x02, + XING_HAS_TOC = 0x04, + XING_HAS_VBR_SCALE = 0x08 +}; + +static int64_t +ParseXing(const char *aBuffer) +{ + uint32_t flags = FROM_BIG_ENDIAN(aBuffer + 4); + + if (!(flags & XING_HAS_NUM_FRAMES)) { + NS_WARNING("VBR file without frame count. Duration estimation likely to " + "be totally wrong."); + return -1; + } + + int64_t numFrames = -1; + if (flags & XING_HAS_NUM_FRAMES) { + numFrames = FROM_BIG_ENDIAN(aBuffer + 8); + } + + return numFrames; +} + +static int64_t +FindNumVBRFrames(const nsCString& aFrame) +{ + const char *buffer = aFrame.get(); + const char *bufferEnd = aFrame.get() + aFrame.Length(); + + // VBRI header is nice and well-defined; let's try to find that first. + if (aFrame.Length() > VBRI_MIN_FRAME_SIZE && + FROM_BIG_ENDIAN(buffer + VBRI_OFFSET) == VBRI_TAG) { + return FROM_BIG_ENDIAN(buffer + VBRI_FRAME_COUNT_OFFSET); + } + + // We have to search for the Xing header as its position can change. + for (; buffer + sizeof(XING_TAG) < bufferEnd; buffer++) { + if (FROM_BIG_ENDIAN(buffer) == XING_TAG) { + return ParseXing(buffer); + } + } + + return -1; +} + + +/** MP3FrameParser methods **/ + +// Some MP3's have large ID3v2 tags, up to 150KB, so we allow lots of +// skipped bytes to be read, just in case, before we give up and assume +// we're not parsing an MP3 stream. +static const uint32_t MAX_SKIPPED_BYTES = 4096; + +enum { + MP3_HEADER_LENGTH = 4, +}; + +MP3FrameParser::MP3FrameParser(int64_t aLength) +: mLock("MP3FrameParser.mLock"), + mTotalID3Size(0), + mTotalFrameSize(0), + mFrameCount(0), + mOffset(0), + mLength(aLength), + mMP3Offset(-1), + mSamplesPerSecond(0), + mFirstFrameEnd(-1), + mIsMP3(MAYBE_MP3) +{ } + +nsresult MP3FrameParser::ParseBuffer(const uint8_t* aBuffer, + uint32_t aLength, + int64_t aStreamOffset, + uint32_t* aOutBytesRead) +{ + // Iterate forwards over the buffer, looking for ID3 tag, or MP3 + // Frame headers. + const uint8_t *buffer = aBuffer; + const uint8_t *bufferEnd = aBuffer + aLength; + + // If we haven't found any MP3 frame data yet, there might be ID3 headers + // we can skip over. + if (mMP3Offset < 0) { + for (const uint8_t *ch = buffer; ch < bufferEnd; ch++) { + if (mID3Parser.ParseChar(*ch)) { + // Found an ID3 header. We don't care about the body of the header, so + // just skip past. + buffer = ch + mID3Parser.GetHeaderLength() - (ID3_HEADER_LENGTH - 1); + + if (buffer <= ch) { + return NS_ERROR_FAILURE; + } + + ch = buffer; + + mTotalID3Size += mID3Parser.GetHeaderLength(); + + // Yes, this is an MP3! + mIsMP3 = DEFINITELY_MP3; + + mID3Parser.Reset(); + } + } + } + + // The first MP3 frame in a variable bitrate stream can contain metadata + // for duration estimation and seeking, so we buffer that first frame here. + if (aStreamOffset < mFirstFrameEnd) { + uint64_t copyLen = std::min((int64_t)aLength, mFirstFrameEnd - aStreamOffset); + mFirstFrame.Append((const char *)buffer, copyLen); + buffer += copyLen; + } + + while (buffer < bufferEnd) { + uint16_t frameLen = mMP3Parser.ParseFrameLength(*buffer); + + if (frameLen) { + // We've found an MP3 frame! + // This is the first frame (and the only one we'll bother parsing), so: + // * Mark this stream as MP3; + // * Store the offset at which the MP3 data started; and + // * Start buffering the frame, as it might contain handy metadata. + + // We're now sure this is an MP3 stream. + mIsMP3 = DEFINITELY_MP3; + + // We need to know these to convert the number of frames in the stream + // to the length of the stream in seconds. + mSamplesPerSecond = mMP3Parser.GetSampleRate(); + mSamplesPerFrame = mMP3Parser.GetSamplesPerFrame(); + + // If the stream has a constant bitrate, we should only need the length + // of the first frame and the length (in bytes) of the stream to + // estimate the length (in seconds). + mTotalFrameSize += frameLen; + mFrameCount++; + + // If |mMP3Offset| isn't set then this is the first MP3 frame we have + // seen in the stream, which is useful for duration estimation. + if (mMP3Offset > -1) { + uint16_t skip = frameLen - sizeof(MP3Frame); + buffer += skip ? skip : 1; + continue; + } + + // Remember the offset of the MP3 stream. + // We're at the last byte of an MP3Frame, so MP3 data started + // sizeof(MP3Frame) - 1 bytes ago. + mMP3Offset = aStreamOffset + + (buffer - aBuffer) + - (sizeof(MP3Frame) - 1); + + buffer++; + + // If the stream has a variable bitrate, the first frame has metadata + // we need for duration estimation and seeking. Start buffering it so we + // can parse it later. + mFirstFrameEnd = mMP3Offset + frameLen; + uint64_t currOffset = buffer - aBuffer + aStreamOffset; + uint64_t copyLen = std::min(mFirstFrameEnd - currOffset, + (uint64_t)(bufferEnd - buffer)); + mFirstFrame.Append((const char *)buffer, copyLen); + + buffer += copyLen; + + } else { + // Nothing to see here. Move along. + buffer++; + } + } + + *aOutBytesRead = buffer - aBuffer; + + if (mFirstFrameEnd > -1 && mFirstFrameEnd <= aStreamOffset + buffer - aBuffer) { + // We have our whole first frame. Try to find a VBR header. + mNumFrames = FindNumVBRFrames(mFirstFrame); + mFirstFrameEnd = -1; + } + + return NS_OK; +} + +void MP3FrameParser::Parse(const uint8_t* aBuffer, uint32_t aLength, uint64_t aOffset) +{ + MutexAutoLock mon(mLock); + + if (HasExactDuration()) { + // We know the duration; nothing to do here. + return; + } + + const uint8_t* buffer = aBuffer; + int32_t length = aLength; + uint64_t offset = aOffset; + + // Got some data we have seen already. Skip forward to what we need. + if (aOffset < mOffset) { + buffer += mOffset - aOffset; + length -= mOffset - aOffset; + offset = mOffset; + + if (length <= 0) { + return; + } + } + + // If there is a discontinuity in the input stream, reset the state of the + // parsers so we don't get any partial headers. + if (mOffset < aOffset) { + if (!mID3Parser.IsParsed()) { + // Only reset this if it hasn't finished yet. + mID3Parser.Reset(); + } + + if (mFirstFrameEnd > -1) { + NS_WARNING("Discontinuity in input while buffering first frame."); + mFirstFrameEnd = -1; + } + + mMP3Parser.Reset(); + } + + uint32_t bytesRead = 0; + if (NS_FAILED(ParseBuffer(buffer, + length, + offset, + &bytesRead))) { + return; + } + + MOZ_ASSERT(length <= (int)bytesRead, "All bytes should have been consumed"); + + // Update next data offset + mOffset = offset + bytesRead; + + // If we've parsed lots of data and we still have nothing, just give up. + // We don't count ID3 headers towards the skipped bytes count, as MP3 files + // can have massive ID3 sections. + if (!mID3Parser.IsParsed() && mMP3Offset < 0 && + mOffset - mTotalID3Size > MAX_SKIPPED_BYTES) { + mIsMP3 = NOT_MP3; + } +} + +int64_t MP3FrameParser::GetDuration() +{ + MutexAutoLock mon(mLock); + + if (!ParsedHeaders() || !mSamplesPerSecond) { + // Not a single frame decoded yet. + return -1; + } + + MOZ_ASSERT(mFrameCount > 0 && mTotalFrameSize > 0, + "Frame parser should have seen at least one MP3 frame of positive length."); + + if (!mFrameCount || !mTotalFrameSize) { + // This should never happen. + return -1; + } + + double frames; + if (mNumFrames < 0) { + // Estimate the number of frames in the stream based on the average frame + // size and the length of the MP3 file. + double frameSize = (double)mTotalFrameSize / mFrameCount; + frames = (double)(mLength - mMP3Offset) / frameSize; + } else { + // We know the exact number of frames from the VBR header. + frames = mNumFrames; + } + + // The duration of each frame is constant over a given stream. + double usPerFrame = USECS_PER_S * mSamplesPerFrame / mSamplesPerSecond; + + return frames * usPerFrame; +} + +int64_t MP3FrameParser::GetMP3Offset() +{ + MutexAutoLock mon(mLock); + return mMP3Offset; +} + +bool MP3FrameParser::ParsedHeaders() +{ + // We have seen both the beginning and the end of the first MP3 frame in the + // stream. + return mMP3Offset > -1 && mFirstFrameEnd < 0; +} + +bool MP3FrameParser::HasExactDuration() +{ + return ParsedHeaders() && mNumFrames > -1; +} + +bool MP3FrameParser::NeedsData() +{ + // If we don't know the duration exactly then either: + // - we're still waiting for a VBR header; or + // - we look at all frames to constantly update our duration estimate. + return IsMP3() && !HasExactDuration(); +} + +} // namespace mozilla |