/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* vim:set ts=2 sw=2 sts=2 et cindent: */ /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #include <stdio.h> #include <math.h> #include <string.h> #include "mozilla/Logging.h" #include "prdtoa.h" #include "AudioStream.h" #include "VideoUtils.h" #include "mozilla/Monitor.h" #include "mozilla/Mutex.h" #include "mozilla/Sprintf.h" #include <algorithm> #include "mozilla/Telemetry.h" #include "CubebUtils.h" #include "nsPrintfCString.h" #include "gfxPrefs.h" #include "AudioConverter.h" namespace mozilla { #undef LOG #undef LOGW LazyLogModule gAudioStreamLog("AudioStream"); // For simple logs #define LOG(x, ...) MOZ_LOG(gAudioStreamLog, mozilla::LogLevel::Debug, ("%p " x, this, ##__VA_ARGS__)) #define LOGW(x, ...) MOZ_LOG(gAudioStreamLog, mozilla::LogLevel::Warning, ("%p " x, this, ##__VA_ARGS__)) /** * Keep a list of frames sent to the audio engine in each DataCallback along * with the playback rate at the moment. Since the playback rate and number of * underrun frames can vary in each callback. We need to keep the whole history * in order to calculate the playback position of the audio engine correctly. */ class FrameHistory { struct Chunk { uint32_t servicedFrames; uint32_t totalFrames; uint32_t rate; }; template <typename T> static T FramesToUs(uint32_t frames, int rate) { return static_cast<T>(frames) * USECS_PER_S / rate; } public: FrameHistory() : mBaseOffset(0), mBasePosition(0) {} void Append(uint32_t aServiced, uint32_t aUnderrun, uint32_t aRate) { /* In most case where playback rate stays the same and we don't underrun * frames, we are able to merge chunks to avoid lose of precision to add up * in compressing chunks into |mBaseOffset| and |mBasePosition|. */ if (!mChunks.IsEmpty()) { Chunk& c = mChunks.LastElement(); // 2 chunks (c1 and c2) can be merged when rate is the same and // adjacent frames are zero. That is, underrun frames in c1 are zero // or serviced frames in c2 are zero. if (c.rate == aRate && (c.servicedFrames == c.totalFrames || aServiced == 0)) { c.servicedFrames += aServiced; c.totalFrames += aServiced + aUnderrun; return; } } Chunk* p = mChunks.AppendElement(); p->servicedFrames = aServiced; p->totalFrames = aServiced + aUnderrun; p->rate = aRate; } /** * @param frames The playback position in frames of the audio engine. * @return The playback position in microseconds of the audio engine, * adjusted by playback rate changes and underrun frames. */ int64_t GetPosition(int64_t frames) { // playback position should not go backward. MOZ_ASSERT(frames >= mBaseOffset); while (true) { if (mChunks.IsEmpty()) { return mBasePosition; } const Chunk& c = mChunks[0]; if (frames <= mBaseOffset + c.totalFrames) { uint32_t delta = frames - mBaseOffset; delta = std::min(delta, c.servicedFrames); return static_cast<int64_t>(mBasePosition) + FramesToUs<int64_t>(delta, c.rate); } // Since the playback position of the audio engine will not go backward, // we are able to compress chunks so that |mChunks| won't grow unlimitedly. // Note that we lose precision in converting integers into floats and // inaccuracy will accumulate over time. However, for a 24hr long, // sample rate = 44.1k file, the error will be less than 1 microsecond // after playing 24 hours. So we are fine with that. mBaseOffset += c.totalFrames; mBasePosition += FramesToUs<double>(c.servicedFrames, c.rate); mChunks.RemoveElementAt(0); } } private: AutoTArray<Chunk, 7> mChunks; int64_t mBaseOffset; double mBasePosition; }; AudioStream::AudioStream(DataSource& aSource) : mMonitor("AudioStream") , mChannels(0) , mOutChannels(0) , mTimeStretcher(nullptr) , mDumpFile(nullptr) , mState(INITIALIZED) , mDataSource(aSource) { } AudioStream::~AudioStream() { LOG("deleted, state %d", mState); MOZ_ASSERT(mState == SHUTDOWN && !mCubebStream, "Should've called Shutdown() before deleting an AudioStream"); if (mDumpFile) { fclose(mDumpFile); } if (mTimeStretcher) { soundtouch::destroySoundTouchObj(mTimeStretcher); } } size_t AudioStream::SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const { size_t amount = aMallocSizeOf(this); // Possibly add in the future: // - mTimeStretcher // - mCubebStream return amount; } nsresult AudioStream::EnsureTimeStretcherInitializedUnlocked() { mMonitor.AssertCurrentThreadOwns(); if (!mTimeStretcher) { mTimeStretcher = soundtouch::createSoundTouchObj(); mTimeStretcher->setSampleRate(mAudioClock.GetInputRate()); mTimeStretcher->setChannels(mOutChannels); mTimeStretcher->setPitch(1.0); } return NS_OK; } nsresult AudioStream::SetPlaybackRate(double aPlaybackRate) { // MUST lock since the rate transposer is used from the cubeb callback, // and rate changes can cause the buffer to be reallocated MonitorAutoLock mon(mMonitor); NS_ASSERTION(aPlaybackRate > 0.0, "Can't handle negative or null playbackrate in the AudioStream."); // Avoid instantiating the resampler if we are not changing the playback rate. // GetPreservesPitch/SetPreservesPitch don't need locking before calling if (aPlaybackRate == mAudioClock.GetPlaybackRate()) { return NS_OK; } if (EnsureTimeStretcherInitializedUnlocked() != NS_OK) { return NS_ERROR_FAILURE; } mAudioClock.SetPlaybackRate(aPlaybackRate); if (mAudioClock.GetPreservesPitch()) { mTimeStretcher->setTempo(aPlaybackRate); mTimeStretcher->setRate(1.0f); } else { mTimeStretcher->setTempo(1.0f); mTimeStretcher->setRate(aPlaybackRate); } return NS_OK; } nsresult AudioStream::SetPreservesPitch(bool aPreservesPitch) { // MUST lock since the rate transposer is used from the cubeb callback, // and rate changes can cause the buffer to be reallocated MonitorAutoLock mon(mMonitor); // Avoid instantiating the timestretcher instance if not needed. if (aPreservesPitch == mAudioClock.GetPreservesPitch()) { return NS_OK; } if (EnsureTimeStretcherInitializedUnlocked() != NS_OK) { return NS_ERROR_FAILURE; } if (aPreservesPitch == true) { mTimeStretcher->setTempo(mAudioClock.GetPlaybackRate()); mTimeStretcher->setRate(1.0f); } else { mTimeStretcher->setTempo(1.0f); mTimeStretcher->setRate(mAudioClock.GetPlaybackRate()); } mAudioClock.SetPreservesPitch(aPreservesPitch); return NS_OK; } static void SetUint16LE(uint8_t* aDest, uint16_t aValue) { aDest[0] = aValue & 0xFF; aDest[1] = aValue >> 8; } static void SetUint32LE(uint8_t* aDest, uint32_t aValue) { SetUint16LE(aDest, aValue & 0xFFFF); SetUint16LE(aDest + 2, aValue >> 16); } static FILE* OpenDumpFile(uint32_t aChannels, uint32_t aRate) { /** * When MOZ_DUMP_AUDIO is set in the environment (to anything), * we'll drop a series of files in the current working directory named * dumped-audio-<nnn>.wav, one per AudioStream created, containing * the audio for the stream including any skips due to underruns. */ static Atomic<int> gDumpedAudioCount(0); if (!getenv("MOZ_DUMP_AUDIO")) return nullptr; char buf[100]; SprintfLiteral(buf, "dumped-audio-%d.wav", ++gDumpedAudioCount); FILE* f = fopen(buf, "wb"); if (!f) return nullptr; uint8_t header[] = { // RIFF header 0x52, 0x49, 0x46, 0x46, 0x00, 0x00, 0x00, 0x00, 0x57, 0x41, 0x56, 0x45, // fmt chunk. We always write 16-bit samples. 0x66, 0x6d, 0x74, 0x20, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0x10, 0x00, // data chunk 0x64, 0x61, 0x74, 0x61, 0xFE, 0xFF, 0xFF, 0x7F }; static const int CHANNEL_OFFSET = 22; static const int SAMPLE_RATE_OFFSET = 24; static const int BLOCK_ALIGN_OFFSET = 32; SetUint16LE(header + CHANNEL_OFFSET, aChannels); SetUint32LE(header + SAMPLE_RATE_OFFSET, aRate); SetUint16LE(header + BLOCK_ALIGN_OFFSET, aChannels * 2); fwrite(header, sizeof(header), 1, f); return f; } template <typename T> typename EnableIf<IsSame<T, int16_t>::value, void>::Type WriteDumpFileHelper(T* aInput, size_t aSamples, FILE* aFile) { fwrite(aInput, sizeof(T), aSamples, aFile); } template <typename T> typename EnableIf<IsSame<T, float>::value, void>::Type WriteDumpFileHelper(T* aInput, size_t aSamples, FILE* aFile) { AutoTArray<uint8_t, 1024*2> buf; buf.SetLength(aSamples*2); uint8_t* output = buf.Elements(); for (uint32_t i = 0; i < aSamples; ++i) { SetUint16LE(output + i*2, int16_t(aInput[i]*32767.0f)); } fwrite(output, 2, aSamples, aFile); fflush(aFile); } static void WriteDumpFile(FILE* aDumpFile, AudioStream* aStream, uint32_t aFrames, void* aBuffer) { if (!aDumpFile) return; uint32_t samples = aStream->GetOutChannels()*aFrames; using SampleT = AudioSampleTraits<AUDIO_OUTPUT_FORMAT>::Type; WriteDumpFileHelper(reinterpret_cast<SampleT*>(aBuffer), samples, aDumpFile); } template <AudioSampleFormat N> struct ToCubebFormat { static const cubeb_sample_format value = CUBEB_SAMPLE_FLOAT32NE; }; template <> struct ToCubebFormat<AUDIO_FORMAT_S16> { static const cubeb_sample_format value = CUBEB_SAMPLE_S16NE; }; template <typename Function, typename... Args> int AudioStream::InvokeCubeb(Function aFunction, Args&&... aArgs) { MonitorAutoUnlock mon(mMonitor); return aFunction(mCubebStream.get(), Forward<Args>(aArgs)...); } nsresult AudioStream::Init(uint32_t aNumChannels, uint32_t aRate, const dom::AudioChannel aAudioChannel) { auto startTime = TimeStamp::Now(); LOG("%s channels: %d, rate: %d", __FUNCTION__, aNumChannels, aRate); mChannels = aNumChannels; mOutChannels = aNumChannels; mDumpFile = OpenDumpFile(aNumChannels, aRate); cubeb_stream_params params; params.rate = aRate; params.channels = mOutChannels; #if defined(__ANDROID__) #if defined(MOZ_B2G) params.stream_type = CubebUtils::ConvertChannelToCubebType(aAudioChannel); #else params.stream_type = CUBEB_STREAM_TYPE_MUSIC; #endif if (params.stream_type == CUBEB_STREAM_TYPE_MAX) { return NS_ERROR_INVALID_ARG; } #endif params.format = ToCubebFormat<AUDIO_OUTPUT_FORMAT>::value; mAudioClock.Init(aRate); cubeb* cubebContext = CubebUtils::GetCubebContext(); if (!cubebContext) { NS_WARNING("Can't get cubeb context!"); CubebUtils::ReportCubebStreamInitFailure(true); return NS_ERROR_DOM_MEDIA_CUBEB_INITIALIZATION_ERR; } return OpenCubeb(cubebContext, params, startTime, CubebUtils::GetFirstStream()); } nsresult AudioStream::OpenCubeb(cubeb* aContext, cubeb_stream_params& aParams, TimeStamp aStartTime, bool aIsFirst) { MOZ_ASSERT(aContext); cubeb_stream* stream = nullptr; /* Convert from milliseconds to frames. */ uint32_t latency_frames = CubebUtils::GetCubebPlaybackLatencyInMilliseconds() * aParams.rate / 1000; if (cubeb_stream_init(aContext, &stream, "AudioStream", nullptr, nullptr, nullptr, &aParams, latency_frames, DataCallback_S, StateCallback_S, this) == CUBEB_OK) { mCubebStream.reset(stream); CubebUtils::ReportCubebBackendUsed(); } else { NS_WARNING(nsPrintfCString("AudioStream::OpenCubeb() %p failed to init cubeb", this).get()); CubebUtils::ReportCubebStreamInitFailure(aIsFirst); return NS_ERROR_FAILURE; } TimeDuration timeDelta = TimeStamp::Now() - aStartTime; LOG("creation time %sfirst: %u ms", aIsFirst ? "" : "not ", (uint32_t) timeDelta.ToMilliseconds()); Telemetry::Accumulate(aIsFirst ? Telemetry::AUDIOSTREAM_FIRST_OPEN_MS : Telemetry::AUDIOSTREAM_LATER_OPEN_MS, timeDelta.ToMilliseconds()); return NS_OK; } void AudioStream::SetVolume(double aVolume) { MOZ_ASSERT(aVolume >= 0.0 && aVolume <= 1.0, "Invalid volume"); if (cubeb_stream_set_volume(mCubebStream.get(), aVolume * CubebUtils::GetVolumeScale()) != CUBEB_OK) { NS_WARNING("Could not change volume on cubeb stream."); } } void AudioStream::Start() { MonitorAutoLock mon(mMonitor); MOZ_ASSERT(mState == INITIALIZED); mState = STARTED; auto r = InvokeCubeb(cubeb_stream_start); if (r != CUBEB_OK) { mState = ERRORED; } LOG("started, state %s", mState == STARTED ? "STARTED" : mState == DRAINED ? "DRAINED" : "ERRORED"); } void AudioStream::Pause() { MonitorAutoLock mon(mMonitor); MOZ_ASSERT(mState != INITIALIZED, "Must be Start()ed."); MOZ_ASSERT(mState != STOPPED, "Already Pause()ed."); MOZ_ASSERT(mState != SHUTDOWN, "Already Shutdown()ed."); // Do nothing if we are already drained or errored. if (mState == DRAINED || mState == ERRORED) { return; } if (InvokeCubeb(cubeb_stream_stop) != CUBEB_OK) { mState = ERRORED; } else if (mState != DRAINED && mState != ERRORED) { // Don't transition to other states if we are already // drained or errored. mState = STOPPED; } } void AudioStream::Resume() { MonitorAutoLock mon(mMonitor); MOZ_ASSERT(mState != INITIALIZED, "Must be Start()ed."); MOZ_ASSERT(mState != STARTED, "Already Start()ed."); MOZ_ASSERT(mState != SHUTDOWN, "Already Shutdown()ed."); // Do nothing if we are already drained or errored. if (mState == DRAINED || mState == ERRORED) { return; } if (InvokeCubeb(cubeb_stream_start) != CUBEB_OK) { mState = ERRORED; } else if (mState != DRAINED && mState != ERRORED) { // Don't transition to other states if we are already // drained or errored. mState = STARTED; } } void AudioStream::Shutdown() { MonitorAutoLock mon(mMonitor); LOG("Shutdown, state %d", mState); if (mCubebStream) { MonitorAutoUnlock mon(mMonitor); // Force stop to put the cubeb stream in a stable state before deletion. cubeb_stream_stop(mCubebStream.get()); // Must not try to shut down cubeb from within the lock! wasapi may still // call our callback after Pause()/stop()!?! Bug 996162 mCubebStream.reset(); } mState = SHUTDOWN; } int64_t AudioStream::GetPosition() { MonitorAutoLock mon(mMonitor); int64_t frames = GetPositionInFramesUnlocked(); return frames >= 0 ? mAudioClock.GetPosition(frames) : -1; } int64_t AudioStream::GetPositionInFrames() { MonitorAutoLock mon(mMonitor); int64_t frames = GetPositionInFramesUnlocked(); return frames >= 0 ? mAudioClock.GetPositionInFrames(frames) : -1; } int64_t AudioStream::GetPositionInFramesUnlocked() { mMonitor.AssertCurrentThreadOwns(); if (mState == ERRORED) { return -1; } uint64_t position = 0; if (InvokeCubeb(cubeb_stream_get_position, &position) != CUBEB_OK) { return -1; } return std::min<uint64_t>(position, INT64_MAX); } bool AudioStream::IsValidAudioFormat(Chunk* aChunk) { if (aChunk->Rate() != mAudioClock.GetInputRate()) { LOGW("mismatched sample %u, mInRate=%u", aChunk->Rate(), mAudioClock.GetInputRate()); return false; } if (aChunk->Channels() > 8) { return false; } return true; } void AudioStream::GetUnprocessed(AudioBufferWriter& aWriter) { mMonitor.AssertCurrentThreadOwns(); // Flush the timestretcher pipeline, if we were playing using a playback rate // other than 1.0. if (mTimeStretcher && mTimeStretcher->numSamples()) { auto timeStretcher = mTimeStretcher; aWriter.Write([timeStretcher] (AudioDataValue* aPtr, uint32_t aFrames) { return timeStretcher->receiveSamples(aPtr, aFrames); }, aWriter.Available()); // TODO: There might be still unprocessed samples in the stretcher. // We should either remove or flush them so they won't be in the output // next time we switch a playback rate other than 1.0. NS_WARNING_ASSERTION( mTimeStretcher->numUnprocessedSamples() == 0, "no samples"); } while (aWriter.Available() > 0) { UniquePtr<Chunk> c = mDataSource.PopFrames(aWriter.Available()); if (c->Frames() == 0) { break; } MOZ_ASSERT(c->Frames() <= aWriter.Available()); if (IsValidAudioFormat(c.get())) { aWriter.Write(c->Data(), c->Frames()); } else { // Write silence if invalid format. aWriter.WriteZeros(c->Frames()); } } } void AudioStream::GetTimeStretched(AudioBufferWriter& aWriter) { mMonitor.AssertCurrentThreadOwns(); // We need to call the non-locking version, because we already have the lock. if (EnsureTimeStretcherInitializedUnlocked() != NS_OK) { return; } uint32_t toPopFrames = ceil(aWriter.Available() * mAudioClock.GetPlaybackRate()); while (mTimeStretcher->numSamples() < aWriter.Available()) { UniquePtr<Chunk> c = mDataSource.PopFrames(toPopFrames); if (c->Frames() == 0) { break; } MOZ_ASSERT(c->Frames() <= toPopFrames); if (IsValidAudioFormat(c.get())) { mTimeStretcher->putSamples(c->Data(), c->Frames()); } else { // Write silence if invalid format. AutoTArray<AudioDataValue, 1000> buf; buf.SetLength(mOutChannels * c->Frames()); memset(buf.Elements(), 0, buf.Length() * sizeof(AudioDataValue)); mTimeStretcher->putSamples(buf.Elements(), c->Frames()); } } auto timeStretcher = mTimeStretcher; aWriter.Write([timeStretcher] (AudioDataValue* aPtr, uint32_t aFrames) { return timeStretcher->receiveSamples(aPtr, aFrames); }, aWriter.Available()); } long AudioStream::DataCallback(void* aBuffer, long aFrames) { MonitorAutoLock mon(mMonitor); MOZ_ASSERT(mState != SHUTDOWN, "No data callback after shutdown"); auto writer = AudioBufferWriter( reinterpret_cast<AudioDataValue*>(aBuffer), mOutChannels, aFrames); if (!strcmp(cubeb_get_backend_id(CubebUtils::GetCubebContext()), "winmm")) { // Don't consume audio data until Start() is called. // Expected only with cubeb winmm backend. if (mState == INITIALIZED) { NS_WARNING("data callback fires before cubeb_stream_start() is called"); mAudioClock.UpdateFrameHistory(0, aFrames); return writer.WriteZeros(aFrames); } } else { MOZ_ASSERT(mState != INITIALIZED); } // NOTE: wasapi (others?) can call us back *after* stop()/Shutdown() (mState == SHUTDOWN) // Bug 996162 if (mAudioClock.GetInputRate() == mAudioClock.GetOutputRate()) { GetUnprocessed(writer); } else { GetTimeStretched(writer); } // Always send audible frames first, and silent frames later. // Otherwise it will break the assumption of FrameHistory. if (!mDataSource.Ended()) { mAudioClock.UpdateFrameHistory(aFrames - writer.Available(), writer.Available()); if (writer.Available() > 0) { LOGW("lost %d frames", writer.Available()); writer.WriteZeros(writer.Available()); } } else { // No more new data in the data source. Don't send silent frames so the // cubeb stream can start draining. mAudioClock.UpdateFrameHistory(aFrames - writer.Available(), 0); } WriteDumpFile(mDumpFile, this, aFrames, aBuffer); return aFrames - writer.Available(); } void AudioStream::StateCallback(cubeb_state aState) { MonitorAutoLock mon(mMonitor); MOZ_ASSERT(mState != SHUTDOWN, "No state callback after shutdown"); LOG("StateCallback, mState=%d cubeb_state=%d", mState, aState); if (aState == CUBEB_STATE_DRAINED) { mState = DRAINED; mDataSource.Drained(); } else if (aState == CUBEB_STATE_ERROR) { LOG("StateCallback() state %d cubeb error", mState); mState = ERRORED; } } AudioClock::AudioClock() : mOutRate(0), mInRate(0), mPreservesPitch(true), mFrameHistory(new FrameHistory()) {} void AudioClock::Init(uint32_t aRate) { mOutRate = aRate; mInRate = aRate; } void AudioClock::UpdateFrameHistory(uint32_t aServiced, uint32_t aUnderrun) { mFrameHistory->Append(aServiced, aUnderrun, mOutRate); } int64_t AudioClock::GetPositionInFrames(int64_t aFrames) const { CheckedInt64 v = UsecsToFrames(GetPosition(aFrames), mInRate); return v.isValid() ? v.value() : -1; } int64_t AudioClock::GetPosition(int64_t frames) const { return mFrameHistory->GetPosition(frames); } void AudioClock::SetPlaybackRate(double aPlaybackRate) { mOutRate = static_cast<uint32_t>(mInRate / aPlaybackRate); } double AudioClock::GetPlaybackRate() const { return static_cast<double>(mInRate) / mOutRate; } void AudioClock::SetPreservesPitch(bool aPreservesPitch) { mPreservesPitch = aPreservesPitch; } bool AudioClock::GetPreservesPitch() const { return mPreservesPitch; } } // namespace mozilla