/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* vim:set ts=2 sw=2 sts=2 et cindent: */ /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #include #include "AppleCMLinker.h" #include "AppleDecoderModule.h" #include "AppleUtils.h" #include "AppleVTDecoder.h" #include "AppleVTLinker.h" #include "MediaData.h" #include "mozilla/ArrayUtils.h" #include "mp4_demuxer/H264.h" #include "nsAutoPtr.h" #include "nsThreadUtils.h" #include "mozilla/Logging.h" #include "VideoUtils.h" #include "gfxPlatform.h" #define LOG(...) MOZ_LOG(sPDMLog, mozilla::LogLevel::Debug, (__VA_ARGS__)) namespace mozilla { AppleVTDecoder::AppleVTDecoder(const VideoInfo& aConfig, TaskQueue* aTaskQueue, MediaDataDecoderCallback* aCallback, layers::ImageContainer* aImageContainer) : mExtraData(aConfig.mExtraData) , mCallback(aCallback) , mPictureWidth(aConfig.mImage.width) , mPictureHeight(aConfig.mImage.height) , mDisplayWidth(aConfig.mDisplay.width) , mDisplayHeight(aConfig.mDisplay.height) , mTaskQueue(aTaskQueue) , mMaxRefFrames(mp4_demuxer::H264::ComputeMaxRefFrames(aConfig.mExtraData)) , mImageContainer(aImageContainer) , mIsShutDown(false) #ifdef MOZ_WIDGET_UIKIT , mUseSoftwareImages(true) #else , mUseSoftwareImages(false) #endif , mIsFlushing(false) , mMonitor("AppleVideoDecoder") , mFormat(nullptr) , mSession(nullptr) , mIsHardwareAccelerated(false) { MOZ_COUNT_CTOR(AppleVTDecoder); // TODO: Verify aConfig.mime_type. LOG("Creating AppleVTDecoder for %dx%d h.264 video", mDisplayWidth, mDisplayHeight ); } AppleVTDecoder::~AppleVTDecoder() { MOZ_COUNT_DTOR(AppleVTDecoder); } RefPtr AppleVTDecoder::Init() { nsresult rv = InitializeSession(); if (NS_SUCCEEDED(rv)) { return InitPromise::CreateAndResolve(TrackType::kVideoTrack, __func__); } return InitPromise::CreateAndReject(NS_ERROR_DOM_MEDIA_FATAL_ERR, __func__); } void AppleVTDecoder::Input(MediaRawData* aSample) { MOZ_ASSERT(mCallback->OnReaderTaskQueue()); LOG("mp4 input sample %p pts %lld duration %lld us%s %d bytes", aSample, aSample->mTime, aSample->mDuration, aSample->mKeyframe ? " keyframe" : "", aSample->Size()); mTaskQueue->Dispatch(NewRunnableMethod>( this, &AppleVTDecoder::ProcessDecode, aSample)); } void AppleVTDecoder::Flush() { MOZ_ASSERT(mCallback->OnReaderTaskQueue()); mIsFlushing = true; nsCOMPtr runnable = NewRunnableMethod(this, &AppleVTDecoder::ProcessFlush); SyncRunnable::DispatchToThread(mTaskQueue, runnable); mIsFlushing = false; mSeekTargetThreshold.reset(); } void AppleVTDecoder::Drain() { MOZ_ASSERT(mCallback->OnReaderTaskQueue()); nsCOMPtr runnable = NewRunnableMethod(this, &AppleVTDecoder::ProcessDrain); mTaskQueue->Dispatch(runnable.forget()); } void AppleVTDecoder::Shutdown() { MOZ_DIAGNOSTIC_ASSERT(!mIsShutDown); mIsShutDown = true; if (mTaskQueue) { nsCOMPtr runnable = NewRunnableMethod(this, &AppleVTDecoder::ProcessShutdown); mTaskQueue->Dispatch(runnable.forget()); } else { ProcessShutdown(); } } nsresult AppleVTDecoder::ProcessDecode(MediaRawData* aSample) { AssertOnTaskQueueThread(); if (mIsFlushing) { return NS_OK; } auto rv = DoDecode(aSample); return rv; } void AppleVTDecoder::ProcessShutdown() { if (mSession) { LOG("%s: cleaning up session %p", __func__, mSession); VTDecompressionSessionInvalidate(mSession); CFRelease(mSession); mSession = nullptr; } if (mFormat) { LOG("%s: releasing format %p", __func__, mFormat); CFRelease(mFormat); mFormat = nullptr; } } void AppleVTDecoder::ProcessFlush() { AssertOnTaskQueueThread(); nsresult rv = WaitForAsynchronousFrames(); if (NS_FAILED(rv)) { LOG("AppleVTDecoder::Flush failed waiting for platform decoder " "with error:%d.", rv); } ClearReorderedFrames(); } void AppleVTDecoder::ProcessDrain() { AssertOnTaskQueueThread(); nsresult rv = WaitForAsynchronousFrames(); if (NS_FAILED(rv)) { LOG("AppleVTDecoder::Drain failed waiting for platform decoder " "with error:%d.", rv); } DrainReorderedFrames(); mCallback->DrainComplete(); } AppleVTDecoder::AppleFrameRef* AppleVTDecoder::CreateAppleFrameRef(const MediaRawData* aSample) { MOZ_ASSERT(aSample); return new AppleFrameRef(*aSample); } void AppleVTDecoder::DrainReorderedFrames() { MonitorAutoLock mon(mMonitor); while (!mReorderQueue.IsEmpty()) { mCallback->Output(mReorderQueue.Pop().get()); } } void AppleVTDecoder::ClearReorderedFrames() { MonitorAutoLock mon(mMonitor); while (!mReorderQueue.IsEmpty()) { mReorderQueue.Pop(); } } void AppleVTDecoder::SetSeekThreshold(const media::TimeUnit& aTime) { LOG("SetSeekThreshold %lld", aTime.ToMicroseconds()); mSeekTargetThreshold = Some(aTime); } // // Implementation details. // // Callback passed to the VideoToolbox decoder for returning data. // This needs to be static because the API takes a C-style pair of // function and userdata pointers. This validates parameters and // forwards the decoded image back to an object method. static void PlatformCallback(void* decompressionOutputRefCon, void* sourceFrameRefCon, OSStatus status, VTDecodeInfoFlags flags, CVImageBufferRef image, CMTime presentationTimeStamp, CMTime presentationDuration) { LOG("AppleVideoDecoder %s status %d flags %d", __func__, status, flags); AppleVTDecoder* decoder = static_cast(decompressionOutputRefCon); nsAutoPtr frameRef( static_cast(sourceFrameRefCon)); // Validate our arguments. if (status != noErr || !image) { NS_WARNING("VideoToolbox decoder returned no data"); image = nullptr; } else if (flags & kVTDecodeInfo_FrameDropped) { NS_WARNING(" ...frame tagged as dropped..."); } else { MOZ_ASSERT(CFGetTypeID(image) == CVPixelBufferGetTypeID(), "VideoToolbox returned an unexpected image type"); } decoder->OutputFrame(image, *frameRef); } // Copy and return a decoded frame. nsresult AppleVTDecoder::OutputFrame(CVPixelBufferRef aImage, AppleVTDecoder::AppleFrameRef aFrameRef) { if (mIsShutDown || mIsFlushing) { // We are in the process of flushing or shutting down; ignore frame. return NS_OK; } LOG("mp4 output frame %lld dts %lld pts %lld duration %lld us%s", aFrameRef.byte_offset, aFrameRef.decode_timestamp.ToMicroseconds(), aFrameRef.composition_timestamp.ToMicroseconds(), aFrameRef.duration.ToMicroseconds(), aFrameRef.is_sync_point ? " keyframe" : "" ); if (!aImage) { // Image was dropped by decoder or none return yet. // We need more input to continue. mCallback->InputExhausted(); return NS_OK; } bool useNullSample = false; if (mSeekTargetThreshold.isSome()) { if ((aFrameRef.composition_timestamp + aFrameRef.duration) < mSeekTargetThreshold.ref()) { useNullSample = true; } else { mSeekTargetThreshold.reset(); } } // Where our resulting image will end up. RefPtr data; // Bounds. VideoInfo info; info.mDisplay = nsIntSize(mDisplayWidth, mDisplayHeight); gfx::IntRect visible = gfx::IntRect(0, 0, mPictureWidth, mPictureHeight); if (useNullSample) { data = new NullData(aFrameRef.byte_offset, aFrameRef.composition_timestamp.ToMicroseconds(), aFrameRef.duration.ToMicroseconds()); } else if (mUseSoftwareImages) { size_t width = CVPixelBufferGetWidth(aImage); size_t height = CVPixelBufferGetHeight(aImage); DebugOnly planes = CVPixelBufferGetPlaneCount(aImage); MOZ_ASSERT(planes == 2, "Likely not NV12 format and it must be."); VideoData::YCbCrBuffer buffer; // Lock the returned image data. CVReturn rv = CVPixelBufferLockBaseAddress(aImage, kCVPixelBufferLock_ReadOnly); if (rv != kCVReturnSuccess) { NS_ERROR("error locking pixel data"); mCallback->Error( MediaResult(NS_ERROR_DOM_MEDIA_DECODE_ERR, RESULT_DETAIL("CVPixelBufferLockBaseAddress:%x", rv))); return NS_ERROR_DOM_MEDIA_DECODE_ERR; } // Y plane. buffer.mPlanes[0].mData = static_cast(CVPixelBufferGetBaseAddressOfPlane(aImage, 0)); buffer.mPlanes[0].mStride = CVPixelBufferGetBytesPerRowOfPlane(aImage, 0); buffer.mPlanes[0].mWidth = width; buffer.mPlanes[0].mHeight = height; buffer.mPlanes[0].mOffset = 0; buffer.mPlanes[0].mSkip = 0; // Cb plane. buffer.mPlanes[1].mData = static_cast(CVPixelBufferGetBaseAddressOfPlane(aImage, 1)); buffer.mPlanes[1].mStride = CVPixelBufferGetBytesPerRowOfPlane(aImage, 1); buffer.mPlanes[1].mWidth = (width+1) / 2; buffer.mPlanes[1].mHeight = (height+1) / 2; buffer.mPlanes[1].mOffset = 0; buffer.mPlanes[1].mSkip = 1; // Cr plane. buffer.mPlanes[2].mData = static_cast(CVPixelBufferGetBaseAddressOfPlane(aImage, 1)); buffer.mPlanes[2].mStride = CVPixelBufferGetBytesPerRowOfPlane(aImage, 1); buffer.mPlanes[2].mWidth = (width+1) / 2; buffer.mPlanes[2].mHeight = (height+1) / 2; buffer.mPlanes[2].mOffset = 1; buffer.mPlanes[2].mSkip = 1; // Copy the image data into our own format. data = VideoData::CreateAndCopyData(info, mImageContainer, aFrameRef.byte_offset, aFrameRef.composition_timestamp.ToMicroseconds(), aFrameRef.duration.ToMicroseconds(), buffer, aFrameRef.is_sync_point, aFrameRef.decode_timestamp.ToMicroseconds(), visible); // Unlock the returned image data. CVPixelBufferUnlockBaseAddress(aImage, kCVPixelBufferLock_ReadOnly); } else { #ifndef MOZ_WIDGET_UIKIT IOSurfacePtr surface = MacIOSurfaceLib::CVPixelBufferGetIOSurface(aImage); MOZ_ASSERT(surface, "Decoder didn't return an IOSurface backed buffer"); RefPtr macSurface = new MacIOSurface(surface); RefPtr image = new MacIOSurfaceImage(macSurface); data = VideoData::CreateFromImage(info, aFrameRef.byte_offset, aFrameRef.composition_timestamp.ToMicroseconds(), aFrameRef.duration.ToMicroseconds(), image.forget(), aFrameRef.is_sync_point, aFrameRef.decode_timestamp.ToMicroseconds(), visible); #else MOZ_ASSERT_UNREACHABLE("No MacIOSurface on iOS"); #endif } if (!data) { NS_ERROR("Couldn't create VideoData for frame"); mCallback->Error(MediaResult(NS_ERROR_OUT_OF_MEMORY, __func__)); return NS_ERROR_OUT_OF_MEMORY; } // Frames come out in DTS order but we need to output them // in composition order. MonitorAutoLock mon(mMonitor); mReorderQueue.Push(data); if (mReorderQueue.Length() > mMaxRefFrames) { mCallback->Output(mReorderQueue.Pop().get()); } mCallback->InputExhausted(); LOG("%llu decoded frames queued", static_cast(mReorderQueue.Length())); return NS_OK; } nsresult AppleVTDecoder::WaitForAsynchronousFrames() { OSStatus rv = VTDecompressionSessionWaitForAsynchronousFrames(mSession); if (rv != noErr) { LOG("AppleVTDecoder: Error %d waiting for asynchronous frames", rv); return NS_ERROR_FAILURE; } return NS_OK; } // Helper to fill in a timestamp structure. static CMSampleTimingInfo TimingInfoFromSample(MediaRawData* aSample) { CMSampleTimingInfo timestamp; timestamp.duration = CMTimeMake(aSample->mDuration, USECS_PER_S); timestamp.presentationTimeStamp = CMTimeMake(aSample->mTime, USECS_PER_S); timestamp.decodeTimeStamp = CMTimeMake(aSample->mTimecode, USECS_PER_S); return timestamp; } MediaResult AppleVTDecoder::DoDecode(MediaRawData* aSample) { AssertOnTaskQueueThread(); // For some reason this gives me a double-free error with stagefright. AutoCFRelease block = nullptr; AutoCFRelease sample = nullptr; VTDecodeInfoFlags infoFlags; OSStatus rv; // FIXME: This copies the sample data. I think we can provide // a custom block source which reuses the aSample buffer. // But note that there may be a problem keeping the samples // alive over multiple frames. rv = CMBlockBufferCreateWithMemoryBlock(kCFAllocatorDefault, // Struct allocator. const_cast(aSample->Data()), aSample->Size(), kCFAllocatorNull, // Block allocator. NULL, // Block source. 0, // Data offset. aSample->Size(), false, block.receive()); if (rv != noErr) { NS_ERROR("Couldn't create CMBlockBuffer"); mCallback->Error( MediaResult(NS_ERROR_OUT_OF_MEMORY, RESULT_DETAIL("CMBlockBufferCreateWithMemoryBlock:%x", rv))); return MediaResult(NS_ERROR_OUT_OF_MEMORY, __func__); } CMSampleTimingInfo timestamp = TimingInfoFromSample(aSample); rv = CMSampleBufferCreate(kCFAllocatorDefault, block, true, 0, 0, mFormat, 1, 1, ×tamp, 0, NULL, sample.receive()); if (rv != noErr) { NS_ERROR("Couldn't create CMSampleBuffer"); mCallback->Error(MediaResult(NS_ERROR_OUT_OF_MEMORY, RESULT_DETAIL("CMSampleBufferCreate:%x", rv))); return MediaResult(NS_ERROR_OUT_OF_MEMORY, __func__); } VTDecodeFrameFlags decodeFlags = kVTDecodeFrame_EnableAsynchronousDecompression; rv = VTDecompressionSessionDecodeFrame(mSession, sample, decodeFlags, CreateAppleFrameRef(aSample), &infoFlags); if (rv != noErr && !(infoFlags & kVTDecodeInfo_FrameDropped)) { LOG("AppleVTDecoder: Error %d VTDecompressionSessionDecodeFrame", rv); NS_WARNING("Couldn't pass frame to decoder"); mCallback->Error( MediaResult(NS_ERROR_DOM_MEDIA_DECODE_ERR, RESULT_DETAIL("VTDecompressionSessionDecodeFrame:%x", rv))); return NS_ERROR_DOM_MEDIA_DECODE_ERR; } return NS_OK; } nsresult AppleVTDecoder::InitializeSession() { OSStatus rv; AutoCFRelease extensions = CreateDecoderExtensions(); rv = CMVideoFormatDescriptionCreate(kCFAllocatorDefault, kCMVideoCodecType_H264, mPictureWidth, mPictureHeight, extensions, &mFormat); if (rv != noErr) { NS_ERROR("Couldn't create format description!"); return NS_ERROR_FAILURE; } // Contruct video decoder selection spec. AutoCFRelease spec = CreateDecoderSpecification(); // Contruct output configuration. AutoCFRelease outputConfiguration = CreateOutputConfiguration(); VTDecompressionOutputCallbackRecord cb = { PlatformCallback, this }; rv = VTDecompressionSessionCreate(kCFAllocatorDefault, mFormat, spec, // Video decoder selection. outputConfiguration, // Output video format. &cb, &mSession); if (rv != noErr) { NS_ERROR("Couldn't create decompression session!"); return NS_ERROR_FAILURE; } if (AppleVTLinker::skPropUsingHWAccel) { CFBooleanRef isUsingHW = nullptr; rv = VTSessionCopyProperty(mSession, AppleVTLinker::skPropUsingHWAccel, kCFAllocatorDefault, &isUsingHW); if (rv != noErr) { LOG("AppleVTDecoder: system doesn't support hardware acceleration"); } mIsHardwareAccelerated = rv == noErr && isUsingHW == kCFBooleanTrue; LOG("AppleVTDecoder: %s hardware accelerated decoding", mIsHardwareAccelerated ? "using" : "not using"); } else { LOG("AppleVTDecoder: couldn't determine hardware acceleration status."); } return NS_OK; } CFDictionaryRef AppleVTDecoder::CreateDecoderExtensions() { AutoCFRelease avc_data = CFDataCreate(kCFAllocatorDefault, mExtraData->Elements(), mExtraData->Length()); const void* atomsKey[] = { CFSTR("avcC") }; const void* atomsValue[] = { avc_data }; static_assert(ArrayLength(atomsKey) == ArrayLength(atomsValue), "Non matching keys/values array size"); AutoCFRelease atoms = CFDictionaryCreate(kCFAllocatorDefault, atomsKey, atomsValue, ArrayLength(atomsKey), &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks); const void* extensionKeys[] = { kCVImageBufferChromaLocationBottomFieldKey, kCVImageBufferChromaLocationTopFieldKey, AppleCMLinker::skPropExtensionAtoms }; const void* extensionValues[] = { kCVImageBufferChromaLocation_Left, kCVImageBufferChromaLocation_Left, atoms }; static_assert(ArrayLength(extensionKeys) == ArrayLength(extensionValues), "Non matching keys/values array size"); return CFDictionaryCreate(kCFAllocatorDefault, extensionKeys, extensionValues, ArrayLength(extensionKeys), &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks); } CFDictionaryRef AppleVTDecoder::CreateDecoderSpecification() { if (!AppleVTLinker::skPropEnableHWAccel) { return nullptr; } const void* specKeys[] = { AppleVTLinker::skPropEnableHWAccel }; const void* specValues[1]; if (AppleDecoderModule::sCanUseHardwareVideoDecoder) { specValues[0] = kCFBooleanTrue; } else { // This GPU is blacklisted for hardware decoding. specValues[0] = kCFBooleanFalse; } static_assert(ArrayLength(specKeys) == ArrayLength(specValues), "Non matching keys/values array size"); return CFDictionaryCreate(kCFAllocatorDefault, specKeys, specValues, ArrayLength(specKeys), &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks); } CFDictionaryRef AppleVTDecoder::CreateOutputConfiguration() { if (mUseSoftwareImages) { // Output format type: SInt32 PixelFormatTypeValue = kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange; AutoCFRelease PixelFormatTypeNumber = CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &PixelFormatTypeValue); const void* outputKeys[] = { kCVPixelBufferPixelFormatTypeKey }; const void* outputValues[] = { PixelFormatTypeNumber }; static_assert(ArrayLength(outputKeys) == ArrayLength(outputValues), "Non matching keys/values array size"); return CFDictionaryCreate(kCFAllocatorDefault, outputKeys, outputValues, ArrayLength(outputKeys), &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks); } #ifndef MOZ_WIDGET_UIKIT // Output format type: SInt32 PixelFormatTypeValue = kCVPixelFormatType_422YpCbCr8; AutoCFRelease PixelFormatTypeNumber = CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &PixelFormatTypeValue); // Construct IOSurface Properties const void* IOSurfaceKeys[] = { MacIOSurfaceLib::kPropIsGlobal }; const void* IOSurfaceValues[] = { kCFBooleanTrue }; static_assert(ArrayLength(IOSurfaceKeys) == ArrayLength(IOSurfaceValues), "Non matching keys/values array size"); // Contruct output configuration. AutoCFRelease IOSurfaceProperties = CFDictionaryCreate(kCFAllocatorDefault, IOSurfaceKeys, IOSurfaceValues, ArrayLength(IOSurfaceKeys), &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks); const void* outputKeys[] = { kCVPixelBufferIOSurfacePropertiesKey, kCVPixelBufferPixelFormatTypeKey, kCVPixelBufferOpenGLCompatibilityKey }; const void* outputValues[] = { IOSurfaceProperties, PixelFormatTypeNumber, kCFBooleanTrue }; static_assert(ArrayLength(outputKeys) == ArrayLength(outputValues), "Non matching keys/values array size"); return CFDictionaryCreate(kCFAllocatorDefault, outputKeys, outputValues, ArrayLength(outputKeys), &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks); #else MOZ_ASSERT_UNREACHABLE("No MacIOSurface on iOS"); #endif } } // namespace mozilla