From 5f8de423f190bbb79a62f804151bc24824fa32d8 Mon Sep 17 00:00:00 2001 From: "Matt A. Tobin" Date: Fri, 2 Feb 2018 04:16:08 -0500 Subject: Add m-esr52 at 52.6.0 --- dom/media/webspeech/synth/SpeechSynthesis.cpp | 333 ++++++++ dom/media/webspeech/synth/SpeechSynthesis.h | 85 +++ .../webspeech/synth/SpeechSynthesisUtterance.cpp | 178 +++++ .../webspeech/synth/SpeechSynthesisUtterance.h | 124 +++ dom/media/webspeech/synth/SpeechSynthesisVoice.cpp | 94 +++ dom/media/webspeech/synth/SpeechSynthesisVoice.h | 60 ++ .../synth/cocoa/OSXSpeechSynthesizerModule.cpp | 57 ++ .../synth/cocoa/OSXSpeechSynthesizerService.h | 44 ++ .../synth/cocoa/OSXSpeechSynthesizerService.mm | 499 ++++++++++++ dom/media/webspeech/synth/cocoa/moz.build | 12 + dom/media/webspeech/synth/crashtests/1230428.html | 32 + .../webspeech/synth/crashtests/crashtests.list | 1 + .../webspeech/synth/ipc/PSpeechSynthesis.ipdl | 49 ++ .../synth/ipc/PSpeechSynthesisRequest.ipdl | 46 ++ .../webspeech/synth/ipc/SpeechSynthesisChild.cpp | 213 ++++++ .../webspeech/synth/ipc/SpeechSynthesisChild.h | 106 +++ .../webspeech/synth/ipc/SpeechSynthesisParent.cpp | 234 ++++++ .../webspeech/synth/ipc/SpeechSynthesisParent.h | 108 +++ dom/media/webspeech/synth/moz.build | 70 ++ dom/media/webspeech/synth/nsISpeechService.idl | 173 +++++ .../webspeech/synth/nsISynthVoiceRegistry.idl | 77 ++ dom/media/webspeech/synth/nsSpeechTask.cpp | 783 +++++++++++++++++++ dom/media/webspeech/synth/nsSpeechTask.h | 139 ++++ dom/media/webspeech/synth/nsSynthVoiceRegistry.cpp | 835 +++++++++++++++++++++ dom/media/webspeech/synth/nsSynthVoiceRegistry.h | 109 +++ dom/media/webspeech/synth/pico/PicoModule.cpp | 58 ++ dom/media/webspeech/synth/pico/moz.build | 13 + dom/media/webspeech/synth/pico/nsPicoService.cpp | 761 +++++++++++++++++++ dom/media/webspeech/synth/pico/nsPicoService.h | 93 +++ .../synth/speechd/SpeechDispatcherModule.cpp | 56 ++ .../synth/speechd/SpeechDispatcherService.cpp | 593 +++++++++++++++ .../synth/speechd/SpeechDispatcherService.h | 67 ++ dom/media/webspeech/synth/speechd/moz.build | 13 + dom/media/webspeech/synth/test/FakeSynthModule.cpp | 55 ++ dom/media/webspeech/synth/test/common.js | 91 +++ .../webspeech/synth/test/file_bfcache_frame.html | 28 + .../webspeech/synth/test/file_global_queue.html | 69 ++ .../synth/test/file_global_queue_cancel.html | 88 +++ .../synth/test/file_global_queue_pause.html | 131 ++++ .../synth/test/file_indirect_service_events.html | 102 +++ dom/media/webspeech/synth/test/file_setup.html | 95 +++ .../webspeech/synth/test/file_speech_cancel.html | 100 +++ .../webspeech/synth/test/file_speech_error.html | 46 ++ .../webspeech/synth/test/file_speech_queue.html | 85 +++ .../webspeech/synth/test/file_speech_simple.html | 53 ++ dom/media/webspeech/synth/test/mochitest.ini | 26 + .../webspeech/synth/test/nsFakeSynthServices.cpp | 401 ++++++++++ .../webspeech/synth/test/nsFakeSynthServices.h | 52 ++ .../synth/test/startup/file_voiceschanged.html | 32 + .../webspeech/synth/test/startup/mochitest.ini | 7 + .../synth/test/startup/test_voiceschanged.html | 32 + dom/media/webspeech/synth/test/test_bfcache.html | 45 ++ .../webspeech/synth/test/test_global_queue.html | 35 + .../synth/test/test_global_queue_cancel.html | 35 + .../synth/test/test_global_queue_pause.html | 35 + .../synth/test/test_indirect_service_events.html | 36 + dom/media/webspeech/synth/test/test_setup.html | 32 + .../webspeech/synth/test/test_speech_cancel.html | 35 + .../webspeech/synth/test/test_speech_error.html | 35 + 
.../webspeech/synth/test/test_speech_queue.html | 37 + .../webspeech/synth/test/test_speech_simple.html | 34 + dom/media/webspeech/synth/windows/SapiModule.cpp | 57 ++ dom/media/webspeech/synth/windows/SapiService.cpp | 470 ++++++++++++ dom/media/webspeech/synth/windows/SapiService.h | 59 ++ dom/media/webspeech/synth/windows/moz.build | 13 + 65 files changed, 8566 insertions(+) create mode 100644 dom/media/webspeech/synth/SpeechSynthesis.cpp create mode 100644 dom/media/webspeech/synth/SpeechSynthesis.h create mode 100644 dom/media/webspeech/synth/SpeechSynthesisUtterance.cpp create mode 100644 dom/media/webspeech/synth/SpeechSynthesisUtterance.h create mode 100644 dom/media/webspeech/synth/SpeechSynthesisVoice.cpp create mode 100644 dom/media/webspeech/synth/SpeechSynthesisVoice.h create mode 100644 dom/media/webspeech/synth/cocoa/OSXSpeechSynthesizerModule.cpp create mode 100644 dom/media/webspeech/synth/cocoa/OSXSpeechSynthesizerService.h create mode 100644 dom/media/webspeech/synth/cocoa/OSXSpeechSynthesizerService.mm create mode 100644 dom/media/webspeech/synth/cocoa/moz.build create mode 100644 dom/media/webspeech/synth/crashtests/1230428.html create mode 100644 dom/media/webspeech/synth/crashtests/crashtests.list create mode 100644 dom/media/webspeech/synth/ipc/PSpeechSynthesis.ipdl create mode 100644 dom/media/webspeech/synth/ipc/PSpeechSynthesisRequest.ipdl create mode 100644 dom/media/webspeech/synth/ipc/SpeechSynthesisChild.cpp create mode 100644 dom/media/webspeech/synth/ipc/SpeechSynthesisChild.h create mode 100644 dom/media/webspeech/synth/ipc/SpeechSynthesisParent.cpp create mode 100644 dom/media/webspeech/synth/ipc/SpeechSynthesisParent.h create mode 100644 dom/media/webspeech/synth/moz.build create mode 100644 dom/media/webspeech/synth/nsISpeechService.idl create mode 100644 dom/media/webspeech/synth/nsISynthVoiceRegistry.idl create mode 100644 dom/media/webspeech/synth/nsSpeechTask.cpp create mode 100644 dom/media/webspeech/synth/nsSpeechTask.h create mode 100644 dom/media/webspeech/synth/nsSynthVoiceRegistry.cpp create mode 100644 dom/media/webspeech/synth/nsSynthVoiceRegistry.h create mode 100644 dom/media/webspeech/synth/pico/PicoModule.cpp create mode 100644 dom/media/webspeech/synth/pico/moz.build create mode 100644 dom/media/webspeech/synth/pico/nsPicoService.cpp create mode 100644 dom/media/webspeech/synth/pico/nsPicoService.h create mode 100644 dom/media/webspeech/synth/speechd/SpeechDispatcherModule.cpp create mode 100644 dom/media/webspeech/synth/speechd/SpeechDispatcherService.cpp create mode 100644 dom/media/webspeech/synth/speechd/SpeechDispatcherService.h create mode 100644 dom/media/webspeech/synth/speechd/moz.build create mode 100644 dom/media/webspeech/synth/test/FakeSynthModule.cpp create mode 100644 dom/media/webspeech/synth/test/common.js create mode 100644 dom/media/webspeech/synth/test/file_bfcache_frame.html create mode 100644 dom/media/webspeech/synth/test/file_global_queue.html create mode 100644 dom/media/webspeech/synth/test/file_global_queue_cancel.html create mode 100644 dom/media/webspeech/synth/test/file_global_queue_pause.html create mode 100644 dom/media/webspeech/synth/test/file_indirect_service_events.html create mode 100644 dom/media/webspeech/synth/test/file_setup.html create mode 100644 dom/media/webspeech/synth/test/file_speech_cancel.html create mode 100644 dom/media/webspeech/synth/test/file_speech_error.html create mode 100644 dom/media/webspeech/synth/test/file_speech_queue.html create mode 100644 
dom/media/webspeech/synth/test/file_speech_simple.html
 create mode 100644 dom/media/webspeech/synth/test/mochitest.ini
 create mode 100644 dom/media/webspeech/synth/test/nsFakeSynthServices.cpp
 create mode 100644 dom/media/webspeech/synth/test/nsFakeSynthServices.h
 create mode 100644 dom/media/webspeech/synth/test/startup/file_voiceschanged.html
 create mode 100644 dom/media/webspeech/synth/test/startup/mochitest.ini
 create mode 100644 dom/media/webspeech/synth/test/startup/test_voiceschanged.html
 create mode 100644 dom/media/webspeech/synth/test/test_bfcache.html
 create mode 100644 dom/media/webspeech/synth/test/test_global_queue.html
 create mode 100644 dom/media/webspeech/synth/test/test_global_queue_cancel.html
 create mode 100644 dom/media/webspeech/synth/test/test_global_queue_pause.html
 create mode 100644 dom/media/webspeech/synth/test/test_indirect_service_events.html
 create mode 100644 dom/media/webspeech/synth/test/test_setup.html
 create mode 100644 dom/media/webspeech/synth/test/test_speech_cancel.html
 create mode 100644 dom/media/webspeech/synth/test/test_speech_error.html
 create mode 100644 dom/media/webspeech/synth/test/test_speech_queue.html
 create mode 100644 dom/media/webspeech/synth/test/test_speech_simple.html
 create mode 100644 dom/media/webspeech/synth/windows/SapiModule.cpp
 create mode 100644 dom/media/webspeech/synth/windows/SapiService.cpp
 create mode 100644 dom/media/webspeech/synth/windows/SapiService.h
 create mode 100644 dom/media/webspeech/synth/windows/moz.build

diff --git a/dom/media/webspeech/synth/SpeechSynthesis.cpp b/dom/media/webspeech/synth/SpeechSynthesis.cpp
new file mode 100644
index 000000000..f00a1064a
--- /dev/null
+++ b/dom/media/webspeech/synth/SpeechSynthesis.cpp
@@ -0,0 +1,333 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsISupportsPrimitives.h"
+#include "nsSpeechTask.h"
+#include "mozilla/Logging.h"
+
+#include "mozilla/dom/ContentChild.h"
+#include "mozilla/dom/Element.h"
+
+#include "mozilla/dom/SpeechSynthesisBinding.h"
+#include "SpeechSynthesis.h"
+#include "nsSynthVoiceRegistry.h"
+#include "nsIDocument.h"
+
+#undef LOG
+mozilla::LogModule*
+GetSpeechSynthLog()
+{
+  static mozilla::LazyLogModule sLog("SpeechSynthesis");
+
+  return sLog;
+}
+#define LOG(type, msg) MOZ_LOG(GetSpeechSynthLog(), type, msg)
+
+namespace mozilla {
+namespace dom {
+
+NS_IMPL_CYCLE_COLLECTION_CLASS(SpeechSynthesis)
+
+NS_IMPL_CYCLE_COLLECTION_UNLINK_BEGIN_INHERITED(SpeechSynthesis, DOMEventTargetHelper)
+  NS_IMPL_CYCLE_COLLECTION_UNLINK(mCurrentTask)
+  NS_IMPL_CYCLE_COLLECTION_UNLINK(mSpeechQueue)
+  tmp->mVoiceCache.Clear();
+NS_IMPL_CYCLE_COLLECTION_UNLINK_END
+
+NS_IMPL_CYCLE_COLLECTION_TRAVERSE_BEGIN_INHERITED(SpeechSynthesis, DOMEventTargetHelper)
+  NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mCurrentTask)
+  NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mSpeechQueue)
+  for (auto iter = tmp->mVoiceCache.Iter(); !iter.Done(); iter.Next()) {
+    SpeechSynthesisVoice* voice = iter.UserData();
+    cb.NoteXPCOMChild(voice);
+  }
+NS_IMPL_CYCLE_COLLECTION_TRAVERSE_END
+
+NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION_INHERITED(SpeechSynthesis)
+  NS_INTERFACE_MAP_ENTRY(nsIObserver)
+  NS_INTERFACE_MAP_ENTRY(nsISupportsWeakReference)
+NS_INTERFACE_MAP_END_INHERITING(DOMEventTargetHelper)
+
+NS_IMPL_ADDREF_INHERITED(SpeechSynthesis, DOMEventTargetHelper)
+NS_IMPL_RELEASE_INHERITED(SpeechSynthesis, DOMEventTargetHelper)
+
+SpeechSynthesis::SpeechSynthesis(nsPIDOMWindowInner* aParent)
+  : DOMEventTargetHelper(aParent)
+  , mHoldQueue(false)
+  , mInnerID(aParent->WindowID())
+{
+  MOZ_ASSERT(aParent->IsInnerWindow());
+  MOZ_ASSERT(NS_IsMainThread());
+
+  nsCOMPtr<nsIObserverService> obs = mozilla::services::GetObserverService();
+  if (obs) {
+    obs->AddObserver(this, "inner-window-destroyed", true);
+    obs->AddObserver(this, "synth-voices-changed", true);
+  }
+}
+
+SpeechSynthesis::~SpeechSynthesis()
+{
+}
+
+JSObject*
+SpeechSynthesis::WrapObject(JSContext* aCx, JS::Handle<JSObject*> aGivenProto)
+{
+  return SpeechSynthesisBinding::Wrap(aCx, this, aGivenProto);
+}
+
+bool
+SpeechSynthesis::Pending() const
+{
+  switch (mSpeechQueue.Length()) {
+  case 0:
+    return false;
+
+  case 1:
+    return mSpeechQueue.ElementAt(0)->GetState() == SpeechSynthesisUtterance::STATE_PENDING;
+
+  default:
+    return true;
+  }
+}
+
+bool
+SpeechSynthesis::Speaking() const
+{
+  if (!mSpeechQueue.IsEmpty() &&
+      mSpeechQueue.ElementAt(0)->GetState() == SpeechSynthesisUtterance::STATE_SPEAKING) {
+    return true;
+  }
+
+  // Otherwise, report the global speaking state if the global queue is
+  // enabled; this is false when nothing is speaking.
+  return nsSynthVoiceRegistry::GetInstance()->IsSpeaking();
+}
+
+bool
+SpeechSynthesis::Paused() const
+{
+  return mHoldQueue || (mCurrentTask && mCurrentTask->IsPrePaused()) ||
+         (!mSpeechQueue.IsEmpty() && mSpeechQueue.ElementAt(0)->IsPaused());
+}
+
+bool
+SpeechSynthesis::HasEmptyQueue() const
+{
+  return mSpeechQueue.Length() == 0;
+}
+
+bool SpeechSynthesis::HasVoices() const
+{
+  uint32_t voiceCount = mVoiceCache.Count();
+  if (voiceCount == 0) {
+    nsresult rv = nsSynthVoiceRegistry::GetInstance()->GetVoiceCount(&voiceCount);
+    if (NS_WARN_IF(NS_FAILED(rv))) {
+      return false;
+    }
+  }
+
+  return voiceCount != 0;
+}
+
+void
+SpeechSynthesis::Speak(SpeechSynthesisUtterance& aUtterance)
+{
+  if (aUtterance.mState != SpeechSynthesisUtterance::STATE_NONE) {
+    // XXX: Should probably raise an error
+    return;
+  }
+
+  mSpeechQueue.AppendElement(&aUtterance);
+  aUtterance.mState = SpeechSynthesisUtterance::STATE_PENDING;
+
+  // If we only have one item in the queue, we aren't pre-paused, and
+  // we have voices available, speak it.
+  if (mSpeechQueue.Length() == 1 && !mCurrentTask && !mHoldQueue && HasVoices()) {
+    AdvanceQueue();
+  }
+}
+
+void
+SpeechSynthesis::AdvanceQueue()
+{
+  LOG(LogLevel::Debug,
+      ("SpeechSynthesis::AdvanceQueue length=%d", mSpeechQueue.Length()));
+
+  if (mSpeechQueue.IsEmpty()) {
+    return;
+  }
+
+  RefPtr<SpeechSynthesisUtterance> utterance = mSpeechQueue.ElementAt(0);
+
+  nsAutoString docLang;
+  nsCOMPtr<nsPIDOMWindowInner> window = GetOwner();
+  nsIDocument* doc = window ? window->GetExtantDoc() : nullptr;
+
+  if (doc) {
+    Element* elm = doc->GetHtmlElement();
+
+    if (elm) {
+      elm->GetLang(docLang);
+    }
+  }
+
+  mCurrentTask =
+    nsSynthVoiceRegistry::GetInstance()->SpeakUtterance(*utterance, docLang);
+
+  if (mCurrentTask) {
+    mCurrentTask->SetSpeechSynthesis(this);
+  }
+}
+
+void
+SpeechSynthesis::Cancel()
+{
+  if (!mSpeechQueue.IsEmpty() &&
+      mSpeechQueue.ElementAt(0)->GetState() == SpeechSynthesisUtterance::STATE_SPEAKING) {
+    // Remove all queued utterances except for the current one; it will be
+    // removed in OnEnd.
+    mSpeechQueue.RemoveElementsAt(1, mSpeechQueue.Length() - 1);
+  } else {
+    mSpeechQueue.Clear();
+  }
+
+  if (mCurrentTask) {
+    mCurrentTask->Cancel();
+  }
+}
+
+void
+SpeechSynthesis::Pause()
+{
+  if (Paused()) {
+    return;
+  }
+
+  if (mCurrentTask && !mSpeechQueue.IsEmpty() &&
+      mSpeechQueue.ElementAt(0)->GetState() != SpeechSynthesisUtterance::STATE_ENDED) {
+    mCurrentTask->Pause();
+  } else {
+    mHoldQueue = true;
+  }
+}
+
+void
+SpeechSynthesis::Resume()
+{
+  if (!Paused()) {
+    return;
+  }
+
+  if (mCurrentTask) {
+    mCurrentTask->Resume();
+  } else {
+    mHoldQueue = false;
+    AdvanceQueue();
+  }
+}
+
+void
+SpeechSynthesis::OnEnd(const nsSpeechTask* aTask)
+{
+  MOZ_ASSERT(mCurrentTask == aTask);
+
+  if (!mSpeechQueue.IsEmpty()) {
+    mSpeechQueue.RemoveElementAt(0);
+  }
+
+  mCurrentTask = nullptr;
+  AdvanceQueue();
+}
+
+void
+SpeechSynthesis::GetVoices(nsTArray<RefPtr<SpeechSynthesisVoice>>& aResult)
+{
+  aResult.Clear();
+  uint32_t voiceCount = 0;
+
+  nsresult rv = nsSynthVoiceRegistry::GetInstance()->GetVoiceCount(&voiceCount);
+  if (NS_WARN_IF(NS_FAILED(rv))) {
+    return;
+  }
+
+  nsISupports* voiceParent = NS_ISUPPORTS_CAST(nsIObserver*, this);
+
+  for (uint32_t i = 0; i < voiceCount; i++) {
+    nsAutoString uri;
+    rv = nsSynthVoiceRegistry::GetInstance()->GetVoice(i, uri);
+
+    if (NS_FAILED(rv)) {
+      NS_WARNING("Failed to retrieve voice from registry");
+      continue;
+    }
+
+    SpeechSynthesisVoice* voice = mVoiceCache.GetWeak(uri);
+
+    if (!voice) {
+      voice = new SpeechSynthesisVoice(voiceParent, uri);
+    }
+
+    aResult.AppendElement(voice);
+  }
+
+  mVoiceCache.Clear();
+
+  for (uint32_t i = 0; i < aResult.Length(); i++) {
+    SpeechSynthesisVoice* voice = aResult[i];
+    mVoiceCache.Put(voice->mUri, voice);
+  }
+}
+
+// For testing purposes: cancels a misbehaving current task and flushes the
+// queue.
+void
+SpeechSynthesis::ForceEnd()
+{
+  if (mCurrentTask) {
+    mCurrentTask->ForceEnd();
+  }
+}
+
+NS_IMETHODIMP
+SpeechSynthesis::Observe(nsISupports* aSubject, const char* aTopic,
+                         const char16_t* aData)
+{
+  MOZ_ASSERT(NS_IsMainThread());
+
+  if (strcmp(aTopic, "inner-window-destroyed") == 0) {
+    nsCOMPtr<nsISupportsPRUint64> wrapper = do_QueryInterface(aSubject);
+    NS_ENSURE_TRUE(wrapper, NS_ERROR_FAILURE);
+
+    uint64_t innerID;
+    nsresult rv = wrapper->GetData(&innerID);
+    NS_ENSURE_SUCCESS(rv, rv);
+
+    if (innerID == mInnerID) {
+      Cancel();
+
+      nsCOMPtr<nsIObserverService> obs = mozilla::services::GetObserverService();
+      if (obs) {
+        obs->RemoveObserver(this, "inner-window-destroyed");
+      }
+    }
+  } else if (strcmp(aTopic, "synth-voices-changed") == 0) {
+    LOG(LogLevel::Debug, ("SpeechSynthesis::onvoiceschanged"));
+    DispatchTrustedEvent(NS_LITERAL_STRING("voiceschanged"));
+    // If we have a pending item, and voices become available, speak it.
+    if (!mCurrentTask && !mHoldQueue && HasVoices()) {
+      AdvanceQueue();
+    }
+  }
+
+  return NS_OK;
+}
+
+} // namespace dom
+} // namespace mozilla
diff --git a/dom/media/webspeech/synth/SpeechSynthesis.h b/dom/media/webspeech/synth/SpeechSynthesis.h
new file mode 100644
index 000000000..3a528cbf5
--- /dev/null
+++ b/dom/media/webspeech/synth/SpeechSynthesis.h
@@ -0,0 +1,85 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef mozilla_dom_SpeechSynthesis_h
+#define mozilla_dom_SpeechSynthesis_h
+
+#include "nsCOMPtr.h"
+#include "nsIObserver.h"
+#include "nsRefPtrHashtable.h"
+#include "nsString.h"
+#include "nsWeakReference.h"
+#include "nsWrapperCache.h"
+#include "js/TypeDecls.h"
+
+#include "SpeechSynthesisUtterance.h"
+#include "SpeechSynthesisVoice.h"
+
+class nsIDOMWindow;
+
+namespace mozilla {
+namespace dom {
+
+class nsSpeechTask;
+
+class SpeechSynthesis final : public DOMEventTargetHelper
+                            , public nsIObserver
+                            , public nsSupportsWeakReference
+{
+public:
+  explicit SpeechSynthesis(nsPIDOMWindowInner* aParent);
+
+  NS_DECL_ISUPPORTS_INHERITED
+  NS_DECL_CYCLE_COLLECTION_CLASS_INHERITED(SpeechSynthesis, DOMEventTargetHelper)
+  NS_DECL_NSIOBSERVER
+
+  JSObject* WrapObject(JSContext* aCx, JS::Handle<JSObject*> aGivenProto) override;
+
+  bool Pending() const;
+
+  bool Speaking() const;
+
+  bool Paused() const;
+
+  bool HasEmptyQueue() const;
+
+  void Speak(SpeechSynthesisUtterance& aUtterance);
+
+  void Cancel();
+
+  void Pause();
+
+  void Resume();
+
+  void OnEnd(const nsSpeechTask* aTask);
+
+  void GetVoices(nsTArray<RefPtr<SpeechSynthesisVoice>>& aResult);
+
+  void ForceEnd();
+
+  IMPL_EVENT_HANDLER(voiceschanged)
+
+private:
+  virtual ~SpeechSynthesis();
+
+  void AdvanceQueue();
+
+  bool HasVoices() const;
+
+  nsTArray<RefPtr<SpeechSynthesisUtterance>> mSpeechQueue;
+
+  RefPtr<nsSpeechTask> mCurrentTask;
+
+  nsRefPtrHashtable<nsStringHashKey, SpeechSynthesisVoice> mVoiceCache;
+
+  bool mHoldQueue;
+
+  uint64_t mInnerID;
+};
+
+} // namespace dom
+} // namespace mozilla
+#endif
diff --git a/dom/media/webspeech/synth/SpeechSynthesisUtterance.cpp b/dom/media/webspeech/synth/SpeechSynthesisUtterance.cpp
new file mode 100644
index 000000000..c7d80a5a1
--- /dev/null
+++ b/dom/media/webspeech/synth/SpeechSynthesisUtterance.cpp
@@ -0,0 +1,178 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsCOMPtr.h"
+#include "nsCycleCollectionParticipant.h"
+#include "nsGkAtoms.h"
+
+#include "mozilla/dom/SpeechSynthesisEvent.h"
+#include "mozilla/dom/SpeechSynthesisUtteranceBinding.h"
+#include "SpeechSynthesisUtterance.h"
+#include "SpeechSynthesisVoice.h"
+
+#include <algorithm>
+
+namespace mozilla {
+namespace dom {
+
+NS_IMPL_CYCLE_COLLECTION_INHERITED(SpeechSynthesisUtterance,
+                                   DOMEventTargetHelper, mVoice);
+
+NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION_INHERITED(SpeechSynthesisUtterance)
+NS_INTERFACE_MAP_END_INHERITING(DOMEventTargetHelper)
+
+NS_IMPL_ADDREF_INHERITED(SpeechSynthesisUtterance, DOMEventTargetHelper)
+NS_IMPL_RELEASE_INHERITED(SpeechSynthesisUtterance, DOMEventTargetHelper)
+
+SpeechSynthesisUtterance::SpeechSynthesisUtterance(nsPIDOMWindowInner* aOwnerWindow,
+                                                   const nsAString& text)
+  : DOMEventTargetHelper(aOwnerWindow)
+  , mText(text)
+  , mVolume(1)
+  , mRate(1)
+  , mPitch(1)
+  , mState(STATE_NONE)
+  , mPaused(false)
+{
+}
+
+SpeechSynthesisUtterance::~SpeechSynthesisUtterance() {}
+
+JSObject*
+SpeechSynthesisUtterance::WrapObject(JSContext* aCx, JS::Handle<JSObject*> aGivenProto)
+{
+  return SpeechSynthesisUtteranceBinding::Wrap(aCx, this, aGivenProto);
+}
+
+nsISupports*
+SpeechSynthesisUtterance::GetParentObject() const
+{
+  return GetOwner();
+}
+
+already_AddRefed<SpeechSynthesisUtterance>
+SpeechSynthesisUtterance::Constructor(GlobalObject& aGlobal,
+                                      ErrorResult& aRv)
+{
+  return Constructor(aGlobal, EmptyString(), aRv);
+}
+
+already_AddRefed<SpeechSynthesisUtterance>
+SpeechSynthesisUtterance::Constructor(GlobalObject& aGlobal,
+                                      const nsAString& aText,
+                                      ErrorResult& aRv)
+{
+  nsCOMPtr<nsPIDOMWindowInner> win = do_QueryInterface(aGlobal.GetAsSupports());
+
+  if (!win) {
+    aRv.Throw(NS_ERROR_FAILURE);
+    return nullptr;
+  }
+
+  MOZ_ASSERT(win->IsInnerWindow());
+  RefPtr<SpeechSynthesisUtterance> object =
+    new SpeechSynthesisUtterance(win, aText);
+  return object.forget();
+}
+
+void
+SpeechSynthesisUtterance::GetText(nsString& aResult) const
+{
+  aResult = mText;
+}
+
+void
+SpeechSynthesisUtterance::SetText(const nsAString& aText)
+{
+  mText = aText;
+}
+
+void
+SpeechSynthesisUtterance::GetLang(nsString& aResult) const
+{
+  aResult = mLang;
+}
+
+void
+SpeechSynthesisUtterance::SetLang(const nsAString& aLang)
+{
+  mLang = aLang;
+}
+
+SpeechSynthesisVoice*
+SpeechSynthesisUtterance::GetVoice() const
+{
+  return mVoice;
+}
+
+void
+SpeechSynthesisUtterance::SetVoice(SpeechSynthesisVoice* aVoice)
+{
+  mVoice = aVoice;
+}
+
+float
+SpeechSynthesisUtterance::Volume() const
+{
+  return mVolume;
+}
+
+void
+SpeechSynthesisUtterance::SetVolume(float aVolume)
+{
+  mVolume = std::max<float>(std::min<float>(aVolume, 1), 0);
+}
+
+float
+SpeechSynthesisUtterance::Rate() const
+{
+  return mRate;
+}
+
+void
+SpeechSynthesisUtterance::SetRate(float aRate)
+{
+  mRate = std::max<float>(std::min<float>(aRate, 10), 0.1f);
+}
+
+float
+SpeechSynthesisUtterance::Pitch() const
+{
+  return mPitch;
+}
+
+void
+SpeechSynthesisUtterance::SetPitch(float aPitch)
+{
+  mPitch = std::max<float>(std::min<float>(aPitch, 2), 0);
+}
+
+void
+SpeechSynthesisUtterance::GetChosenVoiceURI(nsString& aResult) const
+{
+  aResult = mChosenVoiceURI;
+}
+
+void
+SpeechSynthesisUtterance::DispatchSpeechSynthesisEvent(const nsAString& aEventType,
+                                                       uint32_t aCharIndex,
+                                                       float aElapsedTime,
+                                                       const nsAString& aName)
+{
+  SpeechSynthesisEventInit init;
+  init.mBubbles = false;
+  init.mCancelable = false;
+  init.mUtterance = this;
+  init.mCharIndex = aCharIndex;
+  init.mElapsedTime = aElapsedTime;
+  init.mName = aName;
+
+  RefPtr<SpeechSynthesisEvent> event =
+    SpeechSynthesisEvent::Constructor(this, aEventType, init);
+  DispatchTrustedEvent(event);
+}
+
+} // namespace dom
+} //
namespace mozilla diff --git a/dom/media/webspeech/synth/SpeechSynthesisUtterance.h b/dom/media/webspeech/synth/SpeechSynthesisUtterance.h new file mode 100644 index 000000000..e2b6840a1 --- /dev/null +++ b/dom/media/webspeech/synth/SpeechSynthesisUtterance.h @@ -0,0 +1,124 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozilla_dom_SpeechSynthesisUtterance_h +#define mozilla_dom_SpeechSynthesisUtterance_h + +#include "mozilla/DOMEventTargetHelper.h" +#include "nsCOMPtr.h" +#include "nsString.h" +#include "js/TypeDecls.h" + +#include "nsSpeechTask.h" + +namespace mozilla { +namespace dom { + +class SpeechSynthesisVoice; +class SpeechSynthesis; +class nsSynthVoiceRegistry; + +class SpeechSynthesisUtterance final : public DOMEventTargetHelper +{ + friend class SpeechSynthesis; + friend class nsSpeechTask; + friend class nsSynthVoiceRegistry; + +public: + SpeechSynthesisUtterance(nsPIDOMWindowInner* aOwnerWindow, const nsAString& aText); + + NS_DECL_ISUPPORTS_INHERITED + NS_DECL_CYCLE_COLLECTION_CLASS_INHERITED(SpeechSynthesisUtterance, + DOMEventTargetHelper) + NS_REALLY_FORWARD_NSIDOMEVENTTARGET(DOMEventTargetHelper) + + nsISupports* GetParentObject() const; + + JSObject* WrapObject(JSContext* aCx, JS::Handle aGivenProto) override; + + static + already_AddRefed Constructor(GlobalObject& aGlobal, + ErrorResult& aRv); + static + already_AddRefed Constructor(GlobalObject& aGlobal, + const nsAString& aText, + ErrorResult& aRv); + + void GetText(nsString& aResult) const; + + void SetText(const nsAString& aText); + + void GetLang(nsString& aResult) const; + + void SetLang(const nsAString& aLang); + + SpeechSynthesisVoice* GetVoice() const; + + void SetVoice(SpeechSynthesisVoice* aVoice); + + float Volume() const; + + void SetVolume(float aVolume); + + float Rate() const; + + void SetRate(float aRate); + + float Pitch() const; + + void SetPitch(float aPitch); + + void GetChosenVoiceURI(nsString& aResult) const; + + enum { + STATE_NONE, + STATE_PENDING, + STATE_SPEAKING, + STATE_ENDED + }; + + uint32_t GetState() { return mState; } + + bool IsPaused() { return mPaused; } + + IMPL_EVENT_HANDLER(start) + IMPL_EVENT_HANDLER(end) + IMPL_EVENT_HANDLER(error) + IMPL_EVENT_HANDLER(pause) + IMPL_EVENT_HANDLER(resume) + IMPL_EVENT_HANDLER(mark) + IMPL_EVENT_HANDLER(boundary) + +private: + virtual ~SpeechSynthesisUtterance(); + + void DispatchSpeechSynthesisEvent(const nsAString& aEventType, + uint32_t aCharIndex, + float aElapsedTime, const nsAString& aName); + + nsString mText; + + nsString mLang; + + float mVolume; + + float mRate; + + float mPitch; + + nsString mChosenVoiceURI; + + uint32_t mState; + + bool mPaused; + + RefPtr mVoice; +}; + +} // namespace dom +} // namespace mozilla + +#endif diff --git a/dom/media/webspeech/synth/SpeechSynthesisVoice.cpp b/dom/media/webspeech/synth/SpeechSynthesisVoice.cpp new file mode 100644 index 000000000..8c8f72284 --- /dev/null +++ b/dom/media/webspeech/synth/SpeechSynthesisVoice.cpp @@ -0,0 +1,94 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "SpeechSynthesis.h" +#include "nsSynthVoiceRegistry.h" +#include "mozilla/dom/SpeechSynthesisVoiceBinding.h" + +namespace mozilla { +namespace dom { + +NS_IMPL_CYCLE_COLLECTION_WRAPPERCACHE(SpeechSynthesisVoice, mParent) +NS_IMPL_CYCLE_COLLECTING_ADDREF(SpeechSynthesisVoice) +NS_IMPL_CYCLE_COLLECTING_RELEASE(SpeechSynthesisVoice) +NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(SpeechSynthesisVoice) + NS_WRAPPERCACHE_INTERFACE_MAP_ENTRY + NS_INTERFACE_MAP_ENTRY(nsISupports) +NS_INTERFACE_MAP_END + +SpeechSynthesisVoice::SpeechSynthesisVoice(nsISupports* aParent, + const nsAString& aUri) + : mParent(aParent) + , mUri(aUri) +{ +} + +SpeechSynthesisVoice::~SpeechSynthesisVoice() +{ +} + +JSObject* +SpeechSynthesisVoice::WrapObject(JSContext* aCx, JS::Handle aGivenProto) +{ + return SpeechSynthesisVoiceBinding::Wrap(aCx, this, aGivenProto); +} + +nsISupports* +SpeechSynthesisVoice::GetParentObject() const +{ + return mParent; +} + +void +SpeechSynthesisVoice::GetVoiceURI(nsString& aRetval) const +{ + aRetval = mUri; +} + +void +SpeechSynthesisVoice::GetName(nsString& aRetval) const +{ + DebugOnly rv = + nsSynthVoiceRegistry::GetInstance()->GetVoiceName(mUri, aRetval); + NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), + "Failed to get SpeechSynthesisVoice.name"); +} + +void +SpeechSynthesisVoice::GetLang(nsString& aRetval) const +{ + DebugOnly rv = + nsSynthVoiceRegistry::GetInstance()->GetVoiceLang(mUri, aRetval); + NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), + "Failed to get SpeechSynthesisVoice.lang"); +} + +bool +SpeechSynthesisVoice::LocalService() const +{ + bool isLocal; + DebugOnly rv = + nsSynthVoiceRegistry::GetInstance()->IsLocalVoice(mUri, &isLocal); + NS_WARNING_ASSERTION( + NS_SUCCEEDED(rv), "Failed to get SpeechSynthesisVoice.localService"); + + return isLocal; +} + +bool +SpeechSynthesisVoice::Default() const +{ + bool isDefault; + DebugOnly rv = + nsSynthVoiceRegistry::GetInstance()->IsDefaultVoice(mUri, &isDefault); + NS_WARNING_ASSERTION( + NS_SUCCEEDED(rv), "Failed to get SpeechSynthesisVoice.default"); + + return isDefault; +} + +} // namespace dom +} // namespace mozilla diff --git a/dom/media/webspeech/synth/SpeechSynthesisVoice.h b/dom/media/webspeech/synth/SpeechSynthesisVoice.h new file mode 100644 index 000000000..5c3c651e2 --- /dev/null +++ b/dom/media/webspeech/synth/SpeechSynthesisVoice.h @@ -0,0 +1,60 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef mozilla_dom_SpeechSynthesisVoice_h +#define mozilla_dom_SpeechSynthesisVoice_h + +#include "nsCOMPtr.h" +#include "nsString.h" +#include "nsWrapperCache.h" +#include "js/TypeDecls.h" + +#include "nsISpeechService.h" + +namespace mozilla { +namespace dom { + +class nsSynthVoiceRegistry; +class SpeechSynthesis; + +class SpeechSynthesisVoice final : public nsISupports, + public nsWrapperCache +{ + friend class nsSynthVoiceRegistry; + friend class SpeechSynthesis; + +public: + SpeechSynthesisVoice(nsISupports* aParent, const nsAString& aUri); + + NS_DECL_CYCLE_COLLECTING_ISUPPORTS + NS_DECL_CYCLE_COLLECTION_SCRIPT_HOLDER_CLASS(SpeechSynthesisVoice) + + nsISupports* GetParentObject() const; + + JSObject* WrapObject(JSContext* aCx, JS::Handle aGivenProto) override; + + void GetVoiceURI(nsString& aRetval) const; + + void GetName(nsString& aRetval) const; + + void GetLang(nsString& aRetval) const; + + bool LocalService() const; + + bool Default() const; + +private: + virtual ~SpeechSynthesisVoice(); + + nsCOMPtr mParent; + + nsString mUri; +}; + +} // namespace dom +} // namespace mozilla + +#endif diff --git a/dom/media/webspeech/synth/cocoa/OSXSpeechSynthesizerModule.cpp b/dom/media/webspeech/synth/cocoa/OSXSpeechSynthesizerModule.cpp new file mode 100644 index 000000000..bc80eb06e --- /dev/null +++ b/dom/media/webspeech/synth/cocoa/OSXSpeechSynthesizerModule.cpp @@ -0,0 +1,57 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "mozilla/ModuleUtils.h" +#include "nsIClassInfoImpl.h" + +#include "OSXSpeechSynthesizerService.h" + +using namespace mozilla::dom; + +#define OSXSPEECHSYNTHESIZERSERVICE_CID \ + {0x914e73b4, 0x6337, 0x4bef, {0x97, 0xf3, 0x4d, 0x06, 0x9e, 0x05, 0x3a, 0x12}} + +#define OSXSPEECHSYNTHESIZERSERVICE_CONTRACTID "@mozilla.org/synthsystem;1" + +// Defines OSXSpeechSynthesizerServiceConstructor +NS_GENERIC_FACTORY_SINGLETON_CONSTRUCTOR(OSXSpeechSynthesizerService, + OSXSpeechSynthesizerService::GetInstanceForService) + +// Defines kOSXSERVICE_CID +NS_DEFINE_NAMED_CID(OSXSPEECHSYNTHESIZERSERVICE_CID); + +static const mozilla::Module::CIDEntry kCIDs[] = { + { &kOSXSPEECHSYNTHESIZERSERVICE_CID, true, nullptr, OSXSpeechSynthesizerServiceConstructor }, + { nullptr } +}; + +static const mozilla::Module::ContractIDEntry kContracts[] = { + { OSXSPEECHSYNTHESIZERSERVICE_CONTRACTID, &kOSXSPEECHSYNTHESIZERSERVICE_CID }, + { nullptr } +}; + +static const mozilla::Module::CategoryEntry kCategories[] = { + { "speech-synth-started", "OSX Speech Synth", OSXSPEECHSYNTHESIZERSERVICE_CONTRACTID }, + { nullptr } +}; + +static void +UnloadOSXSpeechSynthesizerModule() +{ + OSXSpeechSynthesizerService::Shutdown(); +} + +static const mozilla::Module kModule = { + mozilla::Module::kVersion, + kCIDs, + kContracts, + kCategories, + nullptr, + nullptr, + UnloadOSXSpeechSynthesizerModule +}; + +NSMODULE_DEFN(osxsynth) = &kModule; diff --git a/dom/media/webspeech/synth/cocoa/OSXSpeechSynthesizerService.h b/dom/media/webspeech/synth/cocoa/OSXSpeechSynthesizerService.h new file mode 100644 index 000000000..18f52e59e --- /dev/null +++ b/dom/media/webspeech/synth/cocoa/OSXSpeechSynthesizerService.h @@ -0,0 +1,44 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set 
ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozilla_dom_OsxSpeechSynthesizerService_h +#define mozilla_dom_OsxSpeechSynthesizerService_h + +#include "nsISpeechService.h" +#include "nsIObserver.h" +#include "mozilla/StaticPtr.h" + +namespace mozilla { +namespace dom { + +class OSXSpeechSynthesizerService final : public nsISpeechService + , public nsIObserver +{ +public: + NS_DECL_THREADSAFE_ISUPPORTS + NS_DECL_NSISPEECHSERVICE + NS_DECL_NSIOBSERVER + + bool Init(); + + static OSXSpeechSynthesizerService* GetInstance(); + static already_AddRefed GetInstanceForService(); + static void Shutdown(); + +private: + OSXSpeechSynthesizerService(); + virtual ~OSXSpeechSynthesizerService(); + + bool RegisterVoices(); + + bool mInitialized; + static mozilla::StaticRefPtr sSingleton; +}; + +} // namespace dom +} // namespace mozilla + +#endif diff --git a/dom/media/webspeech/synth/cocoa/OSXSpeechSynthesizerService.mm b/dom/media/webspeech/synth/cocoa/OSXSpeechSynthesizerService.mm new file mode 100644 index 000000000..df6c5eaa0 --- /dev/null +++ b/dom/media/webspeech/synth/cocoa/OSXSpeechSynthesizerService.mm @@ -0,0 +1,499 @@ +/* -*- Mode: Objective-C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=2 sw=2 et tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsISupports.h" +#include "nsServiceManagerUtils.h" +#include "nsObjCExceptions.h" +#include "nsCocoaUtils.h" +#include "nsThreadUtils.h" +#include "mozilla/dom/nsSynthVoiceRegistry.h" +#include "mozilla/dom/nsSpeechTask.h" +#include "mozilla/Preferences.h" +#include "mozilla/Assertions.h" +#include "OSXSpeechSynthesizerService.h" + +#import + +// We can escape the default delimiters ("[[" and "]]") by temporarily +// changing the delimiters just before they appear, and changing them back +// just after. 
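+//
+// For illustration, with a hypothetical input string "a[[b" the escaping
+// loop in Speak() below produces
+//
+//   a[[dlim (( ))]][[((dlim [[ ]]))b
+//
+// i.e. the delimiter pair is switched to "((" / "))" just before the
+// literal "[[" and restored just after it, and the offsets array maps
+// indices in the escaped string back to positions in the original text so
+// word-boundary callbacks can report correct character indices.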
+#define DLIM_ESCAPE_START "[[dlim (( ))]]"
+#define DLIM_ESCAPE_END "((dlim [[ ]]))"
+
+using namespace mozilla;
+
+class SpeechTaskCallback final : public nsISpeechTaskCallback
+{
+public:
+  SpeechTaskCallback(nsISpeechTask* aTask,
+                     NSSpeechSynthesizer* aSynth,
+                     const nsTArray<size_t>& aOffsets)
+    : mTask(aTask)
+    , mSpeechSynthesizer(aSynth)
+    , mCurrentIndex(0)
+    , mOffsets(aOffsets)
+  {
+    mStartingTime = TimeStamp::Now();
+  }
+
+  NS_DECL_CYCLE_COLLECTING_ISUPPORTS
+  NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(SpeechTaskCallback, nsISpeechTaskCallback)
+
+  NS_DECL_NSISPEECHTASKCALLBACK
+
+  void OnWillSpeakWord(uint32_t aIndex);
+  void OnError(uint32_t aIndex);
+  void OnDidFinishSpeaking();
+
+private:
+  virtual ~SpeechTaskCallback()
+  {
+    [mSpeechSynthesizer release];
+  }
+
+  float GetTimeDurationFromStart();
+
+  nsCOMPtr<nsISpeechTask> mTask;
+  NSSpeechSynthesizer* mSpeechSynthesizer;
+  TimeStamp mStartingTime;
+  uint32_t mCurrentIndex;
+  nsTArray<size_t> mOffsets;
+};
+
+NS_IMPL_CYCLE_COLLECTION(SpeechTaskCallback, mTask);
+
+NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(SpeechTaskCallback)
+  NS_INTERFACE_MAP_ENTRY(nsISpeechTaskCallback)
+  NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechTaskCallback)
+NS_INTERFACE_MAP_END
+
+NS_IMPL_CYCLE_COLLECTING_ADDREF(SpeechTaskCallback)
+NS_IMPL_CYCLE_COLLECTING_RELEASE(SpeechTaskCallback)
+
+NS_IMETHODIMP
+SpeechTaskCallback::OnCancel()
+{
+  NS_OBJC_BEGIN_TRY_ABORT_BLOCK_NSRESULT;
+
+  [mSpeechSynthesizer stopSpeaking];
+  return NS_OK;
+
+  NS_OBJC_END_TRY_ABORT_BLOCK_NSRESULT;
+}
+
+NS_IMETHODIMP
+SpeechTaskCallback::OnPause()
+{
+  NS_OBJC_BEGIN_TRY_ABORT_BLOCK_NSRESULT;
+
+  [mSpeechSynthesizer pauseSpeakingAtBoundary:NSSpeechImmediateBoundary];
+  if (!mTask) {
+    // When pause() is called in the child process, the end event from the
+    // chrome process may not have been received yet.
+    return NS_ERROR_FAILURE;
+  }
+  mTask->DispatchPause(GetTimeDurationFromStart(), mCurrentIndex);
+  return NS_OK;
+
+  NS_OBJC_END_TRY_ABORT_BLOCK_NSRESULT;
+}
+
+NS_IMETHODIMP
+SpeechTaskCallback::OnResume()
+{
+  NS_OBJC_BEGIN_TRY_ABORT_BLOCK_NSRESULT;
+
+  [mSpeechSynthesizer continueSpeaking];
+  if (!mTask) {
+    // When resume() is called in the child process, the end event from the
+    // chrome process may not have been received yet.
+    return NS_ERROR_FAILURE;
+  }
+  mTask->DispatchResume(GetTimeDurationFromStart(), mCurrentIndex);
+  return NS_OK;
+
+  NS_OBJC_END_TRY_ABORT_BLOCK_NSRESULT;
+}
+
+NS_IMETHODIMP
+SpeechTaskCallback::OnVolumeChanged(float aVolume)
+{
+  NS_OBJC_BEGIN_TRY_ABORT_BLOCK_NSRESULT;
+
+  [mSpeechSynthesizer setObject:[NSNumber numberWithFloat:aVolume]
+                    forProperty:NSSpeechVolumeProperty error:nil];
+  return NS_OK;
+
+  NS_OBJC_END_TRY_ABORT_BLOCK_NSRESULT;
+}
+
+float
+SpeechTaskCallback::GetTimeDurationFromStart()
+{
+  TimeDuration duration = TimeStamp::Now() - mStartingTime;
+  return duration.ToMilliseconds();
+}
+
+void
+SpeechTaskCallback::OnWillSpeakWord(uint32_t aIndex)
+{
+  mCurrentIndex = aIndex < mOffsets.Length() ? mOffsets[aIndex] : mCurrentIndex;
+  if (!mTask) {
+    return;
+  }
+  mTask->DispatchBoundary(NS_LITERAL_STRING("word"),
+                          GetTimeDurationFromStart(), mCurrentIndex);
+}
+
+void
+SpeechTaskCallback::OnError(uint32_t aIndex)
+{
+  if (!mTask) {
+    return;
+  }
+  mTask->DispatchError(GetTimeDurationFromStart(), aIndex);
+}
+
+void
+SpeechTaskCallback::OnDidFinishSpeaking()
+{
+  mTask->DispatchEnd(GetTimeDurationFromStart(), mCurrentIndex);
+  // The delegate is no longer needed.
+  [mSpeechSynthesizer setDelegate:nil];
+  mTask = nullptr;
+}
+
+@interface SpeechDelegate : NSObject<NSSpeechSynthesizerDelegate>
+{
+@private
+  SpeechTaskCallback* mCallback;
+}
+
+- (id)initWithCallback:(SpeechTaskCallback*)aCallback;
+@end
+
+@implementation SpeechDelegate
+- (id)initWithCallback:(SpeechTaskCallback*)aCallback
+{
+  [super init];
+  mCallback = aCallback;
+  return self;
+}
+
+- (void)speechSynthesizer:(NSSpeechSynthesizer*)aSender
+            willSpeakWord:(NSRange)aRange ofString:(NSString*)aString
+{
+  mCallback->OnWillSpeakWord(aRange.location);
+}
+
+- (void)speechSynthesizer:(NSSpeechSynthesizer*)aSender
+        didFinishSpeaking:(BOOL)aFinishedSpeaking
+{
+  mCallback->OnDidFinishSpeaking();
+}
+
+- (void)speechSynthesizer:(NSSpeechSynthesizer*)aSender
+ didEncounterErrorAtIndex:(NSUInteger)aCharacterIndex
+                 ofString:(NSString*)aString
+                  message:(NSString*)aMessage
+{
+  mCallback->OnError(aCharacterIndex);
+}
+@end
+
+namespace mozilla {
+namespace dom {
+
+struct OSXVoice
+{
+  OSXVoice() : mIsDefault(false)
+  {
+  }
+
+  nsString mUri;
+  nsString mName;
+  nsString mLocale;
+  bool mIsDefault;
+};
+
+class RegisterVoicesRunnable final : public Runnable
+{
+public:
+  RegisterVoicesRunnable(OSXSpeechSynthesizerService* aSpeechService,
+                         nsTArray<OSXVoice>& aList)
+    : mSpeechService(aSpeechService)
+    , mVoices(aList)
+  {
+  }
+
+  NS_IMETHOD Run() override;
+
+private:
+  ~RegisterVoicesRunnable()
+  {
+  }
+
+  // This runnable is always dispatched synchronously, so it is unnecessary
+  // to hold a strong reference to the service; it outlives this runnable.
+  OSXSpeechSynthesizerService* mSpeechService;
+  nsTArray<OSXVoice>& mVoices;
+};
+
+NS_IMETHODIMP
+RegisterVoicesRunnable::Run()
+{
+  nsresult rv;
+  nsCOMPtr<nsISynthVoiceRegistry> registry =
+    do_GetService(NS_SYNTHVOICEREGISTRY_CONTRACTID, &rv);
+  if (!registry) {
+    return rv;
+  }
+
+  for (OSXVoice voice : mVoices) {
+    rv = registry->AddVoice(mSpeechService, voice.mUri, voice.mName,
+                            voice.mLocale, true, false);
+    if (NS_WARN_IF(NS_FAILED(rv))) {
+      continue;
+    }
+
+    if (voice.mIsDefault) {
+      registry->SetDefaultVoice(voice.mUri, true);
+    }
+  }
+
+  registry->NotifyVoicesChanged();
+
+  return NS_OK;
+}
+
+class EnumVoicesRunnable final : public Runnable
+{
+public:
+  explicit EnumVoicesRunnable(OSXSpeechSynthesizerService* aSpeechService)
+    : mSpeechService(aSpeechService)
+  {
+  }
+
+  NS_IMETHOD Run() override;
+
+private:
+  ~EnumVoicesRunnable()
+  {
+  }
+
+  RefPtr<OSXSpeechSynthesizerService> mSpeechService;
+};
+
+NS_IMETHODIMP
+EnumVoicesRunnable::Run()
+{
+  NS_OBJC_BEGIN_TRY_ABORT_BLOCK_NSRESULT;
+
+  AutoTArray<OSXVoice, 64> list;
+
+  NSArray* voices = [NSSpeechSynthesizer availableVoices];
+  NSString* defaultVoice = [NSSpeechSynthesizer defaultVoice];
+
+  for (NSString* voice in voices) {
+    OSXVoice item;
+
+    NSDictionary* attr = [NSSpeechSynthesizer attributesForVoice:voice];
+
+    nsAutoString identifier;
+    nsCocoaUtils::GetStringForNSString([attr objectForKey:NSVoiceIdentifier],
+                                       identifier);
+
+    nsCocoaUtils::GetStringForNSString([attr objectForKey:NSVoiceName], item.mName);
+
+    nsCocoaUtils::GetStringForNSString(
+      [attr objectForKey:NSVoiceLocaleIdentifier], item.mLocale);
+    item.mLocale.ReplaceChar('_', '-');
+
+    item.mUri.AssignLiteral("urn:moz-tts:osx:");
+    item.mUri.Append(identifier);
+
+    if ([voice isEqualToString:defaultVoice]) {
+      item.mIsDefault = true;
+    }
+
+    list.AppendElement(item);
+  }
+
+  RefPtr<RegisterVoicesRunnable> runnable =
+    new RegisterVoicesRunnable(mSpeechService, list);
+  NS_DispatchToMainThread(runnable, NS_DISPATCH_SYNC);
+
+  return NS_OK;
+
+  NS_OBJC_END_TRY_ABORT_BLOCK_NSRESULT;
+}
+
+StaticRefPtr<OSXSpeechSynthesizerService> OSXSpeechSynthesizerService::sSingleton;
+
+NS_INTERFACE_MAP_BEGIN(OSXSpeechSynthesizerService)
+  NS_INTERFACE_MAP_ENTRY(nsISpeechService)
+  NS_INTERFACE_MAP_ENTRY(nsIObserver)
+  NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechService)
+NS_INTERFACE_MAP_END
+
+NS_IMPL_ADDREF(OSXSpeechSynthesizerService)
+NS_IMPL_RELEASE(OSXSpeechSynthesizerService)
+
+OSXSpeechSynthesizerService::OSXSpeechSynthesizerService()
+  : mInitialized(false)
+{
+}
+
+OSXSpeechSynthesizerService::~OSXSpeechSynthesizerService()
+{
+}
+
+bool
+OSXSpeechSynthesizerService::Init()
+{
+  if (Preferences::GetBool("media.webspeech.synth.test") ||
+      !Preferences::GetBool("media.webspeech.synth.enabled")) {
+    // When the test service is enabled, we shouldn't add OS backends (Bug 1160844)
+    return false;
+  }
+
+  nsCOMPtr<nsIThread> thread;
+  if (NS_FAILED(NS_NewNamedThread("SpeechWorker", getter_AddRefs(thread)))) {
+    return false;
+  }
+
+  // Get all the voices and register them in the nsSynthVoiceRegistry
+  nsCOMPtr<nsIRunnable> runnable = new EnumVoicesRunnable(this);
+  thread->Dispatch(runnable, NS_DISPATCH_NORMAL);
+
+  mInitialized = true;
+  return true;
+}
+
+NS_IMETHODIMP
+OSXSpeechSynthesizerService::Speak(const nsAString& aText,
+                                   const nsAString& aUri,
+                                   float aVolume,
+                                   float aRate,
+                                   float aPitch,
+                                   nsISpeechTask* aTask)
+{
+  NS_OBJC_BEGIN_TRY_ABORT_BLOCK_NSRESULT;
+
+  MOZ_ASSERT(StringBeginsWith(aUri, NS_LITERAL_STRING("urn:moz-tts:osx:")),
+             "OSXSpeechSynthesizerService doesn't allow this voice URI");
+
+  NSSpeechSynthesizer* synth = [[NSSpeechSynthesizer alloc] init];
+  // strlen("urn:moz-tts:osx:") == 16
+  NSString* identifier = nsCocoaUtils::ToNSString(Substring(aUri, 16));
+  [synth setVoice:identifier];
+
+  // The default rate is 180-220 words per minute.
+  [synth setObject:[NSNumber numberWithInt:aRate * 200]
+       forProperty:NSSpeechRateProperty error:nil];
+  // Volume ranges from 0.0 to 1.0.
+  [synth setObject:[NSNumber numberWithFloat:aVolume]
+       forProperty:NSSpeechVolumeProperty error:nil];
+  // Scale the voice's default pitch: aPitch of 1 keeps the default, and the
+  // factor (aPitch / 2 + 0.5) ranges from 0.5 to 1.5.
+  NSNumber* defaultPitch =
+    [synth objectForProperty:NSSpeechPitchBaseProperty error:nil];
+  if (defaultPitch) {
+    int newPitch = [defaultPitch intValue] * (aPitch / 2 + 0.5);
+    [synth setObject:[NSNumber numberWithInt:newPitch]
+         forProperty:NSSpeechPitchBaseProperty error:nil];
+  }
+
+  nsAutoString escapedText;
+  // We need to map the offsets from the given text to the escaped text.
+  // The index of the offsets array is the position in the escaped text,
+  // the element value is the position in the user-supplied text.
+  nsTArray<size_t> offsets;
+  offsets.SetCapacity(aText.Length());
+
+  // This loop looks for occurrences of "[[" or "]]", escapes them, and
+  // populates the offsets array to supply a map to the original offsets.
+  for (size_t i = 0; i < aText.Length(); i++) {
+    if (aText.Length() > i + 1 &&
+        ((aText[i] == ']' && aText[i+1] == ']') ||
+         (aText[i] == '[' && aText[i+1] == '['))) {
+      escapedText.AppendLiteral(DLIM_ESCAPE_START);
+      offsets.AppendElements(strlen(DLIM_ESCAPE_START));
+      escapedText.Append(aText[i]);
+      offsets.AppendElement(i);
+      escapedText.Append(aText[++i]);
+      offsets.AppendElement(i);
+      escapedText.AppendLiteral(DLIM_ESCAPE_END);
+      offsets.AppendElements(strlen(DLIM_ESCAPE_END));
+    } else {
+      escapedText.Append(aText[i]);
+      offsets.AppendElement(i);
+    }
+  }
+
+  RefPtr<SpeechTaskCallback> callback =
+    new SpeechTaskCallback(aTask, synth, offsets);
+  nsresult rv = aTask->Setup(callback, 0, 0, 0);
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  SpeechDelegate* delegate = [[SpeechDelegate alloc] initWithCallback:callback];
+  [synth setDelegate:delegate];
+  [delegate release];
+
+  NSString* text = nsCocoaUtils::ToNSString(escapedText);
+  BOOL success = [synth startSpeakingString:text];
+  NS_ENSURE_TRUE(success, NS_ERROR_FAILURE);
+
+  aTask->DispatchStart();
+  return NS_OK;
+
+  NS_OBJC_END_TRY_ABORT_BLOCK_NSRESULT;
+}
+
+NS_IMETHODIMP
+OSXSpeechSynthesizerService::GetServiceType(SpeechServiceType* aServiceType)
+{
+  *aServiceType = nsISpeechService::SERVICETYPE_INDIRECT_AUDIO;
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+OSXSpeechSynthesizerService::Observe(nsISupports* aSubject, const char* aTopic,
+                                     const char16_t* aData)
+{
+  return NS_OK;
+}
+
+OSXSpeechSynthesizerService*
+OSXSpeechSynthesizerService::GetInstance()
+{
+  MOZ_ASSERT(NS_IsMainThread());
+  if (XRE_GetProcessType() != GeckoProcessType_Default) {
+    return nullptr;
+  }
+
+  if (!sSingleton) {
+    RefPtr<OSXSpeechSynthesizerService> speechService =
+      new OSXSpeechSynthesizerService();
+    if (speechService->Init()) {
+      sSingleton = speechService;
+    }
+  }
+  return sSingleton;
+}
+
+already_AddRefed<OSXSpeechSynthesizerService>
+OSXSpeechSynthesizerService::GetInstanceForService()
+{
+  RefPtr<OSXSpeechSynthesizerService> speechService = GetInstance();
+  return speechService.forget();
+}
+
+void
+OSXSpeechSynthesizerService::Shutdown()
+{
+  if (!sSingleton) {
+    return;
+  }
+  sSingleton = nullptr;
+}
+
+} // namespace dom
+} // namespace mozilla
diff --git a/dom/media/webspeech/synth/cocoa/moz.build b/dom/media/webspeech/synth/cocoa/moz.build
new file mode 100644
index 000000000..b48680c7a
--- /dev/null
+++ b/dom/media/webspeech/synth/cocoa/moz.build
@@ -0,0 +1,12 @@
+# -*- Mode: python;
indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +SOURCES += [ + 'OSXSpeechSynthesizerModule.cpp', + 'OSXSpeechSynthesizerService.mm' +] + +FINAL_LIBRARY = 'xul' diff --git a/dom/media/webspeech/synth/crashtests/1230428.html b/dom/media/webspeech/synth/crashtests/1230428.html new file mode 100644 index 000000000..40fa00071 --- /dev/null +++ b/dom/media/webspeech/synth/crashtests/1230428.html @@ -0,0 +1,32 @@ + + + + + + + + + diff --git a/dom/media/webspeech/synth/crashtests/crashtests.list b/dom/media/webspeech/synth/crashtests/crashtests.list new file mode 100644 index 000000000..07e931c92 --- /dev/null +++ b/dom/media/webspeech/synth/crashtests/crashtests.list @@ -0,0 +1 @@ +skip-if(!cocoaWidget) pref(media.webspeech.synth.enabled,true) load 1230428.html # bug 1230428 diff --git a/dom/media/webspeech/synth/ipc/PSpeechSynthesis.ipdl b/dom/media/webspeech/synth/ipc/PSpeechSynthesis.ipdl new file mode 100644 index 000000000..7b66034e4 --- /dev/null +++ b/dom/media/webspeech/synth/ipc/PSpeechSynthesis.ipdl @@ -0,0 +1,49 @@ +/* -*- Mode: c++; c-basic-offset: 2; indent-tabs-mode: nil; tab-width: 40 -*- */ +/* vim: set ts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +include protocol PContent; +include protocol PSpeechSynthesisRequest; + +namespace mozilla { +namespace dom { + +struct RemoteVoice { + nsString voiceURI; + nsString name; + nsString lang; + bool localService; + bool queued; +}; + +sync protocol PSpeechSynthesis +{ + manager PContent; + manages PSpeechSynthesisRequest; + +child: + + async VoiceAdded(RemoteVoice aVoice); + + async VoiceRemoved(nsString aUri); + + async SetDefaultVoice(nsString aUri, bool aIsDefault); + + async IsSpeakingChanged(bool aIsSpeaking); + + async NotifyVoicesChanged(); + +parent: + async __delete__(); + + async PSpeechSynthesisRequest(nsString aText, nsString aUri, nsString aLang, + float aVolume, float aRate, float aPitch); + + sync ReadVoicesAndState() returns (RemoteVoice[] aVoices, + nsString[] aDefaults, bool aIsSpeaking); +}; + +} // namespace dom +} // namespace mozilla diff --git a/dom/media/webspeech/synth/ipc/PSpeechSynthesisRequest.ipdl b/dom/media/webspeech/synth/ipc/PSpeechSynthesisRequest.ipdl new file mode 100644 index 000000000..9ffea29f5 --- /dev/null +++ b/dom/media/webspeech/synth/ipc/PSpeechSynthesisRequest.ipdl @@ -0,0 +1,46 @@ +/* -*- Mode: c++; c-basic-offset: 2; indent-tabs-mode: nil; tab-width: 40 -*- */ +/* vim: set ts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +include protocol PSpeechSynthesis; + +namespace mozilla { +namespace dom { + +async protocol PSpeechSynthesisRequest +{ + manager PSpeechSynthesis; + + parent: + + async __delete__(); + + async Pause(); + + async Resume(); + + async Cancel(); + + async ForceEnd(); + + async SetAudioOutputVolume(float aVolume); + + child: + + async OnEnd(bool aIsError, float aElapsedTime, uint32_t aCharIndex); + + async OnStart(nsString aUri); + + async OnPause(float aElapsedTime, uint32_t aCharIndex); + + async OnResume(float aElapsedTime, uint32_t aCharIndex); + + async OnBoundary(nsString aName, float aElapsedTime, uint32_t aCharIndex); + + async OnMark(nsString aName, float aElapsedTime, uint32_t aCharIndex); +}; + +} // namespace dom +} // namespace mozilla diff --git a/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.cpp b/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.cpp new file mode 100644 index 000000000..d1cb8bf0e --- /dev/null +++ b/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.cpp @@ -0,0 +1,213 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "SpeechSynthesisChild.h" +#include "nsSynthVoiceRegistry.h" + +namespace mozilla { +namespace dom { + +SpeechSynthesisChild::SpeechSynthesisChild() +{ + MOZ_COUNT_CTOR(SpeechSynthesisChild); +} + +SpeechSynthesisChild::~SpeechSynthesisChild() +{ + MOZ_COUNT_DTOR(SpeechSynthesisChild); +} + +bool +SpeechSynthesisChild::RecvVoiceAdded(const RemoteVoice& aVoice) +{ + nsSynthVoiceRegistry::RecvAddVoice(aVoice); + return true; +} + +bool +SpeechSynthesisChild::RecvVoiceRemoved(const nsString& aUri) +{ + nsSynthVoiceRegistry::RecvRemoveVoice(aUri); + return true; +} + +bool +SpeechSynthesisChild::RecvSetDefaultVoice(const nsString& aUri, + const bool& aIsDefault) +{ + nsSynthVoiceRegistry::RecvSetDefaultVoice(aUri, aIsDefault); + return true; +} + +bool +SpeechSynthesisChild::RecvIsSpeakingChanged(const bool& aIsSpeaking) +{ + nsSynthVoiceRegistry::RecvIsSpeakingChanged(aIsSpeaking); + return true; +} + +bool +SpeechSynthesisChild::RecvNotifyVoicesChanged() +{ + nsSynthVoiceRegistry::RecvNotifyVoicesChanged(); + return true; +} + +PSpeechSynthesisRequestChild* +SpeechSynthesisChild::AllocPSpeechSynthesisRequestChild(const nsString& aText, + const nsString& aLang, + const nsString& aUri, + const float& aVolume, + const float& aRate, + const float& aPitch) +{ + MOZ_CRASH("Caller is supposed to manually construct a request!"); +} + +bool +SpeechSynthesisChild::DeallocPSpeechSynthesisRequestChild(PSpeechSynthesisRequestChild* aActor) +{ + delete aActor; + return true; +} + +// SpeechSynthesisRequestChild + +SpeechSynthesisRequestChild::SpeechSynthesisRequestChild(SpeechTaskChild* aTask) + : mTask(aTask) +{ + mTask->mActor = this; + MOZ_COUNT_CTOR(SpeechSynthesisRequestChild); +} + +SpeechSynthesisRequestChild::~SpeechSynthesisRequestChild() +{ + MOZ_COUNT_DTOR(SpeechSynthesisRequestChild); +} + +bool +SpeechSynthesisRequestChild::RecvOnStart(const nsString& aUri) +{ + mTask->DispatchStartImpl(aUri); + return true; +} + +bool +SpeechSynthesisRequestChild::RecvOnEnd(const bool& aIsError, + const float& aElapsedTime, + const uint32_t& aCharIndex) +{ + SpeechSynthesisRequestChild* actor = mTask->mActor; + mTask->mActor = nullptr; + + if (aIsError) { + mTask->DispatchErrorImpl(aElapsedTime, aCharIndex); + } else { + mTask->DispatchEndImpl(aElapsedTime, aCharIndex); + } + + 
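+  // Send__delete__ both delivers the destruction message to the parent and
+  // destroys this child actor (DeallocPSpeechSynthesisRequestChild deletes
+  // it), so it must come after the dispatches above and be the last use of
+  // |actor|.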
actor->Send__delete__(actor); + + return true; +} + +bool +SpeechSynthesisRequestChild::RecvOnPause(const float& aElapsedTime, + const uint32_t& aCharIndex) +{ + mTask->DispatchPauseImpl(aElapsedTime, aCharIndex); + return true; +} + +bool +SpeechSynthesisRequestChild::RecvOnResume(const float& aElapsedTime, + const uint32_t& aCharIndex) +{ + mTask->DispatchResumeImpl(aElapsedTime, aCharIndex); + return true; +} + +bool +SpeechSynthesisRequestChild::RecvOnBoundary(const nsString& aName, + const float& aElapsedTime, + const uint32_t& aCharIndex) +{ + mTask->DispatchBoundaryImpl(aName, aElapsedTime, aCharIndex); + return true; +} + +bool +SpeechSynthesisRequestChild::RecvOnMark(const nsString& aName, + const float& aElapsedTime, + const uint32_t& aCharIndex) +{ + mTask->DispatchMarkImpl(aName, aElapsedTime, aCharIndex); + return true; +} + +// SpeechTaskChild + +SpeechTaskChild::SpeechTaskChild(SpeechSynthesisUtterance* aUtterance) + : nsSpeechTask(aUtterance) +{ +} + +NS_IMETHODIMP +SpeechTaskChild::Setup(nsISpeechTaskCallback* aCallback, + uint32_t aChannels, uint32_t aRate, uint8_t argc) +{ + MOZ_CRASH("Should never be called from child"); +} + +NS_IMETHODIMP +SpeechTaskChild::SendAudio(JS::Handle aData, JS::Handle aLandmarks, + JSContext* aCx) +{ + MOZ_CRASH("Should never be called from child"); +} + +NS_IMETHODIMP +SpeechTaskChild::SendAudioNative(int16_t* aData, uint32_t aDataLen) +{ + MOZ_CRASH("Should never be called from child"); +} + +void +SpeechTaskChild::Pause() +{ + MOZ_ASSERT(mActor); + mActor->SendPause(); +} + +void +SpeechTaskChild::Resume() +{ + MOZ_ASSERT(mActor); + mActor->SendResume(); +} + +void +SpeechTaskChild::Cancel() +{ + MOZ_ASSERT(mActor); + mActor->SendCancel(); +} + +void +SpeechTaskChild::ForceEnd() +{ + MOZ_ASSERT(mActor); + mActor->SendForceEnd(); +} + +void +SpeechTaskChild::SetAudioOutputVolume(float aVolume) +{ + if (mActor) { + mActor->SendSetAudioOutputVolume(aVolume); + } +} + +} // namespace dom +} // namespace mozilla diff --git a/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.h b/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.h new file mode 100644 index 000000000..01e9ffbdd --- /dev/null +++ b/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.h @@ -0,0 +1,106 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef mozilla_dom_SpeechSynthesisChild_h +#define mozilla_dom_SpeechSynthesisChild_h + +#include "mozilla/Attributes.h" +#include "mozilla/dom/PSpeechSynthesisChild.h" +#include "mozilla/dom/PSpeechSynthesisRequestChild.h" +#include "nsSpeechTask.h" + +namespace mozilla { +namespace dom { + +class nsSynthVoiceRegistry; +class SpeechSynthesisRequestChild; +class SpeechTaskChild; + +class SpeechSynthesisChild : public PSpeechSynthesisChild +{ + friend class nsSynthVoiceRegistry; + +public: + bool RecvVoiceAdded(const RemoteVoice& aVoice) override; + + bool RecvVoiceRemoved(const nsString& aUri) override; + + bool RecvSetDefaultVoice(const nsString& aUri, const bool& aIsDefault) override; + + bool RecvIsSpeakingChanged(const bool& aIsSpeaking) override; + + bool RecvNotifyVoicesChanged() override; + +protected: + SpeechSynthesisChild(); + virtual ~SpeechSynthesisChild(); + + PSpeechSynthesisRequestChild* AllocPSpeechSynthesisRequestChild(const nsString& aLang, + const nsString& aUri, + const nsString& aText, + const float& aVolume, + const float& aPitch, + const float& aRate) override; + bool DeallocPSpeechSynthesisRequestChild(PSpeechSynthesisRequestChild* aActor) override; +}; + +class SpeechSynthesisRequestChild : public PSpeechSynthesisRequestChild +{ +public: + explicit SpeechSynthesisRequestChild(SpeechTaskChild* aTask); + virtual ~SpeechSynthesisRequestChild(); + +protected: + bool RecvOnStart(const nsString& aUri) override; + + bool RecvOnEnd(const bool& aIsError, + const float& aElapsedTime, + const uint32_t& aCharIndex) override; + + bool RecvOnPause(const float& aElapsedTime, const uint32_t& aCharIndex) override; + + bool RecvOnResume(const float& aElapsedTime, const uint32_t& aCharIndex) override; + + bool RecvOnBoundary(const nsString& aName, const float& aElapsedTime, + const uint32_t& aCharIndex) override; + + bool RecvOnMark(const nsString& aName, const float& aElapsedTime, + const uint32_t& aCharIndex) override; + + RefPtr mTask; +}; + +class SpeechTaskChild : public nsSpeechTask +{ + friend class SpeechSynthesisRequestChild; +public: + + explicit SpeechTaskChild(SpeechSynthesisUtterance* aUtterance); + + NS_IMETHOD Setup(nsISpeechTaskCallback* aCallback, + uint32_t aChannels, uint32_t aRate, uint8_t argc) override; + + NS_IMETHOD SendAudio(JS::Handle aData, JS::Handle aLandmarks, + JSContext* aCx) override; + + NS_IMETHOD SendAudioNative(int16_t* aData, uint32_t aDataLen) override; + + void Pause() override; + + void Resume() override; + + void Cancel() override; + + void ForceEnd() override; + + void SetAudioOutputVolume(float aVolume) override; + +private: + SpeechSynthesisRequestChild* mActor; +}; + +} // namespace dom +} // namespace mozilla + +#endif diff --git a/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.cpp b/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.cpp new file mode 100644 index 000000000..8dc70e872 --- /dev/null +++ b/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.cpp @@ -0,0 +1,234 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "SpeechSynthesisParent.h" +#include "nsSynthVoiceRegistry.h" + +namespace mozilla { +namespace dom { + +SpeechSynthesisParent::SpeechSynthesisParent() +{ + MOZ_COUNT_CTOR(SpeechSynthesisParent); +} + +SpeechSynthesisParent::~SpeechSynthesisParent() +{ + MOZ_COUNT_DTOR(SpeechSynthesisParent); +} + +void +SpeechSynthesisParent::ActorDestroy(ActorDestroyReason aWhy) +{ + // Implement me! Bug 1005141 +} + +bool +SpeechSynthesisParent::RecvReadVoicesAndState(InfallibleTArray* aVoices, + InfallibleTArray* aDefaults, + bool* aIsSpeaking) +{ + nsSynthVoiceRegistry::GetInstance()->SendVoicesAndState(aVoices, aDefaults, + aIsSpeaking); + return true; +} + +PSpeechSynthesisRequestParent* +SpeechSynthesisParent::AllocPSpeechSynthesisRequestParent(const nsString& aText, + const nsString& aLang, + const nsString& aUri, + const float& aVolume, + const float& aRate, + const float& aPitch) +{ + RefPtr task = new SpeechTaskParent(aVolume, aText); + SpeechSynthesisRequestParent* actor = new SpeechSynthesisRequestParent(task); + return actor; +} + +bool +SpeechSynthesisParent::DeallocPSpeechSynthesisRequestParent(PSpeechSynthesisRequestParent* aActor) +{ + delete aActor; + return true; +} + +bool +SpeechSynthesisParent::RecvPSpeechSynthesisRequestConstructor(PSpeechSynthesisRequestParent* aActor, + const nsString& aText, + const nsString& aLang, + const nsString& aUri, + const float& aVolume, + const float& aRate, + const float& aPitch) +{ + MOZ_ASSERT(aActor); + SpeechSynthesisRequestParent* actor = + static_cast(aActor); + nsSynthVoiceRegistry::GetInstance()->Speak(aText, aLang, aUri, aVolume, aRate, + aPitch, actor->mTask); + return true; +} + +// SpeechSynthesisRequestParent + +SpeechSynthesisRequestParent::SpeechSynthesisRequestParent(SpeechTaskParent* aTask) + : mTask(aTask) +{ + mTask->mActor = this; + MOZ_COUNT_CTOR(SpeechSynthesisRequestParent); +} + +SpeechSynthesisRequestParent::~SpeechSynthesisRequestParent() +{ + if (mTask) { + mTask->mActor = nullptr; + // If we still have a task, cancel it. + mTask->Cancel(); + } + MOZ_COUNT_DTOR(SpeechSynthesisRequestParent); +} + +void +SpeechSynthesisRequestParent::ActorDestroy(ActorDestroyReason aWhy) +{ + // Implement me! Bug 1005141 +} + +bool +SpeechSynthesisRequestParent::RecvPause() +{ + MOZ_ASSERT(mTask); + mTask->Pause(); + return true; +} + +bool +SpeechSynthesisRequestParent::Recv__delete__() +{ + MOZ_ASSERT(mTask); + mTask->mActor = nullptr; + mTask = nullptr; + return true; +} + +bool +SpeechSynthesisRequestParent::RecvResume() +{ + MOZ_ASSERT(mTask); + mTask->Resume(); + return true; +} + +bool +SpeechSynthesisRequestParent::RecvCancel() +{ + MOZ_ASSERT(mTask); + mTask->Cancel(); + return true; +} + +bool +SpeechSynthesisRequestParent::RecvForceEnd() +{ + MOZ_ASSERT(mTask); + mTask->ForceEnd(); + return true; +} + +bool +SpeechSynthesisRequestParent::RecvSetAudioOutputVolume(const float& aVolume) +{ + MOZ_ASSERT(mTask); + mTask->SetAudioOutputVolume(aVolume); + return true; +} + +// SpeechTaskParent + +nsresult +SpeechTaskParent::DispatchStartImpl(const nsAString& aUri) +{ + MOZ_ASSERT(mActor); + if(NS_WARN_IF(!(mActor->SendOnStart(nsString(aUri))))) { + return NS_ERROR_FAILURE; + } + + return NS_OK; +} + +nsresult +SpeechTaskParent::DispatchEndImpl(float aElapsedTime, uint32_t aCharIndex) +{ + if (!mActor) { + // Child is already gone. 
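    // Nothing to notify; dropping the end event here is treated as success
    // rather than an error.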
+ return NS_OK; + } + + if(NS_WARN_IF(!(mActor->SendOnEnd(false, aElapsedTime, aCharIndex)))) { + return NS_ERROR_FAILURE; + } + + return NS_OK; +} + +nsresult +SpeechTaskParent::DispatchPauseImpl(float aElapsedTime, uint32_t aCharIndex) +{ + MOZ_ASSERT(mActor); + if(NS_WARN_IF(!(mActor->SendOnPause(aElapsedTime, aCharIndex)))) { + return NS_ERROR_FAILURE; + } + + return NS_OK; +} + +nsresult +SpeechTaskParent::DispatchResumeImpl(float aElapsedTime, uint32_t aCharIndex) +{ + MOZ_ASSERT(mActor); + if(NS_WARN_IF(!(mActor->SendOnResume(aElapsedTime, aCharIndex)))) { + return NS_ERROR_FAILURE; + } + + return NS_OK; +} + +nsresult +SpeechTaskParent::DispatchErrorImpl(float aElapsedTime, uint32_t aCharIndex) +{ + MOZ_ASSERT(mActor); + if(NS_WARN_IF(!(mActor->SendOnEnd(true, aElapsedTime, aCharIndex)))) { + return NS_ERROR_FAILURE; + } + + return NS_OK; +} + +nsresult +SpeechTaskParent::DispatchBoundaryImpl(const nsAString& aName, + float aElapsedTime, uint32_t aCharIndex) +{ + MOZ_ASSERT(mActor); + if(NS_WARN_IF(!(mActor->SendOnBoundary(nsString(aName), aElapsedTime, aCharIndex)))) { + return NS_ERROR_FAILURE; + } + + return NS_OK; +} + +nsresult +SpeechTaskParent::DispatchMarkImpl(const nsAString& aName, + float aElapsedTime, uint32_t aCharIndex) +{ + MOZ_ASSERT(mActor); + if(NS_WARN_IF(!(mActor->SendOnMark(nsString(aName), aElapsedTime, aCharIndex)))) { + return NS_ERROR_FAILURE; + } + + return NS_OK; +} + +} // namespace dom +} // namespace mozilla diff --git a/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.h b/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.h new file mode 100644 index 000000000..2edd7e28e --- /dev/null +++ b/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.h @@ -0,0 +1,108 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef mozilla_dom_SpeechSynthesisParent_h +#define mozilla_dom_SpeechSynthesisParent_h + +#include "mozilla/dom/PSpeechSynthesisParent.h" +#include "mozilla/dom/PSpeechSynthesisRequestParent.h" +#include "nsSpeechTask.h" + +namespace mozilla { +namespace dom { + +class ContentParent; +class SpeechTaskParent; +class SpeechSynthesisRequestParent; + +class SpeechSynthesisParent : public PSpeechSynthesisParent +{ + friend class ContentParent; + friend class SpeechSynthesisRequestParent; + +public: + void ActorDestroy(ActorDestroyReason aWhy) override; + + bool RecvReadVoicesAndState(InfallibleTArray* aVoices, + InfallibleTArray* aDefaults, + bool* aIsSpeaking) override; + +protected: + SpeechSynthesisParent(); + virtual ~SpeechSynthesisParent(); + PSpeechSynthesisRequestParent* AllocPSpeechSynthesisRequestParent(const nsString& aText, + const nsString& aLang, + const nsString& aUri, + const float& aVolume, + const float& aRate, + const float& aPitch) + override; + + bool DeallocPSpeechSynthesisRequestParent(PSpeechSynthesisRequestParent* aActor) override; + + bool RecvPSpeechSynthesisRequestConstructor(PSpeechSynthesisRequestParent* aActor, + const nsString& aText, + const nsString& aLang, + const nsString& aUri, + const float& aVolume, + const float& aRate, + const float& aPitch) override; +}; + +class SpeechSynthesisRequestParent : public PSpeechSynthesisRequestParent +{ +public: + explicit SpeechSynthesisRequestParent(SpeechTaskParent* aTask); + virtual ~SpeechSynthesisRequestParent(); + + RefPtr mTask; + +protected: + + void ActorDestroy(ActorDestroyReason aWhy) override; + + bool RecvPause() override; + + bool RecvResume() override; + + bool RecvCancel() override; + + bool RecvForceEnd() override; + + bool RecvSetAudioOutputVolume(const float& aVolume) override; + + bool Recv__delete__() override; +}; + +class SpeechTaskParent : public nsSpeechTask +{ + friend class SpeechSynthesisRequestParent; +public: + SpeechTaskParent(float aVolume, const nsAString& aUtterance) + : nsSpeechTask(aVolume, aUtterance) {} + + nsresult DispatchStartImpl(const nsAString& aUri); + + nsresult DispatchEndImpl(float aElapsedTime, uint32_t aCharIndex); + + nsresult DispatchPauseImpl(float aElapsedTime, uint32_t aCharIndex); + + nsresult DispatchResumeImpl(float aElapsedTime, uint32_t aCharIndex); + + nsresult DispatchErrorImpl(float aElapsedTime, uint32_t aCharIndex); + + nsresult DispatchBoundaryImpl(const nsAString& aName, + float aElapsedTime, uint32_t aCharIndex); + + nsresult DispatchMarkImpl(const nsAString& aName, + float aElapsedTime, uint32_t aCharIndex); + +private: + SpeechSynthesisRequestParent* mActor; +}; + +} // namespace dom +} // namespace mozilla + +#endif diff --git a/dom/media/webspeech/synth/moz.build b/dom/media/webspeech/synth/moz.build new file mode 100644 index 000000000..bb26515af --- /dev/null +++ b/dom/media/webspeech/synth/moz.build @@ -0,0 +1,70 @@ +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
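+
+# The DOM classes, IPC glue and tests below are only compiled when
+# MOZ_WEBSPEECH is enabled; the IPDL protocol files further down are always
+# processed, and platform backend directories are selected by widget toolkit
+# or config flags.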
+ +if CONFIG['MOZ_WEBSPEECH']: + MOCHITEST_MANIFESTS += [ + 'test/mochitest.ini', + 'test/startup/mochitest.ini', + ] + + XPIDL_MODULE = 'dom_webspeechsynth' + + XPIDL_SOURCES += [ + 'nsISpeechService.idl', + 'nsISynthVoiceRegistry.idl' + ] + + EXPORTS.mozilla.dom += [ + 'ipc/SpeechSynthesisChild.h', + 'ipc/SpeechSynthesisParent.h', + 'nsSpeechTask.h', + 'nsSynthVoiceRegistry.h', + 'SpeechSynthesis.h', + 'SpeechSynthesisUtterance.h', + 'SpeechSynthesisVoice.h', + ] + + UNIFIED_SOURCES += [ + 'ipc/SpeechSynthesisChild.cpp', + 'ipc/SpeechSynthesisParent.cpp', + 'nsSpeechTask.cpp', + 'nsSynthVoiceRegistry.cpp', + 'SpeechSynthesis.cpp', + 'SpeechSynthesisUtterance.cpp', + 'SpeechSynthesisVoice.cpp', + ] + + if CONFIG['MOZ_WEBSPEECH_TEST_BACKEND']: + UNIFIED_SOURCES += [ + 'test/FakeSynthModule.cpp', + 'test/nsFakeSynthServices.cpp' + ] + + if CONFIG['MOZ_WIDGET_TOOLKIT'] == 'windows': + DIRS += ['windows'] + + if CONFIG['MOZ_WIDGET_TOOLKIT'] == 'cocoa': + DIRS += ['cocoa'] + + if CONFIG['MOZ_SYNTH_SPEECHD']: + DIRS += ['speechd'] + + if CONFIG['MOZ_SYNTH_PICO']: + DIRS += ['pico'] + +IPDL_SOURCES += [ + 'ipc/PSpeechSynthesis.ipdl', + 'ipc/PSpeechSynthesisRequest.ipdl', +] + +include('/ipc/chromium/chromium-config.mozbuild') + +FINAL_LIBRARY = 'xul' +LOCAL_INCLUDES += [ + 'ipc', +] + +if CONFIG['GNU_CXX']: + CXXFLAGS += ['-Wno-error=shadow'] diff --git a/dom/media/webspeech/synth/nsISpeechService.idl b/dom/media/webspeech/synth/nsISpeechService.idl new file mode 100644 index 000000000..710686f1e --- /dev/null +++ b/dom/media/webspeech/synth/nsISpeechService.idl @@ -0,0 +1,173 @@ +/* -*- Mode: IDL; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsISupports.idl" + +typedef unsigned short SpeechServiceType; + +/** + * A callback is implemented by the service. For direct audio services, it is + * required to implement these, although it could be helpful to use the + * cancel method for shutting down the speech resources. + */ +[scriptable, uuid(c576de0c-8a3d-4570-be7e-9876d3e5bed2)] +interface nsISpeechTaskCallback : nsISupports +{ + /** + * The user or application has paused the speech. + */ + void onPause(); + + /** + * The user or application has resumed the speech. + */ + void onResume(); + + /** + * The user or application has canceled the speech. + */ + void onCancel(); + + /** + * The user or application has changed the volume of this speech. + * This is only used on indirect audio service type. + */ + void onVolumeChanged(in float aVolume); +}; + + +/** + * A task is associated with a single utterance. It is provided by the browser + * to the service in the speak() method. + */ +[scriptable, builtinclass, uuid(ad59949c-2437-4b35-8eeb-d760caab75c5)] +interface nsISpeechTask : nsISupports +{ + /** + * Prepare browser for speech. + * + * @param aCallback callback object for mid-speech operations. + * @param aChannels number of audio channels. Only required + * in direct audio services + * @param aRate audio rate. Only required in direct audio services + */ + [optional_argc] void setup(in nsISpeechTaskCallback aCallback, + [optional] in uint32_t aChannels, + [optional] in uint32_t aRate); + + /** + * Send audio data to browser. + * + * @param aData an Int16Array with PCM-16 audio data. + * @param aLandmarks an array of sample offset and landmark pairs. 
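+   *                    (e.g. roughly [[1000, "word"], [8000, "sentence"]];
+   *                    illustrative values only)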
+   *                    Used for emitting boundary and mark events.
+   */
+  [implicit_jscontext]
+  void sendAudio(in jsval aData, in jsval aLandmarks);
+
+  [noscript]
+  void sendAudioNative([array, size_is(aDataLen)] in short aData, in unsigned long aDataLen);
+
+  /**
+   * Dispatch start event.
+   */
+  void dispatchStart();
+
+  /**
+   * Dispatch end event.
+   *
+   * @param aElapsedTime time in seconds since speech has started.
+   * @param aCharIndex offset of spoken characters.
+   */
+  void dispatchEnd(in float aElapsedTime, in unsigned long aCharIndex);
+
+  /**
+   * Dispatch pause event.
+   *
+   * @param aElapsedTime time in seconds since speech has started.
+   * @param aCharIndex offset of spoken characters.
+   */
+  void dispatchPause(in float aElapsedTime, in unsigned long aCharIndex);
+
+  /**
+   * Dispatch resume event.
+   *
+   * @param aElapsedTime time in seconds since speech has started.
+   * @param aCharIndex offset of spoken characters.
+   */
+  void dispatchResume(in float aElapsedTime, in unsigned long aCharIndex);
+
+  /**
+   * Dispatch error event.
+   *
+   * @param aElapsedTime time in seconds since speech has started.
+   * @param aCharIndex offset of spoken characters.
+   */
+  void dispatchError(in float aElapsedTime, in unsigned long aCharIndex);
+
+  /**
+   * Dispatch boundary event.
+   *
+   * @param aName name of boundary, 'word' or 'sentence'.
+   * @param aElapsedTime time in seconds since speech has started.
+   * @param aCharIndex offset of spoken characters.
+   */
+  void dispatchBoundary(in DOMString aName, in float aElapsedTime,
+                        in unsigned long aCharIndex);
+
+  /**
+   * Dispatch mark event.
+   *
+   * @param aName mark identifier.
+   * @param aElapsedTime time in seconds since speech has started.
+   * @param aCharIndex offset of spoken characters.
+   */
+  void dispatchMark(in DOMString aName, in float aElapsedTime, in unsigned long aCharIndex);
+};
+
+/**
+ * The main interface of a speech synthesis service.
+ *
+ * A service's speak method could be implemented in two ways:
+ *  1. Indirect audio - the service is responsible for outputting audio.
+ *     The service calls the nsISpeechTask.dispatch* methods directly, starting
+ *     with dispatchStart() and ending with dispatchEnd() or dispatchError().
+ *
+ *  2. Direct audio - the service provides us with PCM-16 data, and we output
+ *     it. The service does not call the dispatch task methods directly.
+ *     Instead, audio information is provided at setup(), and audio data is
+ *     sent with sendAudio(). The utterance is terminated with an empty
+ *     sendAudio().
+ */
+[scriptable, uuid(9b7d59db-88ff-43d0-b6ee-9f63d042d08f)]
+interface nsISpeechService : nsISupports
+{
+  /**
+   * Speak the given text using the voice identified by the given uri. See
+   * the W3C Speech API spec for information about pitch and rate.
+   * https://dvcs.w3.org/hg/speech-api/raw-file/tip/speechapi.html#utterance-attributes
+   *
+   * @param aText text to utter.
+   * @param aUri unique voice identifier.
+   * @param aVolume volume to speak the voice at. Only relevant for indirect
+   *                audio.
+   * @param aRate rate to speak the voice at.
+   * @param aPitch pitch to speak the voice at.
+   * @param aTask task instance for the utterance, used for sending events or
+   *              audio data back to the browser.
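+   *
+   * An indirect audio service would typically drive the task roughly like
+   * this (an illustrative sketch only; 'callback', 'elapsedSeconds' and
+   * 'charIndex' are made-up names):
+   *
+   *   aTask.setup(callback);
+   *   aTask.dispatchStart();
+   *   // ...synthesize with the platform engine, forwarding its events...
+   *   aTask.dispatchBoundary("word", elapsedSeconds, charIndex);
+   *   aTask.dispatchEnd(elapsedSeconds, charIndex);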
+ */ + void speak(in DOMString aText, in DOMString aUri, + in float aVolume, in float aRate, in float aPitch, + in nsISpeechTask aTask); + + const SpeechServiceType SERVICETYPE_DIRECT_AUDIO = 1; + const SpeechServiceType SERVICETYPE_INDIRECT_AUDIO = 2; + + readonly attribute SpeechServiceType serviceType; +}; + +%{C++ +// This is the service category speech services could use to start up as +// a component. +#define NS_SPEECH_SYNTH_STARTED "speech-synth-started" +%} diff --git a/dom/media/webspeech/synth/nsISynthVoiceRegistry.idl b/dom/media/webspeech/synth/nsISynthVoiceRegistry.idl new file mode 100644 index 000000000..cc0a5e1c6 --- /dev/null +++ b/dom/media/webspeech/synth/nsISynthVoiceRegistry.idl @@ -0,0 +1,77 @@ +/* -*- Mode: IDL; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsISupports.idl" + +interface nsISpeechService; + +[scriptable, builtinclass, uuid(5d7a0b38-77e5-4ee5-897c-ce5db9b85d44)] +interface nsISynthVoiceRegistry : nsISupports +{ + /** + * Register a speech synthesis voice. + * + * @param aService the service that provides this voice. + * @param aUri a unique identifier for this voice. + * @param aName human-readable name for this voice. + * @param aLang a BCP 47 language tag. + * @param aLocalService true if service does not require network. + * @param aQueuesUtterances true if voice only speaks one utterance at a time + */ + void addVoice(in nsISpeechService aService, in DOMString aUri, + in DOMString aName, in DOMString aLang, + in boolean aLocalService, in boolean aQueuesUtterances); + + /** + * Remove a speech synthesis voice. + * + * @param aService the service that was used to add the voice. + * @param aUri a unique identifier of an existing voice. + */ + void removeVoice(in nsISpeechService aService, in DOMString aUri); + + /** + * Notify content of voice availability changes. This allows content + * to be notified of voice catalog changes in real time. + */ + void notifyVoicesChanged(); + + /** + * Set a voice as default. + * + * @param aUri a unique identifier of an existing voice. + * @param aIsDefault true if this voice should be toggled as default. + */ + void setDefaultVoice(in DOMString aUri, in boolean aIsDefault); + + readonly attribute uint32_t voiceCount; + + AString getVoice(in uint32_t aIndex); + + bool isDefaultVoice(in DOMString aUri); + + bool isLocalVoice(in DOMString aUri); + + AString getVoiceLang(in DOMString aUri); + + AString getVoiceName(in DOMString aUri); +}; + +%{C++ +#define NS_SYNTHVOICEREGISTRY_CID \ + { /* {7090524d-5574-4492-a77f-d8d558ced59d} */ \ + 0x7090524d, \ + 0x5574, \ + 0x4492, \ + { 0xa7, 0x7f, 0xd8, 0xd5, 0x58, 0xce, 0xd5, 0x9d } \ + } + +#define NS_SYNTHVOICEREGISTRY_CONTRACTID \ + "@mozilla.org/synth-voice-registry;1" + +#define NS_SYNTHVOICEREGISTRY_CLASSNAME \ + "Speech Synthesis Voice Registry" + +%} diff --git a/dom/media/webspeech/synth/nsSpeechTask.cpp b/dom/media/webspeech/synth/nsSpeechTask.cpp new file mode 100644 index 000000000..d687f5122 --- /dev/null +++ b/dom/media/webspeech/synth/nsSpeechTask.cpp @@ -0,0 +1,783 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "AudioChannelAgent.h"
+#include "AudioChannelService.h"
+#include "AudioSegment.h"
+#include "MediaStreamListener.h"
+#include "nsSpeechTask.h"
+#include "nsSynthVoiceRegistry.h"
+#include "SharedBuffer.h"
+#include "SpeechSynthesis.h"
+
+// GetCurrentTime is defined in winbase.h as a zero-argument macro forwarding
+// to GetTickCount() and conflicts with nsSpeechTask::GetCurrentTime().
+#ifdef GetCurrentTime
+#undef GetCurrentTime
+#endif
+
+#undef LOG
+extern mozilla::LogModule* GetSpeechSynthLog();
+#define LOG(type, msg) MOZ_LOG(GetSpeechSynthLog(), type, msg)
+
+#define AUDIO_TRACK 1
+
+namespace mozilla {
+namespace dom {
+
+class SynthStreamListener : public MediaStreamListener
+{
+public:
+  explicit SynthStreamListener(nsSpeechTask* aSpeechTask,
+                               MediaStream* aStream) :
+    mSpeechTask(aSpeechTask),
+    mStream(aStream),
+    mStarted(false)
+  {
+  }
+
+  void DoNotifyStarted()
+  {
+    if (mSpeechTask) {
+      mSpeechTask->DispatchStartInner();
+    }
+  }
+
+  void DoNotifyFinished()
+  {
+    if (mSpeechTask) {
+      mSpeechTask->DispatchEndInner(mSpeechTask->GetCurrentTime(),
+                                    mSpeechTask->GetCurrentCharOffset());
+    }
+  }
+
+  void NotifyEvent(MediaStreamGraph* aGraph,
+                   MediaStreamGraphEvent event) override
+  {
+    switch (event) {
+      case MediaStreamGraphEvent::EVENT_FINISHED:
+        {
+          if (!mStarted) {
+            mStarted = true;
+            nsCOMPtr<nsIRunnable> startRunnable =
+              NewRunnableMethod(this, &SynthStreamListener::DoNotifyStarted);
+            aGraph->DispatchToMainThreadAfterStreamStateUpdate(startRunnable.forget());
+          }
+
+          nsCOMPtr<nsIRunnable> endRunnable =
+            NewRunnableMethod(this, &SynthStreamListener::DoNotifyFinished);
+          aGraph->DispatchToMainThreadAfterStreamStateUpdate(endRunnable.forget());
+        }
+        break;
+      case MediaStreamGraphEvent::EVENT_REMOVED:
+        mSpeechTask = nullptr;
+        // Drop the reference to the MediaStream so it can be destroyed safely.
+        mStream = nullptr;
+        break;
+      default:
+        break;
+    }
+  }
+
+  void NotifyBlockingChanged(MediaStreamGraph* aGraph, Blocking aBlocked) override
+  {
+    if (aBlocked == MediaStreamListener::UNBLOCKED && !mStarted) {
+      mStarted = true;
+      nsCOMPtr<nsIRunnable> event =
+        NewRunnableMethod(this, &SynthStreamListener::DoNotifyStarted);
+      aGraph->DispatchToMainThreadAfterStreamStateUpdate(event.forget());
+    }
+  }
+
+private:
+  // Raw pointer; if we exist, the stream exists,
+  // and 'mSpeechTask' exclusively owns it and therefore exists as well.
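+  // (It is cleared when the graph sends EVENT_REMOVED, after which no further
+  // notifications are forwarded.)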
+  nsSpeechTask* mSpeechTask;
+  // This is a KungFuDeathGrip for the MediaStream.
+  RefPtr<MediaStream> mStream;
+
+  bool mStarted;
+};
+
+// nsSpeechTask
+
+NS_IMPL_CYCLE_COLLECTION(nsSpeechTask, mSpeechSynthesis, mUtterance, mCallback);
+
+NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsSpeechTask)
+  NS_INTERFACE_MAP_ENTRY(nsISpeechTask)
+  NS_INTERFACE_MAP_ENTRY(nsIAudioChannelAgentCallback)
+  NS_INTERFACE_MAP_ENTRY(nsISupportsWeakReference)
+  NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechTask)
+NS_INTERFACE_MAP_END
+
+NS_IMPL_CYCLE_COLLECTING_ADDREF(nsSpeechTask)
+NS_IMPL_CYCLE_COLLECTING_RELEASE(nsSpeechTask)
+
+nsSpeechTask::nsSpeechTask(SpeechSynthesisUtterance* aUtterance)
+  : mUtterance(aUtterance)
+  , mInited(false)
+  , mPrePaused(false)
+  , mPreCanceled(false)
+  , mCallback(nullptr)
+  , mIndirectAudio(false)
+{
+  mText = aUtterance->mText;
+  mVolume = aUtterance->Volume();
+}
+
+nsSpeechTask::nsSpeechTask(float aVolume, const nsAString& aText)
+  : mUtterance(nullptr)
+  , mVolume(aVolume)
+  , mText(aText)
+  , mInited(false)
+  , mPrePaused(false)
+  , mPreCanceled(false)
+  , mCallback(nullptr)
+  , mIndirectAudio(false)
+{
+}
+
+nsSpeechTask::~nsSpeechTask()
+{
+  LOG(LogLevel::Debug, ("~nsSpeechTask"));
+  if (mStream) {
+    if (!mStream->IsDestroyed()) {
+      mStream->Destroy();
+    }
+
+    // The stream is finally destroyed by SynthStreamListener because
+    // MediaStream::Destroy() is async.
+    mStream = nullptr;
+  }
+
+  if (mPort) {
+    mPort->Destroy();
+    mPort = nullptr;
+  }
+}
+
+void
+nsSpeechTask::InitDirectAudio()
+{
+  mStream = MediaStreamGraph::GetInstance(MediaStreamGraph::AUDIO_THREAD_DRIVER,
+                                          AudioChannel::Normal)->
+    CreateSourceStream();
+  mIndirectAudio = false;
+  mInited = true;
+}
+
+void
+nsSpeechTask::InitIndirectAudio()
+{
+  mIndirectAudio = true;
+  mInited = true;
+}
+
+void
+nsSpeechTask::SetChosenVoiceURI(const nsAString& aUri)
+{
+  mChosenVoiceURI = aUri;
+}
+
+NS_IMETHODIMP
+nsSpeechTask::Setup(nsISpeechTaskCallback* aCallback,
+                    uint32_t aChannels, uint32_t aRate, uint8_t argc)
+{
+  MOZ_ASSERT(XRE_IsParentProcess());
+
+  LOG(LogLevel::Debug, ("nsSpeechTask::Setup"));
+
+  mCallback = aCallback;
+
+  if (mIndirectAudio) {
+    MOZ_ASSERT(!mStream);
+    if (argc > 0) {
+      NS_WARNING("Audio info arguments in Setup() are ignored for indirect audio services.");
+    }
+    return NS_OK;
+  }
+
+  // mStream is set up in InitDirectAudio(), which should be called before this.
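+  // (The indirect-audio case returned early above; from here on we are wiring
+  // up direct audio output on that stream.)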
+ MOZ_ASSERT(mStream); + + mStream->AddListener(new SynthStreamListener(this, mStream)); + + // XXX: Support more than one channel + if(NS_WARN_IF(!(aChannels == 1))) { + return NS_ERROR_FAILURE; + } + + mChannels = aChannels; + + AudioSegment* segment = new AudioSegment(); + mStream->AddAudioTrack(AUDIO_TRACK, aRate, 0, segment); + mStream->AddAudioOutput(this); + mStream->SetAudioOutputVolume(this, mVolume); + + return NS_OK; +} + +static RefPtr +makeSamples(int16_t* aData, uint32_t aDataLen) +{ + RefPtr samples = + SharedBuffer::Create(aDataLen * sizeof(int16_t)); + int16_t* frames = static_cast(samples->Data()); + + for (uint32_t i = 0; i < aDataLen; i++) { + frames[i] = aData[i]; + } + + return samples; +} + +NS_IMETHODIMP +nsSpeechTask::SendAudio(JS::Handle aData, JS::Handle aLandmarks, + JSContext* aCx) +{ + MOZ_ASSERT(XRE_IsParentProcess()); + + if(NS_WARN_IF(!(mStream))) { + return NS_ERROR_NOT_AVAILABLE; + } + if(NS_WARN_IF(mStream->IsDestroyed())) { + return NS_ERROR_NOT_AVAILABLE; + } + if(NS_WARN_IF(!(mChannels))) { + return NS_ERROR_FAILURE; + } + if(NS_WARN_IF(!(aData.isObject()))) { + return NS_ERROR_INVALID_ARG; + } + + if (mIndirectAudio) { + NS_WARNING("Can't call SendAudio from an indirect audio speech service."); + return NS_ERROR_FAILURE; + } + + JS::Rooted darray(aCx, &aData.toObject()); + JSAutoCompartment ac(aCx, darray); + + JS::Rooted tsrc(aCx, nullptr); + + // Allow either Int16Array or plain JS Array + if (JS_IsInt16Array(darray)) { + tsrc = darray; + } else { + bool isArray; + if (!JS_IsArrayObject(aCx, darray, &isArray)) { + return NS_ERROR_UNEXPECTED; + } + if (isArray) { + tsrc = JS_NewInt16ArrayFromArray(aCx, darray); + } + } + + if (!tsrc) { + return NS_ERROR_DOM_TYPE_MISMATCH_ERR; + } + + uint32_t dataLen = JS_GetTypedArrayLength(tsrc); + RefPtr samples; + { + JS::AutoCheckCannotGC nogc; + bool isShared; + int16_t* data = JS_GetInt16ArrayData(tsrc, &isShared, nogc); + if (isShared) { + // Must opt in to using shared data. 
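+      // (SharedArrayBuffer-backed views are rejected rather than copied.)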
+ return NS_ERROR_DOM_TYPE_MISMATCH_ERR; + } + samples = makeSamples(data, dataLen); + } + SendAudioImpl(samples, dataLen); + + return NS_OK; +} + +NS_IMETHODIMP +nsSpeechTask::SendAudioNative(int16_t* aData, uint32_t aDataLen) +{ + MOZ_ASSERT(XRE_IsParentProcess()); + + if(NS_WARN_IF(!(mStream))) { + return NS_ERROR_NOT_AVAILABLE; + } + if(NS_WARN_IF(mStream->IsDestroyed())) { + return NS_ERROR_NOT_AVAILABLE; + } + if(NS_WARN_IF(!(mChannels))) { + return NS_ERROR_FAILURE; + } + + if (mIndirectAudio) { + NS_WARNING("Can't call SendAudio from an indirect audio speech service."); + return NS_ERROR_FAILURE; + } + + RefPtr samples = makeSamples(aData, aDataLen); + SendAudioImpl(samples, aDataLen); + + return NS_OK; +} + +void +nsSpeechTask::SendAudioImpl(RefPtr& aSamples, uint32_t aDataLen) +{ + if (aDataLen == 0) { + mStream->EndAllTrackAndFinish(); + return; + } + + AudioSegment segment; + AutoTArray channelData; + channelData.AppendElement(static_cast(aSamples->Data())); + segment.AppendFrames(aSamples.forget(), channelData, aDataLen, + PRINCIPAL_HANDLE_NONE); + mStream->AppendToTrack(1, &segment); + mStream->AdvanceKnownTracksTime(STREAM_TIME_MAX); +} + +NS_IMETHODIMP +nsSpeechTask::DispatchStart() +{ + if (!mIndirectAudio) { + NS_WARNING("Can't call DispatchStart() from a direct audio speech service"); + return NS_ERROR_FAILURE; + } + + return DispatchStartInner(); +} + +nsresult +nsSpeechTask::DispatchStartInner() +{ + nsSynthVoiceRegistry::GetInstance()->SetIsSpeaking(true); + return DispatchStartImpl(); +} + +nsresult +nsSpeechTask::DispatchStartImpl() +{ + return DispatchStartImpl(mChosenVoiceURI); +} + +nsresult +nsSpeechTask::DispatchStartImpl(const nsAString& aUri) +{ + LOG(LogLevel::Debug, ("nsSpeechTask::DispatchStart")); + + MOZ_ASSERT(mUtterance); + if(NS_WARN_IF(!(mUtterance->mState == SpeechSynthesisUtterance::STATE_PENDING))) { + return NS_ERROR_NOT_AVAILABLE; + } + + CreateAudioChannelAgent(); + + mUtterance->mState = SpeechSynthesisUtterance::STATE_SPEAKING; + mUtterance->mChosenVoiceURI = aUri; + mUtterance->DispatchSpeechSynthesisEvent(NS_LITERAL_STRING("start"), 0, 0, + EmptyString()); + + return NS_OK; +} + +NS_IMETHODIMP +nsSpeechTask::DispatchEnd(float aElapsedTime, uint32_t aCharIndex) +{ + if (!mIndirectAudio) { + NS_WARNING("Can't call DispatchEnd() from a direct audio speech service"); + return NS_ERROR_FAILURE; + } + + return DispatchEndInner(aElapsedTime, aCharIndex); +} + +nsresult +nsSpeechTask::DispatchEndInner(float aElapsedTime, uint32_t aCharIndex) +{ + if (!mPreCanceled) { + nsSynthVoiceRegistry::GetInstance()->SpeakNext(); + } + + return DispatchEndImpl(aElapsedTime, aCharIndex); +} + +nsresult +nsSpeechTask::DispatchEndImpl(float aElapsedTime, uint32_t aCharIndex) +{ + LOG(LogLevel::Debug, ("nsSpeechTask::DispatchEnd\n")); + + DestroyAudioChannelAgent(); + + MOZ_ASSERT(mUtterance); + if(NS_WARN_IF(mUtterance->mState == SpeechSynthesisUtterance::STATE_ENDED)) { + return NS_ERROR_NOT_AVAILABLE; + } + + // XXX: This should not be here, but it prevents a crash in MSG. 
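+  // (The task is done with audio output at this point either way.)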
+ if (mStream) { + mStream->Destroy(); + } + + RefPtr utterance = mUtterance; + + if (mSpeechSynthesis) { + mSpeechSynthesis->OnEnd(this); + } + + if (utterance->mState == SpeechSynthesisUtterance::STATE_PENDING) { + utterance->mState = SpeechSynthesisUtterance::STATE_NONE; + } else { + utterance->mState = SpeechSynthesisUtterance::STATE_ENDED; + utterance->DispatchSpeechSynthesisEvent(NS_LITERAL_STRING("end"), + aCharIndex, aElapsedTime, + EmptyString()); + } + + return NS_OK; +} + +NS_IMETHODIMP +nsSpeechTask::DispatchPause(float aElapsedTime, uint32_t aCharIndex) +{ + if (!mIndirectAudio) { + NS_WARNING("Can't call DispatchPause() from a direct audio speech service"); + return NS_ERROR_FAILURE; + } + + return DispatchPauseImpl(aElapsedTime, aCharIndex); +} + +nsresult +nsSpeechTask::DispatchPauseImpl(float aElapsedTime, uint32_t aCharIndex) +{ + LOG(LogLevel::Debug, ("nsSpeechTask::DispatchPause")); + MOZ_ASSERT(mUtterance); + if(NS_WARN_IF(mUtterance->mPaused)) { + return NS_ERROR_NOT_AVAILABLE; + } + if(NS_WARN_IF(mUtterance->mState == SpeechSynthesisUtterance::STATE_ENDED)) { + return NS_ERROR_NOT_AVAILABLE; + } + + mUtterance->mPaused = true; + if (mUtterance->mState == SpeechSynthesisUtterance::STATE_SPEAKING) { + mUtterance->DispatchSpeechSynthesisEvent(NS_LITERAL_STRING("pause"), + aCharIndex, aElapsedTime, + EmptyString()); + } + return NS_OK; +} + +NS_IMETHODIMP +nsSpeechTask::DispatchResume(float aElapsedTime, uint32_t aCharIndex) +{ + if (!mIndirectAudio) { + NS_WARNING("Can't call DispatchResume() from a direct audio speech service"); + return NS_ERROR_FAILURE; + } + + return DispatchResumeImpl(aElapsedTime, aCharIndex); +} + +nsresult +nsSpeechTask::DispatchResumeImpl(float aElapsedTime, uint32_t aCharIndex) +{ + LOG(LogLevel::Debug, ("nsSpeechTask::DispatchResume")); + MOZ_ASSERT(mUtterance); + if(NS_WARN_IF(!(mUtterance->mPaused))) { + return NS_ERROR_NOT_AVAILABLE; + } + if(NS_WARN_IF(mUtterance->mState == SpeechSynthesisUtterance::STATE_ENDED)) { + return NS_ERROR_NOT_AVAILABLE; + } + + mUtterance->mPaused = false; + if (mUtterance->mState == SpeechSynthesisUtterance::STATE_SPEAKING) { + mUtterance->DispatchSpeechSynthesisEvent(NS_LITERAL_STRING("resume"), + aCharIndex, aElapsedTime, + EmptyString()); + } + + return NS_OK; +} + +NS_IMETHODIMP +nsSpeechTask::DispatchError(float aElapsedTime, uint32_t aCharIndex) +{ + LOG(LogLevel::Debug, ("nsSpeechTask::DispatchError")); + + if (!mIndirectAudio) { + NS_WARNING("Can't call DispatchError() from a direct audio speech service"); + return NS_ERROR_FAILURE; + } + + if (!mPreCanceled) { + nsSynthVoiceRegistry::GetInstance()->SpeakNext(); + } + + return DispatchErrorImpl(aElapsedTime, aCharIndex); +} + +nsresult +nsSpeechTask::DispatchErrorImpl(float aElapsedTime, uint32_t aCharIndex) +{ + MOZ_ASSERT(mUtterance); + if(NS_WARN_IF(mUtterance->mState == SpeechSynthesisUtterance::STATE_ENDED)) { + return NS_ERROR_NOT_AVAILABLE; + } + + if (mSpeechSynthesis) { + mSpeechSynthesis->OnEnd(this); + } + + mUtterance->mState = SpeechSynthesisUtterance::STATE_ENDED; + mUtterance->DispatchSpeechSynthesisEvent(NS_LITERAL_STRING("error"), + aCharIndex, aElapsedTime, + EmptyString()); + return NS_OK; +} + +NS_IMETHODIMP +nsSpeechTask::DispatchBoundary(const nsAString& aName, + float aElapsedTime, uint32_t aCharIndex) +{ + if (!mIndirectAudio) { + NS_WARNING("Can't call DispatchBoundary() from a direct audio speech service"); + return NS_ERROR_FAILURE; + } + + return DispatchBoundaryImpl(aName, aElapsedTime, aCharIndex); +} + +nsresult 
+nsSpeechTask::DispatchBoundaryImpl(const nsAString& aName, + float aElapsedTime, uint32_t aCharIndex) +{ + MOZ_ASSERT(mUtterance); + if(NS_WARN_IF(!(mUtterance->mState == SpeechSynthesisUtterance::STATE_SPEAKING))) { + return NS_ERROR_NOT_AVAILABLE; + } + + mUtterance->DispatchSpeechSynthesisEvent(NS_LITERAL_STRING("boundary"), + aCharIndex, aElapsedTime, + aName); + return NS_OK; +} + +NS_IMETHODIMP +nsSpeechTask::DispatchMark(const nsAString& aName, + float aElapsedTime, uint32_t aCharIndex) +{ + if (!mIndirectAudio) { + NS_WARNING("Can't call DispatchMark() from a direct audio speech service"); + return NS_ERROR_FAILURE; + } + + return DispatchMarkImpl(aName, aElapsedTime, aCharIndex); +} + +nsresult +nsSpeechTask::DispatchMarkImpl(const nsAString& aName, + float aElapsedTime, uint32_t aCharIndex) +{ + MOZ_ASSERT(mUtterance); + if(NS_WARN_IF(!(mUtterance->mState == SpeechSynthesisUtterance::STATE_SPEAKING))) { + return NS_ERROR_NOT_AVAILABLE; + } + + mUtterance->DispatchSpeechSynthesisEvent(NS_LITERAL_STRING("mark"), + aCharIndex, aElapsedTime, + aName); + return NS_OK; +} + +void +nsSpeechTask::Pause() +{ + MOZ_ASSERT(XRE_IsParentProcess()); + + if (mCallback) { + DebugOnly rv = mCallback->OnPause(); + NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "Unable to call onPause() callback"); + } + + if (mStream) { + mStream->Suspend(); + } + + if (!mInited) { + mPrePaused = true; + } + + if (!mIndirectAudio) { + DispatchPauseImpl(GetCurrentTime(), GetCurrentCharOffset()); + } +} + +void +nsSpeechTask::Resume() +{ + MOZ_ASSERT(XRE_IsParentProcess()); + + if (mCallback) { + DebugOnly rv = mCallback->OnResume(); + NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), + "Unable to call onResume() callback"); + } + + if (mStream) { + mStream->Resume(); + } + + if (mPrePaused) { + mPrePaused = false; + nsSynthVoiceRegistry::GetInstance()->ResumeQueue(); + } + + if (!mIndirectAudio) { + DispatchResumeImpl(GetCurrentTime(), GetCurrentCharOffset()); + } +} + +void +nsSpeechTask::Cancel() +{ + MOZ_ASSERT(XRE_IsParentProcess()); + + LOG(LogLevel::Debug, ("nsSpeechTask::Cancel")); + + if (mCallback) { + DebugOnly rv = mCallback->OnCancel(); + NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), + "Unable to call onCancel() callback"); + } + + if (mStream) { + mStream->Suspend(); + } + + if (!mInited) { + mPreCanceled = true; + } + + if (!mIndirectAudio) { + DispatchEndInner(GetCurrentTime(), GetCurrentCharOffset()); + } +} + +void +nsSpeechTask::ForceEnd() +{ + if (mStream) { + mStream->Suspend(); + } + + if (!mInited) { + mPreCanceled = true; + } + + DispatchEndInner(GetCurrentTime(), GetCurrentCharOffset()); +} + +float +nsSpeechTask::GetCurrentTime() +{ + return mStream ? (float)(mStream->GetCurrentTime() / 1000000.0) : 0; +} + +uint32_t +nsSpeechTask::GetCurrentCharOffset() +{ + return mStream && mStream->IsFinished() ? 
mText.Length() : 0; +} + +void +nsSpeechTask::SetSpeechSynthesis(SpeechSynthesis* aSpeechSynthesis) +{ + mSpeechSynthesis = aSpeechSynthesis; +} + +void +nsSpeechTask::CreateAudioChannelAgent() +{ + if (!mUtterance) { + return; + } + + if (mAudioChannelAgent) { + mAudioChannelAgent->NotifyStoppedPlaying(); + } + + mAudioChannelAgent = new AudioChannelAgent(); + mAudioChannelAgent->InitWithWeakCallback(mUtterance->GetOwner(), + static_cast(AudioChannelService::GetDefaultAudioChannel()), + this); + + AudioPlaybackConfig config; + nsresult rv = mAudioChannelAgent->NotifyStartedPlaying(&config, + AudioChannelService::AudibleState::eAudible); + if (NS_WARN_IF(NS_FAILED(rv))) { + return; + } + + WindowVolumeChanged(config.mVolume, config.mMuted); + WindowSuspendChanged(config.mSuspend); +} + +void +nsSpeechTask::DestroyAudioChannelAgent() +{ + if (mAudioChannelAgent) { + mAudioChannelAgent->NotifyStoppedPlaying(); + mAudioChannelAgent = nullptr; + } +} + +NS_IMETHODIMP +nsSpeechTask::WindowVolumeChanged(float aVolume, bool aMuted) +{ + SetAudioOutputVolume(aMuted ? 0.0 : mVolume * aVolume); + return NS_OK; +} + +NS_IMETHODIMP +nsSpeechTask::WindowSuspendChanged(nsSuspendedTypes aSuspend) +{ + if (!mUtterance) { + return NS_OK; + } + + if (aSuspend == nsISuspendedTypes::NONE_SUSPENDED && + mUtterance->mPaused) { + Resume(); + } else if (aSuspend != nsISuspendedTypes::NONE_SUSPENDED && + !mUtterance->mPaused) { + Pause(); + } + return NS_OK; +} + +NS_IMETHODIMP +nsSpeechTask::WindowAudioCaptureChanged(bool aCapture) +{ + // This is not supported yet. + return NS_OK; +} + +void +nsSpeechTask::SetAudioOutputVolume(float aVolume) +{ + if (mStream && !mStream->IsDestroyed()) { + mStream->SetAudioOutputVolume(this, aVolume); + } + if (mIndirectAudio) { + mCallback->OnVolumeChanged(aVolume); + } +} + +} // namespace dom +} // namespace mozilla diff --git a/dom/media/webspeech/synth/nsSpeechTask.h b/dom/media/webspeech/synth/nsSpeechTask.h new file mode 100644 index 000000000..c2c5dba84 --- /dev/null +++ b/dom/media/webspeech/synth/nsSpeechTask.h @@ -0,0 +1,139 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef mozilla_dom_nsSpeechTask_h +#define mozilla_dom_nsSpeechTask_h + +#include "MediaStreamGraph.h" +#include "SpeechSynthesisUtterance.h" +#include "nsIAudioChannelAgent.h" +#include "nsISpeechService.h" + +namespace mozilla { + +class SharedBuffer; + +namespace dom { + +class SpeechSynthesisUtterance; +class SpeechSynthesis; +class SynthStreamListener; + +class nsSpeechTask : public nsISpeechTask + , public nsIAudioChannelAgentCallback + , public nsSupportsWeakReference +{ + friend class SynthStreamListener; + +public: + NS_DECL_CYCLE_COLLECTING_ISUPPORTS + NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(nsSpeechTask, nsISpeechTask) + + NS_DECL_NSISPEECHTASK + NS_DECL_NSIAUDIOCHANNELAGENTCALLBACK + + explicit nsSpeechTask(SpeechSynthesisUtterance* aUtterance); + nsSpeechTask(float aVolume, const nsAString& aText); + + virtual void Pause(); + + virtual void Resume(); + + virtual void Cancel(); + + virtual void ForceEnd(); + + float GetCurrentTime(); + + uint32_t GetCurrentCharOffset(); + + void SetSpeechSynthesis(SpeechSynthesis* aSpeechSynthesis); + + void InitDirectAudio(); + void InitIndirectAudio(); + + void SetChosenVoiceURI(const nsAString& aUri); + + virtual void SetAudioOutputVolume(float aVolume); + + bool IsPreCanceled() + { + return mPreCanceled; + }; + + bool IsPrePaused() + { + return mPrePaused; + } + +protected: + virtual ~nsSpeechTask(); + + nsresult DispatchStartImpl(); + + virtual nsresult DispatchStartImpl(const nsAString& aUri); + + virtual nsresult DispatchEndImpl(float aElapsedTime, uint32_t aCharIndex); + + virtual nsresult DispatchPauseImpl(float aElapsedTime, uint32_t aCharIndex); + + virtual nsresult DispatchResumeImpl(float aElapsedTime, uint32_t aCharIndex); + + virtual nsresult DispatchErrorImpl(float aElapsedTime, uint32_t aCharIndex); + + virtual nsresult DispatchBoundaryImpl(const nsAString& aName, + float aElapsedTime, + uint32_t aCharIndex); + + virtual nsresult DispatchMarkImpl(const nsAString& aName, + float aElapsedTime, uint32_t aCharIndex); + + RefPtr mUtterance; + + float mVolume; + + nsString mText; + + bool mInited; + + bool mPrePaused; + + bool mPreCanceled; + +private: + void End(); + + void SendAudioImpl(RefPtr& aSamples, uint32_t aDataLen); + + nsresult DispatchStartInner(); + + nsresult DispatchEndInner(float aElapsedTime, uint32_t aCharIndex); + + void CreateAudioChannelAgent(); + + void DestroyAudioChannelAgent(); + + RefPtr mStream; + + RefPtr mPort; + + nsCOMPtr mCallback; + + nsCOMPtr mAudioChannelAgent; + + uint32_t mChannels; + + RefPtr mSpeechSynthesis; + + bool mIndirectAudio; + + nsString mChosenVoiceURI; +}; + +} // namespace dom +} // namespace mozilla + +#endif diff --git a/dom/media/webspeech/synth/nsSynthVoiceRegistry.cpp b/dom/media/webspeech/synth/nsSynthVoiceRegistry.cpp new file mode 100644 index 000000000..b593479e3 --- /dev/null +++ b/dom/media/webspeech/synth/nsSynthVoiceRegistry.cpp @@ -0,0 +1,835 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "nsILocaleService.h" +#include "nsISpeechService.h" +#include "nsServiceManagerUtils.h" +#include "nsCategoryManagerUtils.h" + +#include "MediaPrefs.h" +#include "SpeechSynthesisUtterance.h" +#include "SpeechSynthesisVoice.h" +#include "nsSynthVoiceRegistry.h" +#include "nsSpeechTask.h" +#include "AudioChannelService.h" + +#include "nsString.h" +#include "mozilla/StaticPtr.h" +#include "mozilla/dom/ContentChild.h" +#include "mozilla/dom/ContentParent.h" +#include "mozilla/Unused.h" + +#include "SpeechSynthesisChild.h" +#include "SpeechSynthesisParent.h" + +#undef LOG +extern mozilla::LogModule* GetSpeechSynthLog(); +#define LOG(type, msg) MOZ_LOG(GetSpeechSynthLog(), type, msg) + +namespace { + +void +GetAllSpeechSynthActors(InfallibleTArray& aActors) +{ + MOZ_ASSERT(NS_IsMainThread()); + MOZ_ASSERT(aActors.IsEmpty()); + + AutoTArray contentActors; + mozilla::dom::ContentParent::GetAll(contentActors); + + for (uint32_t contentIndex = 0; + contentIndex < contentActors.Length(); + ++contentIndex) { + MOZ_ASSERT(contentActors[contentIndex]); + + AutoTArray speechsynthActors; + contentActors[contentIndex]->ManagedPSpeechSynthesisParent(speechsynthActors); + + for (uint32_t speechsynthIndex = 0; + speechsynthIndex < speechsynthActors.Length(); + ++speechsynthIndex) { + MOZ_ASSERT(speechsynthActors[speechsynthIndex]); + + mozilla::dom::SpeechSynthesisParent* actor = + static_cast(speechsynthActors[speechsynthIndex]); + aActors.AppendElement(actor); + } + } +} + +} // namespace + +namespace mozilla { +namespace dom { + +// VoiceData + +class VoiceData final +{ +private: + // Private destructor, to discourage deletion outside of Release(): + ~VoiceData() {} + +public: + VoiceData(nsISpeechService* aService, const nsAString& aUri, + const nsAString& aName, const nsAString& aLang, + bool aIsLocal, bool aQueuesUtterances) + : mService(aService) + , mUri(aUri) + , mName(aName) + , mLang(aLang) + , mIsLocal(aIsLocal) + , mIsQueued(aQueuesUtterances) {} + + NS_INLINE_DECL_REFCOUNTING(VoiceData) + + nsCOMPtr mService; + + nsString mUri; + + nsString mName; + + nsString mLang; + + bool mIsLocal; + + bool mIsQueued; +}; + +// GlobalQueueItem + +class GlobalQueueItem final +{ +private: + // Private destructor, to discourage deletion outside of Release(): + ~GlobalQueueItem() {} + +public: + GlobalQueueItem(VoiceData* aVoice, nsSpeechTask* aTask, const nsAString& aText, + const float& aVolume, const float& aRate, const float& aPitch) + : mVoice(aVoice) + , mTask(aTask) + , mText(aText) + , mVolume(aVolume) + , mRate(aRate) + , mPitch(aPitch) {} + + NS_INLINE_DECL_REFCOUNTING(GlobalQueueItem) + + RefPtr mVoice; + + RefPtr mTask; + + nsString mText; + + float mVolume; + + float mRate; + + float mPitch; + + bool mIsLocal; +}; + +// nsSynthVoiceRegistry + +static StaticRefPtr gSynthVoiceRegistry; + +NS_IMPL_ISUPPORTS(nsSynthVoiceRegistry, nsISynthVoiceRegistry) + +nsSynthVoiceRegistry::nsSynthVoiceRegistry() + : mSpeechSynthChild(nullptr) + , mUseGlobalQueue(false) + , mIsSpeaking(false) +{ + if (XRE_IsContentProcess()) { + + mSpeechSynthChild = new SpeechSynthesisChild(); + ContentChild::GetSingleton()->SendPSpeechSynthesisConstructor(mSpeechSynthChild); + + InfallibleTArray voices; + InfallibleTArray defaults; + bool isSpeaking; + + mSpeechSynthChild->SendReadVoicesAndState(&voices, &defaults, &isSpeaking); + + for (uint32_t i = 0; i < voices.Length(); ++i) { + RemoteVoice voice = voices[i]; + AddVoiceImpl(nullptr, voice.voiceURI(), + voice.name(), voice.lang(), + voice.localService(), 
                   voice.queued());
+    }
+
+    for (uint32_t i = 0; i < defaults.Length(); ++i) {
+      SetDefaultVoice(defaults[i], true);
+    }
+
+    mIsSpeaking = isSpeaking;
+  }
+}
+
+nsSynthVoiceRegistry::~nsSynthVoiceRegistry()
+{
+  LOG(LogLevel::Debug, ("~nsSynthVoiceRegistry"));
+
+  // mSpeechSynthChild's lifecycle is managed by the Content protocol.
+  mSpeechSynthChild = nullptr;
+
+  mUriVoiceMap.Clear();
+}
+
+nsSynthVoiceRegistry*
+nsSynthVoiceRegistry::GetInstance()
+{
+  MOZ_ASSERT(NS_IsMainThread());
+
+  if (!gSynthVoiceRegistry) {
+    gSynthVoiceRegistry = new nsSynthVoiceRegistry();
+    if (XRE_IsParentProcess()) {
+      // Start up all speech synth services.
+      NS_CreateServicesFromCategory(NS_SPEECH_SYNTH_STARTED, nullptr,
+                                    NS_SPEECH_SYNTH_STARTED);
+    }
+  }
+
+  return gSynthVoiceRegistry;
+}
+
+already_AddRefed<nsSynthVoiceRegistry>
+nsSynthVoiceRegistry::GetInstanceForService()
+{
+  RefPtr<nsSynthVoiceRegistry> registry = GetInstance();
+
+  return registry.forget();
+}
+
+void
+nsSynthVoiceRegistry::Shutdown()
+{
+  LOG(LogLevel::Debug, ("[%s] nsSynthVoiceRegistry::Shutdown()",
+                        (XRE_IsContentProcess()) ? "Content" : "Default"));
+  gSynthVoiceRegistry = nullptr;
+}
+
+void
+nsSynthVoiceRegistry::SendVoicesAndState(InfallibleTArray<RemoteVoice>* aVoices,
+                                         InfallibleTArray<nsString>* aDefaults,
+                                         bool* aIsSpeaking)
+{
+  for (uint32_t i = 0; i < mVoices.Length(); ++i) {
+    RefPtr<VoiceData> voice = mVoices[i];
+
+    aVoices->AppendElement(RemoteVoice(voice->mUri, voice->mName, voice->mLang,
+                                       voice->mIsLocal, voice->mIsQueued));
+  }
+
+  for (uint32_t i = 0; i < mDefaultVoices.Length(); ++i) {
+    aDefaults->AppendElement(mDefaultVoices[i]->mUri);
+  }
+
+  *aIsSpeaking = IsSpeaking();
+}
+
+void
+nsSynthVoiceRegistry::RecvRemoveVoice(const nsAString& aUri)
+{
+  // If we don't have a local instance of the registry yet, we will receive
+  // the current voices at construction time.
+  if (!gSynthVoiceRegistry) {
+    return;
+  }
+
+  gSynthVoiceRegistry->RemoveVoice(nullptr, aUri);
+}
+
+void
+nsSynthVoiceRegistry::RecvAddVoice(const RemoteVoice& aVoice)
+{
+  // If we don't have a local instance of the registry yet, we will receive
+  // the current voices at construction time.
+  if (!gSynthVoiceRegistry) {
+    return;
+  }
+
+  gSynthVoiceRegistry->AddVoiceImpl(nullptr, aVoice.voiceURI(),
+                                    aVoice.name(), aVoice.lang(),
+                                    aVoice.localService(), aVoice.queued());
+}
+
+void
+nsSynthVoiceRegistry::RecvSetDefaultVoice(const nsAString& aUri, bool aIsDefault)
+{
+  // If we don't have a local instance of the registry yet, we will receive
+  // the current voices at construction time.
+  if (!gSynthVoiceRegistry) {
+    return;
+  }
+
+  gSynthVoiceRegistry->SetDefaultVoice(aUri, aIsDefault);
+}
+
+void
+nsSynthVoiceRegistry::RecvIsSpeakingChanged(bool aIsSpeaking)
+{
+  // If we don't have a local instance of the registry yet, we will get the
+  // speaking state on construction.
+  if (!gSynthVoiceRegistry) {
+    return;
+  }
+
+  gSynthVoiceRegistry->mIsSpeaking = aIsSpeaking;
+}
+
+void
+nsSynthVoiceRegistry::RecvNotifyVoicesChanged()
+{
+  // If we don't have a local instance of the registry yet, we don't care.
+  if (!gSynthVoiceRegistry) {
+    return;
+  }
+
+  gSynthVoiceRegistry->NotifyVoicesChanged();
+}
+
+NS_IMETHODIMP
+nsSynthVoiceRegistry::AddVoice(nsISpeechService* aService,
+                               const nsAString& aUri,
+                               const nsAString& aName,
+                               const nsAString& aLang,
+                               bool aLocalService,
+                               bool aQueuesUtterances)
+{
+  LOG(LogLevel::Debug,
+      ("nsSynthVoiceRegistry::AddVoice uri='%s' name='%s' lang='%s' local=%s queued=%s",
+       NS_ConvertUTF16toUTF8(aUri).get(), NS_ConvertUTF16toUTF8(aName).get(),
+       NS_ConvertUTF16toUTF8(aLang).get(),
+       aLocalService ?
"true" : "false", + aQueuesUtterances ? "true" : "false")); + + if(NS_WARN_IF(XRE_IsContentProcess())) { + return NS_ERROR_NOT_AVAILABLE; + } + + return AddVoiceImpl(aService, aUri, aName, aLang, aLocalService, aQueuesUtterances); +} + +NS_IMETHODIMP +nsSynthVoiceRegistry::RemoveVoice(nsISpeechService* aService, + const nsAString& aUri) +{ + LOG(LogLevel::Debug, + ("nsSynthVoiceRegistry::RemoveVoice uri='%s' (%s)", + NS_ConvertUTF16toUTF8(aUri).get(), + (XRE_IsContentProcess()) ? "child" : "parent")); + + bool found = false; + VoiceData* retval = mUriVoiceMap.GetWeak(aUri, &found); + + if(NS_WARN_IF(!(found))) { + return NS_ERROR_NOT_AVAILABLE; + } + if(NS_WARN_IF(!(aService == retval->mService))) { + return NS_ERROR_INVALID_ARG; + } + + mVoices.RemoveElement(retval); + mDefaultVoices.RemoveElement(retval); + mUriVoiceMap.Remove(aUri); + + if (retval->mIsQueued && !MediaPrefs::WebSpeechForceGlobal()) { + // Check if this is the last queued voice, and disable the global queue if + // it is. + bool queued = false; + for (uint32_t i = 0; i < mVoices.Length(); i++) { + VoiceData* voice = mVoices[i]; + if (voice->mIsQueued) { + queued = true; + break; + } + } + if (!queued) { + mUseGlobalQueue = false; + } + } + + nsTArray ssplist; + GetAllSpeechSynthActors(ssplist); + + for (uint32_t i = 0; i < ssplist.Length(); ++i) + Unused << ssplist[i]->SendVoiceRemoved(nsString(aUri)); + + return NS_OK; +} + +NS_IMETHODIMP +nsSynthVoiceRegistry::NotifyVoicesChanged() +{ + if (XRE_IsParentProcess()) { + nsTArray ssplist; + GetAllSpeechSynthActors(ssplist); + + for (uint32_t i = 0; i < ssplist.Length(); ++i) + Unused << ssplist[i]->SendNotifyVoicesChanged(); + } + + nsCOMPtr obs = mozilla::services::GetObserverService(); + if(NS_WARN_IF(!(obs))) { + return NS_ERROR_NOT_AVAILABLE; + } + + obs->NotifyObservers(nullptr, "synth-voices-changed", nullptr); + + return NS_OK; +} + +NS_IMETHODIMP +nsSynthVoiceRegistry::SetDefaultVoice(const nsAString& aUri, + bool aIsDefault) +{ + bool found = false; + VoiceData* retval = mUriVoiceMap.GetWeak(aUri, &found); + if(NS_WARN_IF(!(found))) { + return NS_ERROR_NOT_AVAILABLE; + } + + mDefaultVoices.RemoveElement(retval); + + LOG(LogLevel::Debug, ("nsSynthVoiceRegistry::SetDefaultVoice %s %s", + NS_ConvertUTF16toUTF8(aUri).get(), + aIsDefault ? 
"true" : "false")); + + if (aIsDefault) { + mDefaultVoices.AppendElement(retval); + } + + if (XRE_IsParentProcess()) { + nsTArray ssplist; + GetAllSpeechSynthActors(ssplist); + + for (uint32_t i = 0; i < ssplist.Length(); ++i) { + Unused << ssplist[i]->SendSetDefaultVoice(nsString(aUri), aIsDefault); + } + } + + return NS_OK; +} + +NS_IMETHODIMP +nsSynthVoiceRegistry::GetVoiceCount(uint32_t* aRetval) +{ + *aRetval = mVoices.Length(); + + return NS_OK; +} + +NS_IMETHODIMP +nsSynthVoiceRegistry::GetVoice(uint32_t aIndex, nsAString& aRetval) +{ + if(NS_WARN_IF(!(aIndex < mVoices.Length()))) { + return NS_ERROR_INVALID_ARG; + } + + aRetval = mVoices[aIndex]->mUri; + + return NS_OK; +} + +NS_IMETHODIMP +nsSynthVoiceRegistry::IsDefaultVoice(const nsAString& aUri, bool* aRetval) +{ + bool found; + VoiceData* voice = mUriVoiceMap.GetWeak(aUri, &found); + if(NS_WARN_IF(!(found))) { + return NS_ERROR_NOT_AVAILABLE; + } + + for (int32_t i = mDefaultVoices.Length(); i > 0; ) { + VoiceData* defaultVoice = mDefaultVoices[--i]; + + if (voice->mLang.Equals(defaultVoice->mLang)) { + *aRetval = voice == defaultVoice; + return NS_OK; + } + } + + *aRetval = false; + return NS_OK; +} + +NS_IMETHODIMP +nsSynthVoiceRegistry::IsLocalVoice(const nsAString& aUri, bool* aRetval) +{ + bool found; + VoiceData* voice = mUriVoiceMap.GetWeak(aUri, &found); + if(NS_WARN_IF(!(found))) { + return NS_ERROR_NOT_AVAILABLE; + } + + *aRetval = voice->mIsLocal; + return NS_OK; +} + +NS_IMETHODIMP +nsSynthVoiceRegistry::GetVoiceLang(const nsAString& aUri, nsAString& aRetval) +{ + bool found; + VoiceData* voice = mUriVoiceMap.GetWeak(aUri, &found); + if(NS_WARN_IF(!(found))) { + return NS_ERROR_NOT_AVAILABLE; + } + + aRetval = voice->mLang; + return NS_OK; +} + +NS_IMETHODIMP +nsSynthVoiceRegistry::GetVoiceName(const nsAString& aUri, nsAString& aRetval) +{ + bool found; + VoiceData* voice = mUriVoiceMap.GetWeak(aUri, &found); + if(NS_WARN_IF(!(found))) { + return NS_ERROR_NOT_AVAILABLE; + } + + aRetval = voice->mName; + return NS_OK; +} + +nsresult +nsSynthVoiceRegistry::AddVoiceImpl(nsISpeechService* aService, + const nsAString& aUri, + const nsAString& aName, + const nsAString& aLang, + bool aLocalService, + bool aQueuesUtterances) +{ + bool found = false; + mUriVoiceMap.GetWeak(aUri, &found); + if(NS_WARN_IF(found)) { + return NS_ERROR_INVALID_ARG; + } + + RefPtr voice = new VoiceData(aService, aUri, aName, aLang, + aLocalService, aQueuesUtterances); + + mVoices.AppendElement(voice); + mUriVoiceMap.Put(aUri, voice); + mUseGlobalQueue |= aQueuesUtterances; + + nsTArray ssplist; + GetAllSpeechSynthActors(ssplist); + + if (!ssplist.IsEmpty()) { + mozilla::dom::RemoteVoice ssvoice(nsString(aUri), + nsString(aName), + nsString(aLang), + aLocalService, + aQueuesUtterances); + + for (uint32_t i = 0; i < ssplist.Length(); ++i) { + Unused << ssplist[i]->SendVoiceAdded(ssvoice); + } + } + + return NS_OK; +} + +bool +nsSynthVoiceRegistry::FindVoiceByLang(const nsAString& aLang, + VoiceData** aRetval) +{ + nsAString::const_iterator dashPos, start, end; + aLang.BeginReading(start); + aLang.EndReading(end); + + while (true) { + nsAutoString langPrefix(Substring(start, end)); + + for (int32_t i = mDefaultVoices.Length(); i > 0; ) { + VoiceData* voice = mDefaultVoices[--i]; + + if (StringBeginsWith(voice->mLang, langPrefix)) { + *aRetval = voice; + return true; + } + } + + for (int32_t i = mVoices.Length(); i > 0; ) { + VoiceData* voice = mVoices[--i]; + + if (StringBeginsWith(voice->mLang, langPrefix)) { + *aRetval = voice; + return true; + } 
+ } + + dashPos = end; + end = start; + + if (!RFindInReadable(NS_LITERAL_STRING("-"), end, dashPos)) { + break; + } + } + + return false; +} + +VoiceData* +nsSynthVoiceRegistry::FindBestMatch(const nsAString& aUri, + const nsAString& aLang) +{ + if (mVoices.IsEmpty()) { + return nullptr; + } + + bool found = false; + VoiceData* retval = mUriVoiceMap.GetWeak(aUri, &found); + + if (found) { + LOG(LogLevel::Debug, ("nsSynthVoiceRegistry::FindBestMatch - Matched URI")); + return retval; + } + + // Try finding a match for given voice. + if (!aLang.IsVoid() && !aLang.IsEmpty()) { + if (FindVoiceByLang(aLang, &retval)) { + LOG(LogLevel::Debug, + ("nsSynthVoiceRegistry::FindBestMatch - Matched language (%s ~= %s)", + NS_ConvertUTF16toUTF8(aLang).get(), + NS_ConvertUTF16toUTF8(retval->mLang).get())); + + return retval; + } + } + + // Try UI language. + nsresult rv; + nsCOMPtr localeService = do_GetService(NS_LOCALESERVICE_CONTRACTID, &rv); + if (NS_WARN_IF(NS_FAILED(rv))) { + return nullptr; + } + + nsAutoString uiLang; + rv = localeService->GetLocaleComponentForUserAgent(uiLang); + if (NS_WARN_IF(NS_FAILED(rv))) { + return nullptr; + } + + if (FindVoiceByLang(uiLang, &retval)) { + LOG(LogLevel::Debug, + ("nsSynthVoiceRegistry::FindBestMatch - Matched UI language (%s ~= %s)", + NS_ConvertUTF16toUTF8(uiLang).get(), + NS_ConvertUTF16toUTF8(retval->mLang).get())); + + return retval; + } + + // Try en-US, the language of locale "C" + if (FindVoiceByLang(NS_LITERAL_STRING("en-US"), &retval)) { + LOG(LogLevel::Debug, + ("nsSynthVoiceRegistry::FindBestMatch - Matched C locale language (en-US ~= %s)", + NS_ConvertUTF16toUTF8(retval->mLang).get())); + + return retval; + } + + // The top default voice is better than nothing... + if (!mDefaultVoices.IsEmpty()) { + return mDefaultVoices.LastElement(); + } + + return nullptr; +} + +already_AddRefed +nsSynthVoiceRegistry::SpeakUtterance(SpeechSynthesisUtterance& aUtterance, + const nsAString& aDocLang) +{ + nsString lang = nsString(aUtterance.mLang.IsEmpty() ? aDocLang : aUtterance.mLang); + nsAutoString uri; + + if (aUtterance.mVoice) { + aUtterance.mVoice->GetVoiceURI(uri); + } + + // Get current audio volume to apply speech call + float volume = aUtterance.Volume(); + RefPtr service = AudioChannelService::GetOrCreate(); + if (service) { + if (nsCOMPtr topWindow = aUtterance.GetOwner()) { + // TODO : use audio channel agent, open new bug to fix it. + uint32_t channel = static_cast(AudioChannelService::GetDefaultAudioChannel()); + AudioPlaybackConfig config = service->GetMediaConfig(topWindow->GetOuterWindow(), + channel); + volume = config.mMuted ? 
0.0f : config.mVolume * volume; + } + } + + RefPtr task; + if (XRE_IsContentProcess()) { + task = new SpeechTaskChild(&aUtterance); + SpeechSynthesisRequestChild* actor = + new SpeechSynthesisRequestChild(static_cast(task.get())); + mSpeechSynthChild->SendPSpeechSynthesisRequestConstructor(actor, + aUtterance.mText, + lang, + uri, + volume, + aUtterance.Rate(), + aUtterance.Pitch()); + } else { + task = new nsSpeechTask(&aUtterance); + Speak(aUtterance.mText, lang, uri, + volume, aUtterance.Rate(), aUtterance.Pitch(), task); + } + + return task.forget(); +} + +void +nsSynthVoiceRegistry::Speak(const nsAString& aText, + const nsAString& aLang, + const nsAString& aUri, + const float& aVolume, + const float& aRate, + const float& aPitch, + nsSpeechTask* aTask) +{ + MOZ_ASSERT(XRE_IsParentProcess()); + + VoiceData* voice = FindBestMatch(aUri, aLang); + + if (!voice) { + NS_WARNING("No voices found."); + aTask->DispatchError(0, 0); + return; + } + + aTask->SetChosenVoiceURI(voice->mUri); + + if (mUseGlobalQueue || MediaPrefs::WebSpeechForceGlobal()) { + LOG(LogLevel::Debug, + ("nsSynthVoiceRegistry::Speak queueing text='%s' lang='%s' uri='%s' rate=%f pitch=%f", + NS_ConvertUTF16toUTF8(aText).get(), NS_ConvertUTF16toUTF8(aLang).get(), + NS_ConvertUTF16toUTF8(aUri).get(), aRate, aPitch)); + RefPtr item = new GlobalQueueItem(voice, aTask, aText, + aVolume, aRate, aPitch); + mGlobalQueue.AppendElement(item); + + if (mGlobalQueue.Length() == 1) { + SpeakImpl(item->mVoice, item->mTask, item->mText, item->mVolume, item->mRate, + item->mPitch); + } + } else { + SpeakImpl(voice, aTask, aText, aVolume, aRate, aPitch); + } +} + +void +nsSynthVoiceRegistry::SpeakNext() +{ + MOZ_ASSERT(XRE_IsParentProcess()); + + LOG(LogLevel::Debug, + ("nsSynthVoiceRegistry::SpeakNext %d", mGlobalQueue.IsEmpty())); + + SetIsSpeaking(false); + + if (mGlobalQueue.IsEmpty()) { + return; + } + + mGlobalQueue.RemoveElementAt(0); + + while (!mGlobalQueue.IsEmpty()) { + RefPtr item = mGlobalQueue.ElementAt(0); + if (item->mTask->IsPreCanceled()) { + mGlobalQueue.RemoveElementAt(0); + continue; + } + if (!item->mTask->IsPrePaused()) { + SpeakImpl(item->mVoice, item->mTask, item->mText, item->mVolume, + item->mRate, item->mPitch); + } + break; + } +} + +void +nsSynthVoiceRegistry::ResumeQueue() +{ + MOZ_ASSERT(XRE_IsParentProcess()); + LOG(LogLevel::Debug, + ("nsSynthVoiceRegistry::ResumeQueue %d", mGlobalQueue.IsEmpty())); + + if (mGlobalQueue.IsEmpty()) { + return; + } + + RefPtr item = mGlobalQueue.ElementAt(0); + if (!item->mTask->IsPrePaused()) { + SpeakImpl(item->mVoice, item->mTask, item->mText, item->mVolume, + item->mRate, item->mPitch); + } +} + +bool +nsSynthVoiceRegistry::IsSpeaking() +{ + return mIsSpeaking; +} + +void +nsSynthVoiceRegistry::SetIsSpeaking(bool aIsSpeaking) +{ + MOZ_ASSERT(XRE_IsParentProcess()); + + // Only set to 'true' if global queue is enabled. 
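+  // (Without the global queue, speaking state is not tracked across services,
+  // so the flag stays false.)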
+ mIsSpeaking = + aIsSpeaking && (mUseGlobalQueue || MediaPrefs::WebSpeechForceGlobal()); + + nsTArray ssplist; + GetAllSpeechSynthActors(ssplist); + for (uint32_t i = 0; i < ssplist.Length(); ++i) { + Unused << ssplist[i]->SendIsSpeakingChanged(aIsSpeaking); + } +} + +void +nsSynthVoiceRegistry::SpeakImpl(VoiceData* aVoice, + nsSpeechTask* aTask, + const nsAString& aText, + const float& aVolume, + const float& aRate, + const float& aPitch) +{ + LOG(LogLevel::Debug, + ("nsSynthVoiceRegistry::SpeakImpl queueing text='%s' uri='%s' rate=%f pitch=%f", + NS_ConvertUTF16toUTF8(aText).get(), NS_ConvertUTF16toUTF8(aVoice->mUri).get(), + aRate, aPitch)); + + SpeechServiceType serviceType; + + DebugOnly rv = aVoice->mService->GetServiceType(&serviceType); + NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "Failed to get speech service type"); + + if (serviceType == nsISpeechService::SERVICETYPE_INDIRECT_AUDIO) { + aTask->InitIndirectAudio(); + } else { + aTask->InitDirectAudio(); + } + + if (NS_FAILED(aVoice->mService->Speak(aText, aVoice->mUri, aVolume, aRate, + aPitch, aTask))) { + if (serviceType == nsISpeechService::SERVICETYPE_INDIRECT_AUDIO) { + aTask->DispatchError(0, 0); + } + // XXX When using direct audio, no way to dispatch error + } +} + +} // namespace dom +} // namespace mozilla diff --git a/dom/media/webspeech/synth/nsSynthVoiceRegistry.h b/dom/media/webspeech/synth/nsSynthVoiceRegistry.h new file mode 100644 index 000000000..08e9dae7d --- /dev/null +++ b/dom/media/webspeech/synth/nsSynthVoiceRegistry.h @@ -0,0 +1,109 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef mozilla_dom_nsSynthVoiceRegistry_h +#define mozilla_dom_nsSynthVoiceRegistry_h + +#include "nsISynthVoiceRegistry.h" +#include "nsRefPtrHashtable.h" +#include "nsTArray.h" +#include "MediaStreamGraph.h" + +class nsISpeechService; + +namespace mozilla { +namespace dom { + +class RemoteVoice; +class SpeechSynthesisUtterance; +class SpeechSynthesisChild; +class nsSpeechTask; +class VoiceData; +class GlobalQueueItem; + +class nsSynthVoiceRegistry final : public nsISynthVoiceRegistry +{ +public: + NS_DECL_ISUPPORTS + NS_DECL_NSISYNTHVOICEREGISTRY + + nsSynthVoiceRegistry(); + + already_AddRefed SpeakUtterance(SpeechSynthesisUtterance& aUtterance, + const nsAString& aDocLang); + + void Speak(const nsAString& aText, const nsAString& aLang, + const nsAString& aUri, const float& aVolume, const float& aRate, + const float& aPitch, nsSpeechTask* aTask); + + void SendVoicesAndState(InfallibleTArray* aVoices, + InfallibleTArray* aDefaults, + bool* aIsSpeaking); + + void SpeakNext(); + + void ResumeQueue(); + + bool IsSpeaking(); + + void SetIsSpeaking(bool aIsSpeaking); + + static nsSynthVoiceRegistry* GetInstance(); + + static already_AddRefed GetInstanceForService(); + + static void RecvRemoveVoice(const nsAString& aUri); + + static void RecvAddVoice(const RemoteVoice& aVoice); + + static void RecvSetDefaultVoice(const nsAString& aUri, bool aIsDefault); + + static void RecvIsSpeakingChanged(bool aIsSpeaking); + + static void RecvNotifyVoicesChanged(); + + static void Shutdown(); + +private: + virtual ~nsSynthVoiceRegistry(); + + VoiceData* FindBestMatch(const nsAString& aUri, const nsAString& lang); + + bool FindVoiceByLang(const nsAString& aLang, VoiceData** aRetval); + + nsresult AddVoiceImpl(nsISpeechService* aService, + const nsAString& aUri, + const nsAString& aName, + const nsAString& aLang, + bool aLocalService, + bool aQueuesUtterances); + + void SpeakImpl(VoiceData* aVoice, + nsSpeechTask* aTask, + const nsAString& aText, + const float& aVolume, + const float& aRate, + const float& aPitch); + + nsTArray> mVoices; + + nsTArray> mDefaultVoices; + + nsRefPtrHashtable mUriVoiceMap; + + SpeechSynthesisChild* mSpeechSynthChild; + + bool mUseGlobalQueue; + + nsTArray> mGlobalQueue; + + bool mIsSpeaking; +}; + +} // namespace dom +} // namespace mozilla + +#endif diff --git a/dom/media/webspeech/synth/pico/PicoModule.cpp b/dom/media/webspeech/synth/pico/PicoModule.cpp new file mode 100644 index 000000000..4d5b6fe07 --- /dev/null +++ b/dom/media/webspeech/synth/pico/PicoModule.cpp @@ -0,0 +1,58 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "mozilla/ModuleUtils.h" +#include "nsIClassInfoImpl.h" + +#ifdef MOZ_WEBRTC + +#include "nsPicoService.h" + +using namespace mozilla::dom; + +#define PICOSERVICE_CID \ + {0x346c4fc8, 0x12fe, 0x459c, {0x81, 0x19, 0x9a, 0xa7, 0x73, 0x37, 0x7f, 0xf4}} + +#define PICOSERVICE_CONTRACTID "@mozilla.org/synthpico;1" + +// Defines nsPicoServiceConstructor +NS_GENERIC_FACTORY_SINGLETON_CONSTRUCTOR(nsPicoService, + nsPicoService::GetInstanceForService) + +// Defines kPICOSERVICE_CID +NS_DEFINE_NAMED_CID(PICOSERVICE_CID); + +static const mozilla::Module::CIDEntry kCIDs[] = { + { &kPICOSERVICE_CID, true, nullptr, nsPicoServiceConstructor }, + { nullptr } +}; + +static const mozilla::Module::ContractIDEntry kContracts[] = { + { PICOSERVICE_CONTRACTID, &kPICOSERVICE_CID }, + { nullptr } +}; + +static const mozilla::Module::CategoryEntry kCategories[] = { + { "profile-after-change", "Pico Speech Synth", PICOSERVICE_CONTRACTID }, + { nullptr } +}; + +static void +UnloadPicoModule() +{ + nsPicoService::Shutdown(); +} + +static const mozilla::Module kModule = { + mozilla::Module::kVersion, + kCIDs, + kContracts, + kCategories, + nullptr, + nullptr, + UnloadPicoModule +}; + +NSMODULE_DEFN(synthpico) = &kModule; +#endif diff --git a/dom/media/webspeech/synth/pico/moz.build b/dom/media/webspeech/synth/pico/moz.build new file mode 100644 index 000000000..01ef30450 --- /dev/null +++ b/dom/media/webspeech/synth/pico/moz.build @@ -0,0 +1,13 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +UNIFIED_SOURCES += [ + 'nsPicoService.cpp', + 'PicoModule.cpp' +] +include('/ipc/chromium/chromium-config.mozbuild') + +FINAL_LIBRARY = 'xul' diff --git a/dom/media/webspeech/synth/pico/nsPicoService.cpp b/dom/media/webspeech/synth/pico/nsPicoService.cpp new file mode 100644 index 000000000..c3cf812fc --- /dev/null +++ b/dom/media/webspeech/synth/pico/nsPicoService.cpp @@ -0,0 +1,761 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsISupports.h" +#include "nsPicoService.h" +#include "nsPrintfCString.h" +#include "nsIWeakReferenceUtils.h" +#include "SharedBuffer.h" +#include "nsISimpleEnumerator.h" + +#include "mozilla/dom/nsSynthVoiceRegistry.h" +#include "mozilla/dom/nsSpeechTask.h" + +#include "nsIFile.h" +#include "nsThreadUtils.h" +#include "prenv.h" +#include "mozilla/Preferences.h" +#include "mozilla/DebugOnly.h" +#include + +// Pico API constants + +// Size of memory allocated for pico engine and voice resources. +// We only have one voice and its resources loaded at once, so this +// should always be enough. +#define PICO_MEM_SIZE 2500000 + +// Max length of returned strings. Pico will never return longer strings, +// so this amount should be good enough for preallocating. 
+#define PICO_RETSTRINGSIZE 200
+
+// Max amount we want from a single call of pico_getData
+#define PICO_MAX_CHUNK_SIZE 128
+
+// Arbitrary name for the loaded voice; it doesn't mean anything outside of Pico
+#define PICO_VOICE_NAME "pico"
+
+// Return status from pico_getData meaning there is more data in the pipeline
+// to get from more calls to pico_getData
+#define PICO_STEP_BUSY 201
+
+// For performing a "soft" reset between utterances. This is used when one
+// utterance is interrupted by a new one.
+#define PICO_RESET_SOFT 0x10
+
+// Currently, Pico only provides mono output.
+#define PICO_CHANNELS_NUM 1
+
+// Pico's sample rate is always 16000
+#define PICO_SAMPLE_RATE 16000
+
+// The path to the language files in Gonk
+#define GONK_PICO_LANG_PATH "/system/tts/lang_pico"
+
+namespace mozilla {
+namespace dom {
+
+StaticRefPtr<nsPicoService> nsPicoService::sSingleton;
+
+class PicoApi
+{
+public:
+
+  PicoApi() : mInitialized(false) {}
+
+  bool Init()
+  {
+    if (mInitialized) {
+      return true;
+    }
+
+    void* handle = dlopen("libttspico.so", RTLD_LAZY);
+
+    if (!handle) {
+      NS_WARNING("Failed to open libttspico.so, pico cannot run");
+      return false;
+    }
+
+    pico_initialize =
+      (pico_Status (*)(void*, uint32_t, pico_System*))dlsym(
+        handle, "pico_initialize");
+
+    pico_terminate =
+      (pico_Status (*)(pico_System*))dlsym(handle, "pico_terminate");
+
+    pico_getSystemStatusMessage =
+      (pico_Status (*)(pico_System, pico_Status, pico_Retstring))dlsym(
+        handle, "pico_getSystemStatusMessage");
+
+    pico_loadResource =
+      (pico_Status (*)(pico_System, const char*, pico_Resource*))dlsym(
+        handle, "pico_loadResource");
+
+    pico_unloadResource =
+      (pico_Status (*)(pico_System, pico_Resource*))dlsym(
+        handle, "pico_unloadResource");
+
+    pico_getResourceName =
+      (pico_Status (*)(pico_System, pico_Resource, pico_Retstring))dlsym(
+        handle, "pico_getResourceName");
+
+    pico_createVoiceDefinition =
+      (pico_Status (*)(pico_System, const char*))dlsym(
+        handle, "pico_createVoiceDefinition");
+
+    pico_addResourceToVoiceDefinition =
+      (pico_Status (*)(pico_System, const char*, const char*))dlsym(
+        handle, "pico_addResourceToVoiceDefinition");
+
+    pico_releaseVoiceDefinition =
+      (pico_Status (*)(pico_System, const char*))dlsym(
+        handle, "pico_releaseVoiceDefinition");
+
+    pico_newEngine =
+      (pico_Status (*)(pico_System, const char*, pico_Engine*))dlsym(
+        handle, "pico_newEngine");
+
+    pico_disposeEngine =
+      (pico_Status (*)(pico_System, pico_Engine*))dlsym(
+        handle, "pico_disposeEngine");
+
+    pico_resetEngine =
+      (pico_Status (*)(pico_Engine, int32_t))dlsym(handle, "pico_resetEngine");
+
+    pico_putTextUtf8 =
+      (pico_Status (*)(pico_Engine, const char*, const int16_t, int16_t*))dlsym(
+        handle, "pico_putTextUtf8");
+
+    pico_getData =
+      (pico_Status (*)(pico_Engine, void*, int16_t, int16_t*, int16_t*))dlsym(
+        handle, "pico_getData");
+
+    mInitialized = true;
+    return true;
+  }
+
+  typedef signed int pico_Status;
+  typedef char pico_Retstring[PICO_RETSTRINGSIZE];
+
+  pico_Status (* pico_initialize)(void*, uint32_t, pico_System*);
+  pico_Status (* pico_terminate)(pico_System*);
+  pico_Status (* pico_getSystemStatusMessage)(
+    pico_System, pico_Status, pico_Retstring);
+
+  pico_Status (* pico_loadResource)(pico_System, const char*, pico_Resource*);
+  pico_Status (* pico_unloadResource)(pico_System, pico_Resource*);
+  pico_Status (* pico_getResourceName)(
+    pico_System, pico_Resource, pico_Retstring);
+  pico_Status (* pico_createVoiceDefinition)(pico_System, const char*);
+  pico_Status (* pico_addResourceToVoiceDefinition)(
+    pico_System, const char*, const char*);
+  pico_Status (* pico_releaseVoiceDefinition)(pico_System, const char*);
+  pico_Status (* pico_newEngine)(pico_System, const char*, pico_Engine*);
+  pico_Status (* pico_disposeEngine)(pico_System, pico_Engine*);
+
+  pico_Status (* pico_resetEngine)(pico_Engine, int32_t);
+  pico_Status (* pico_putTextUtf8)(
+    pico_Engine, const char*, const int16_t, int16_t*);
+  pico_Status (* pico_getData)(
+    pico_Engine, void*, const int16_t, int16_t*, int16_t*);
+
+private:
+
+  bool mInitialized;
+
+} sPicoApi;
+
+#define PICO_ENSURE_SUCCESS_VOID(_funcName, _status)                       \
+  if (_status < 0) {                                                       \
+    PicoApi::pico_Retstring message;                                       \
+    sPicoApi.pico_getSystemStatusMessage(                                  \
+      nsPicoService::sSingleton->mPicoSystem, _status, message);           \
+    NS_WARNING(                                                            \
+      nsPrintfCString("Error running %s: %s", _funcName, message).get());  \
+    return;                                                                \
+  }
+
+#define PICO_ENSURE_SUCCESS(_funcName, _status, _rv)                       \
+  if (_status < 0) {                                                       \
+    PicoApi::pico_Retstring message;                                       \
+    sPicoApi.pico_getSystemStatusMessage(                                  \
+      nsPicoService::sSingleton->mPicoSystem, _status, message);           \
+    NS_WARNING(                                                            \
+      nsPrintfCString("Error running %s: %s", _funcName, message).get());  \
+    return _rv;                                                            \
+  }
+
+class PicoVoice
+{
+public:
+
+  PicoVoice(const nsAString& aLanguage)
+    : mLanguage(aLanguage) {}
+
+  NS_INLINE_DECL_THREADSAFE_REFCOUNTING(PicoVoice)
+
+  // Voice language, in BCP-47 syntax
+  nsString mLanguage;
+
+  // Language resource file
+  nsCString mTaFile;
+
+  // Speaker resource file
+  nsCString mSgFile;
+
+private:
+  ~PicoVoice() {}
+};
+
+class PicoCallbackRunnable : public Runnable,
+                             public nsISpeechTaskCallback
+{
+  friend class PicoSynthDataRunnable;
+
+public:
+  PicoCallbackRunnable(const nsAString& aText, PicoVoice* aVoice,
+                       float aRate, float aPitch, nsISpeechTask* aTask,
+                       nsPicoService* aService)
+    : mText(NS_ConvertUTF16toUTF8(aText))
+    , mRate(aRate)
+    , mPitch(aPitch)
+    , mFirstData(true)
+    , mTask(aTask)
+    , mVoice(aVoice)
+    , mService(aService) { }
+
+  NS_DECL_ISUPPORTS_INHERITED
+  NS_DECL_NSISPEECHTASKCALLBACK
+
+  NS_IMETHOD Run() override;
+
+  bool IsCurrentTask() { return mService->mCurrentTask == mTask; }
+
+private:
+  ~PicoCallbackRunnable() { }
+
+  void DispatchSynthDataRunnable(already_AddRefed<SharedBuffer>&& aBuffer,
+                                 size_t aBufferSize);
+
+  nsCString mText;
+
+  float mRate;
+
+  float mPitch;
+
+  bool mFirstData;
+
+  // We use this pointer to compare it with the current service task.
+  // If they differ, this runnable should stop.
+  nsISpeechTask* mTask;
+
+  // We hold a strong reference to the service, which in turn holds
+  // a strong reference to this voice.
+  PicoVoice* mVoice;
+
+  // By holding a strong reference to the service we guarantee that it won't be
+  // destroyed before this runnable.
+  RefPtr<nsPicoService> mService;
+};
+
+NS_IMPL_ISUPPORTS_INHERITED(PicoCallbackRunnable, Runnable, nsISpeechTaskCallback)
+
+// Runnable
+
+NS_IMETHODIMP
+PicoCallbackRunnable::Run()
+{
+  MOZ_ASSERT(!NS_IsMainThread());
+  PicoApi::pico_Status status = 0;
+
+  if (mService->CurrentVoice() != mVoice) {
+    mService->LoadEngine(mVoice);
+  } else {
+    status = sPicoApi.pico_resetEngine(mService->mPicoEngine, PICO_RESET_SOFT);
+    PICO_ENSURE_SUCCESS("pico_resetEngine", status, NS_ERROR_FAILURE);
+  }
+
+  // Add SSML markup for pitch and rate. Pico uses a minimal parser,
+  // so no namespace is needed. A standalone sketch of this wrapping follows.
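
The statement below builds that markup with printf-style substitution. As a standalone
illustration (an editorial sketch, not part of the patch; printf stands in for the
engine call), the same clamping and wrapping in plain C++:

#include <algorithm>
#include <cstdio>

// Pico's <pitch>/<speed> levels use 100 as "unchanged"; the Web Speech API
// centers pitch (0..2) and rate (0.1..10) on 1, hence the *100 scaling and
// the clamps to the ranges the code below accepts (pitch 50..200, speed 20..500).
static void PrintMarkedUpText(const char* aText, float aPitch, float aRate)
{
  float pitch = std::min(std::max(50.0f, aPitch * 100), 200.0f);
  float speed = std::min(std::max(20.0f, aRate * 100), 500.0f);
  std::printf("<pitch level=\"%0.0f\"><speed level=\"%0.0f\">%s</speed></pitch>\n",
              pitch, speed, aText);
}
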
+  nsPrintfCString markedUpText(
+    "<pitch level=\"%0.0f\"><speed level=\"%0.0f\">%s</speed></pitch>",
+    std::min(std::max(50.0f, mPitch * 100), 200.0f),
+    std::min(std::max(20.0f, mRate * 100), 500.0f),
+    mText.get());
+
+  const char* text = markedUpText.get();
+  size_t buffer_size = 512, buffer_offset = 0;
+  RefPtr<SharedBuffer> buffer = SharedBuffer::Create(buffer_size);
+  int16_t text_offset = 0, bytes_recv = 0, bytes_sent = 0, out_data_type = 0;
+  int16_t text_remaining = markedUpText.Length() + 1;
+
+  // Run this loop while this is the current task
+  while (IsCurrentTask()) {
+    if (text_remaining) {
+      status = sPicoApi.pico_putTextUtf8(mService->mPicoEngine,
+                                         text + text_offset, text_remaining,
+                                         &bytes_sent);
+      PICO_ENSURE_SUCCESS("pico_putTextUtf8", status, NS_ERROR_FAILURE);
+      // XXX: End speech task on error
+      text_remaining -= bytes_sent;
+      text_offset += bytes_sent;
+    } else {
+      // If we already fed all the text to the engine, send a zero length buffer
+      // and quit.
+      DispatchSynthDataRunnable(already_AddRefed<SharedBuffer>(), 0);
+      break;
+    }
+
+    do {
+      // Run this loop while the result of getData is STEP_BUSY; when it finishes
+      // synthesizing audio for the given text, it returns STEP_IDLE. We then
+      // break to the outer loop and feed more text, if there is any left.
+      if (!IsCurrentTask()) {
+        // If the task has changed, quit.
+        break;
+      }
+
+      if (buffer_size - buffer_offset < PICO_MAX_CHUNK_SIZE) {
+        // The next audio chunk retrieved may be bigger than our buffer,
+        // so send the data and flush the buffer.
+        DispatchSynthDataRunnable(buffer.forget(), buffer_offset);
+        buffer_offset = 0;
+        buffer = SharedBuffer::Create(buffer_size);
+      }
+
+      status = sPicoApi.pico_getData(mService->mPicoEngine,
+                                     (uint8_t*)buffer->Data() + buffer_offset,
+                                     PICO_MAX_CHUNK_SIZE,
+                                     &bytes_recv, &out_data_type);
+      PICO_ENSURE_SUCCESS("pico_getData", status, NS_ERROR_FAILURE);
+      buffer_offset += bytes_recv;
+    } while (status == PICO_STEP_BUSY);
+  }
+
+  return NS_OK;
+}
+
+void
+PicoCallbackRunnable::DispatchSynthDataRunnable(
+  already_AddRefed<SharedBuffer>&& aBuffer, size_t aBufferSize)
+{
+  class PicoSynthDataRunnable final : public Runnable
+  {
+  public:
+    PicoSynthDataRunnable(already_AddRefed<SharedBuffer>& aBuffer,
+                          size_t aBufferSize, bool aFirstData,
+                          PicoCallbackRunnable* aCallback)
+      : mBuffer(aBuffer)
+      , mBufferSize(aBufferSize)
+      , mFirstData(aFirstData)
+      , mCallback(aCallback) {
+    }
+
+    NS_IMETHOD Run() override
+    {
+      MOZ_ASSERT(NS_IsMainThread());
+
+      if (!mCallback->IsCurrentTask()) {
+        return NS_ERROR_NOT_AVAILABLE;
+      }
+
+      nsISpeechTask* task = mCallback->mTask;
+
+      if (mFirstData) {
+        task->Setup(mCallback, PICO_CHANNELS_NUM, PICO_SAMPLE_RATE, 2);
+      }
+
+      return task->SendAudioNative(
+        mBufferSize ? static_cast<short*>(mBuffer->Data()) : nullptr,
+        mBufferSize / 2);
+    }
+
+  private:
+    RefPtr<SharedBuffer> mBuffer;
+
+    size_t mBufferSize;
+
+    bool mFirstData;
+
+    RefPtr<PicoCallbackRunnable> mCallback;
+  };
+
+  nsCOMPtr<nsIRunnable> sendEvent =
+    new PicoSynthDataRunnable(aBuffer, aBufferSize, mFirstData, this);
+  NS_DispatchToMainThread(sendEvent);
+  mFirstData = false;
+}
+
+// nsISpeechTaskCallback
+
+NS_IMETHODIMP
+PicoCallbackRunnable::OnPause()
+{
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+PicoCallbackRunnable::OnResume()
+{
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+PicoCallbackRunnable::OnCancel()
+{
+  mService->mCurrentTask = nullptr;
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+PicoCallbackRunnable::OnVolumeChanged(float aVolume)
+{
+  return NS_OK;
+}
+
+NS_INTERFACE_MAP_BEGIN(nsPicoService)
+  NS_INTERFACE_MAP_ENTRY(nsISpeechService)
+  NS_INTERFACE_MAP_ENTRY(nsIObserver)
+  NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsIObserver)
+NS_INTERFACE_MAP_END
+
+NS_IMPL_ADDREF(nsPicoService)
+NS_IMPL_RELEASE(nsPicoService)
+
+nsPicoService::nsPicoService()
+  : mInitialized(false)
+  , mVoicesMonitor("nsPicoService::mVoices")
+  , mCurrentTask(nullptr)
+  , mPicoSystem(nullptr)
+  , mPicoEngine(nullptr)
+  , mSgResource(nullptr)
+  , mTaResource(nullptr)
+  , mPicoMemArea(nullptr)
+{
+}
+
+nsPicoService::~nsPicoService()
+{
+  // We don't worry about removing the voices because this gets
+  // destructed at shutdown along with the voice registry.
+  MonitorAutoLock autoLock(mVoicesMonitor);
+  mVoices.Clear();
+
+  if (mThread) {
+    mThread->Shutdown();
+  }
+
+  UnloadEngine();
+}
+
+// nsIObserver
+
+NS_IMETHODIMP
+nsPicoService::Observe(nsISupports* aSubject, const char* aTopic,
+                       const char16_t* aData)
+{
+  MOZ_ASSERT(NS_IsMainThread());
+  if (NS_WARN_IF(strcmp(aTopic, "profile-after-change") != 0)) {
+    return NS_ERROR_UNEXPECTED;
+  }
+
+  if (!Preferences::GetBool("media.webspeech.synth.enabled") ||
+      Preferences::GetBool("media.webspeech.synth.test")) {
+    return NS_OK;
+  }
+
+  DebugOnly<nsresult> rv = NS_NewNamedThread("Pico Worker", getter_AddRefs(mThread));
+  MOZ_ASSERT(NS_SUCCEEDED(rv));
+  return mThread->Dispatch(
+    NewRunnableMethod(this, &nsPicoService::Init), NS_DISPATCH_NORMAL);
+}
+
+// nsISpeechService
+
+NS_IMETHODIMP
+nsPicoService::Speak(const nsAString& aText, const nsAString& aUri,
+                     float aVolume, float aRate, float aPitch,
+                     nsISpeechTask* aTask)
+{
+  if (NS_WARN_IF(!mInitialized)) {
+    return NS_ERROR_NOT_AVAILABLE;
+  }
+
+  MonitorAutoLock autoLock(mVoicesMonitor);
+  bool found = false;
+  PicoVoice* voice = mVoices.GetWeak(aUri, &found);
+  if (NS_WARN_IF(!found)) {
+    return NS_ERROR_NOT_AVAILABLE;
+  }
+
+  mCurrentTask = aTask;
+  RefPtr<PicoCallbackRunnable> cb =
+    new PicoCallbackRunnable(aText, voice, aRate, aPitch, aTask, this);
+  return mThread->Dispatch(cb, NS_DISPATCH_NORMAL);
+}
+
+NS_IMETHODIMP
+nsPicoService::GetServiceType(SpeechServiceType* aServiceType)
+{
+  *aServiceType = nsISpeechService::SERVICETYPE_DIRECT_AUDIO;
+  return NS_OK;
+}
+
+// private methods
+
+void
+nsPicoService::Init()
+{
+  MOZ_ASSERT(!NS_IsMainThread());
+  MOZ_ASSERT(!mInitialized);
+
+  if (!sPicoApi.Init()) {
+    NS_WARNING("Failed to initialize pico library");
+    return;
+  }
+
+  // Use environment variable, or default android/b2g path
+  nsAutoCString langPath(PR_GetEnv("PICO_LANG_PATH"));
+
+  if (langPath.IsEmpty()) {
+    langPath.AssignLiteral(GONK_PICO_LANG_PATH);
+  }
+
+  nsCOMPtr<nsIFile> voicesDir;
+  NS_NewNativeLocalFile(langPath, true, getter_AddRefs(voicesDir));
+
+  nsCOMPtr<nsISimpleEnumerator> dirIterator;
+  nsresult rv = voicesDir->GetDirectoryEntries(getter_AddRefs(dirIterator));
+
+  if (NS_FAILED(rv)) {
+    NS_WARNING(nsPrintfCString("Failed to get contents of directory: %s",
+                               langPath.get()).get());
+    return;
+  }
+
+  bool hasMoreElements = false;
+  rv = dirIterator->HasMoreElements(&hasMoreElements);
+  MOZ_ASSERT(NS_SUCCEEDED(rv));
+
+  MonitorAutoLock autoLock(mVoicesMonitor);
+
+  while (hasMoreElements && NS_SUCCEEDED(rv)) {
+    nsCOMPtr<nsISupports> supports;
+    rv = dirIterator->GetNext(getter_AddRefs(supports));
+    MOZ_ASSERT(NS_SUCCEEDED(rv));
+
+    nsCOMPtr<nsIFile> voiceFile = do_QueryInterface(supports);
+    MOZ_ASSERT(voiceFile);
+
+    nsAutoCString leafName;
+    voiceFile->GetNativeLeafName(leafName);
+
+    nsAutoString lang;
+
+    if (GetVoiceFileLanguage(leafName, lang)) {
+      nsAutoString uri;
+      uri.AssignLiteral("urn:moz-tts:pico:");
+      uri.Append(lang);
+
+      bool found = false;
+      PicoVoice* voice = mVoices.GetWeak(uri, &found);
+
+      if (!found) {
+        voice = new PicoVoice(lang);
+        mVoices.Put(uri, voice);
+      }
+
+      // Each voice consists of two lingware files: a language resource file,
+      // suffixed by _ta.bin, and a speaker resource file, suffixed by _sg.bin.
+      // We currently assume that there is a pair of files for each language.
+      if (StringEndsWith(leafName, NS_LITERAL_CSTRING("_ta.bin"))) {
+        rv = voiceFile->GetPersistentDescriptor(voice->mTaFile);
+        MOZ_ASSERT(NS_SUCCEEDED(rv));
+      } else if (StringEndsWith(leafName, NS_LITERAL_CSTRING("_sg.bin"))) {
+        rv = voiceFile->GetPersistentDescriptor(voice->mSgFile);
+        MOZ_ASSERT(NS_SUCCEEDED(rv));
+      }
+    }
+
+    rv = dirIterator->HasMoreElements(&hasMoreElements);
+  }
+
+  NS_DispatchToMainThread(NewRunnableMethod(this, &nsPicoService::RegisterVoices));
+}
+
+void
+nsPicoService::RegisterVoices()
+{
+  nsSynthVoiceRegistry* registry = nsSynthVoiceRegistry::GetInstance();
+
+  for (auto iter = mVoices.Iter(); !iter.Done(); iter.Next()) {
+    const nsAString& uri = iter.Key();
+    RefPtr<PicoVoice>& voice = iter.Data();
+
+    // If we are missing either a language or a voice resource, it is invalid.
+    if (voice->mTaFile.IsEmpty() || voice->mSgFile.IsEmpty()) {
+      iter.Remove();
+      continue;
+    }
+
+    nsAutoString name;
+    name.AssignLiteral("Pico ");
+    name.Append(voice->mLanguage);
+
+    // This service is multi-threaded and can handle more than one utterance at a
+    // time before previous utterances end. So, aQueuesUtterances == false
+    DebugOnly<nsresult> rv =
+      registry->AddVoice(this, uri, name, voice->mLanguage, true, false);
+    NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "Failed to add voice");
+  }
+
+  mInitialized = true;
+}
+
+bool
+nsPicoService::GetVoiceFileLanguage(const nsACString& aFileName, nsAString& aLang)
+{
+  nsACString::const_iterator start, end;
+  aFileName.BeginReading(start);
+  aFileName.EndReading(end);
+
+  // The lingware filename syntax is language_(ta/sg).bin; we extract the
+  // language prefix here (a standalone sketch of the same contract follows).
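
A minimal plain-C++ sketch of that contract, assuming only the standard library
(the patch itself uses XPCOM string iterators): for a lingware file named
"en-GB_ta.bin" the language prefix is everything before the first underscore,
and files without an underscore are rejected.

#include <string>

// Sketch of GetVoiceFileLanguage's contract: "en-GB_ta.bin" -> "en-GB".
static bool GetLangPrefix(const std::string& aFileName, std::string& aLang)
{
  std::string::size_type underscore = aFileName.find('_');
  if (underscore == std::string::npos) {
    return false;
  }
  aLang = aFileName.substr(0, underscore);
  return true;
}
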
+ if (FindInReadable(NS_LITERAL_CSTRING("_"), start, end)) { + end = start; + aFileName.BeginReading(start); + aLang.Assign(NS_ConvertUTF8toUTF16(Substring(start, end))); + return true; + } + + return false; +} + +void +nsPicoService::LoadEngine(PicoVoice* aVoice) +{ + PicoApi::pico_Status status = 0; + + if (mPicoSystem) { + UnloadEngine(); + } + + if (!mPicoMemArea) { + mPicoMemArea = MakeUnique(PICO_MEM_SIZE); + } + + status = sPicoApi.pico_initialize(mPicoMemArea.get(), + PICO_MEM_SIZE, &mPicoSystem); + PICO_ENSURE_SUCCESS_VOID("pico_initialize", status); + + status = sPicoApi.pico_loadResource(mPicoSystem, aVoice->mTaFile.get(), &mTaResource); + PICO_ENSURE_SUCCESS_VOID("pico_loadResource", status); + + status = sPicoApi.pico_loadResource(mPicoSystem, aVoice->mSgFile.get(), &mSgResource); + PICO_ENSURE_SUCCESS_VOID("pico_loadResource", status); + + status = sPicoApi.pico_createVoiceDefinition(mPicoSystem, PICO_VOICE_NAME); + PICO_ENSURE_SUCCESS_VOID("pico_createVoiceDefinition", status); + + char taName[PICO_RETSTRINGSIZE]; + status = sPicoApi.pico_getResourceName(mPicoSystem, mTaResource, taName); + PICO_ENSURE_SUCCESS_VOID("pico_getResourceName", status); + + status = sPicoApi.pico_addResourceToVoiceDefinition( + mPicoSystem, PICO_VOICE_NAME, taName); + PICO_ENSURE_SUCCESS_VOID("pico_addResourceToVoiceDefinition", status); + + char sgName[PICO_RETSTRINGSIZE]; + status = sPicoApi.pico_getResourceName(mPicoSystem, mSgResource, sgName); + PICO_ENSURE_SUCCESS_VOID("pico_getResourceName", status); + + status = sPicoApi.pico_addResourceToVoiceDefinition( + mPicoSystem, PICO_VOICE_NAME, sgName); + PICO_ENSURE_SUCCESS_VOID("pico_addResourceToVoiceDefinition", status); + + status = sPicoApi.pico_newEngine(mPicoSystem, PICO_VOICE_NAME, &mPicoEngine); + PICO_ENSURE_SUCCESS_VOID("pico_newEngine", status); + + if (sSingleton) { + sSingleton->mCurrentVoice = aVoice; + } +} + +void +nsPicoService::UnloadEngine() +{ + PicoApi::pico_Status status = 0; + + if (mPicoEngine) { + status = sPicoApi.pico_disposeEngine(mPicoSystem, &mPicoEngine); + PICO_ENSURE_SUCCESS_VOID("pico_disposeEngine", status); + status = sPicoApi.pico_releaseVoiceDefinition(mPicoSystem, PICO_VOICE_NAME); + PICO_ENSURE_SUCCESS_VOID("pico_releaseVoiceDefinition", status); + mPicoEngine = nullptr; + } + + if (mSgResource) { + status = sPicoApi.pico_unloadResource(mPicoSystem, &mSgResource); + PICO_ENSURE_SUCCESS_VOID("pico_unloadResource", status); + mSgResource = nullptr; + } + + if (mTaResource) { + status = sPicoApi.pico_unloadResource(mPicoSystem, &mTaResource); + PICO_ENSURE_SUCCESS_VOID("pico_unloadResource", status); + mTaResource = nullptr; + } + + if (mPicoSystem) { + status = sPicoApi.pico_terminate(&mPicoSystem); + PICO_ENSURE_SUCCESS_VOID("pico_terminate", status); + mPicoSystem = nullptr; + } +} + +PicoVoice* +nsPicoService::CurrentVoice() +{ + MOZ_ASSERT(!NS_IsMainThread()); + + return mCurrentVoice; +} + +// static methods + +nsPicoService* +nsPicoService::GetInstance() +{ + MOZ_ASSERT(NS_IsMainThread()); + if (!XRE_IsParentProcess()) { + MOZ_ASSERT(false, "nsPicoService can only be started on main gecko process"); + return nullptr; + } + + if (!sSingleton) { + sSingleton = new nsPicoService(); + } + + return sSingleton; +} + +already_AddRefed +nsPicoService::GetInstanceForService() +{ + RefPtr picoService = GetInstance(); + return picoService.forget(); +} + +void +nsPicoService::Shutdown() +{ + if (!sSingleton) { + return; + } + + sSingleton->mCurrentTask = nullptr; + + sSingleton = nullptr; +} + +} // namespace dom 
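
A note on the audio math used by PicoSynthDataRunnable above: the buffers hold
16-bit mono samples at 16000 Hz, so a byte count maps to a sample count by
halving (the `mBufferSize / 2` passed to SendAudioNative) and to a duration by
then dividing by the sample rate. A small standalone sketch, assuming the
constants defined at the top of this file:

// Sketch: how many seconds of audio a Pico byte count represents.
// 16-bit mono at 16000 Hz => 2 bytes per sample, 16000 samples per second.
static double PicoBytesToSeconds(size_t aBytes)
{
  const size_t bytesPerSample = 2;  // matches Setup(..., 2) above
  const size_t sampleRate = 16000;  // PICO_SAMPLE_RATE
  return static_cast<double>(aBytes / bytesPerSample) / sampleRate;
}
// e.g. one PICO_MAX_CHUNK_SIZE chunk (128 bytes) is 64 samples = 4 ms.
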
+} // namespace mozilla diff --git a/dom/media/webspeech/synth/pico/nsPicoService.h b/dom/media/webspeech/synth/pico/nsPicoService.h new file mode 100644 index 000000000..f47258d9d --- /dev/null +++ b/dom/media/webspeech/synth/pico/nsPicoService.h @@ -0,0 +1,93 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef nsPicoService_h +#define nsPicoService_h + +#include "mozilla/Mutex.h" +#include "nsTArray.h" +#include "nsIObserver.h" +#include "nsIThread.h" +#include "nsISpeechService.h" +#include "nsRefPtrHashtable.h" +#include "mozilla/StaticPtr.h" +#include "mozilla/Monitor.h" +#include "mozilla/UniquePtr.h" + +namespace mozilla { +namespace dom { + +class PicoVoice; +class PicoCallbackRunnable; + +typedef void* pico_System; +typedef void* pico_Resource; +typedef void* pico_Engine; + +class nsPicoService : public nsIObserver, + public nsISpeechService +{ + friend class PicoCallbackRunnable; + friend class PicoInitRunnable; + +public: + NS_DECL_THREADSAFE_ISUPPORTS + NS_DECL_NSISPEECHSERVICE + NS_DECL_NSIOBSERVER + + nsPicoService(); + + static nsPicoService* GetInstance(); + + static already_AddRefed GetInstanceForService(); + + static void Shutdown(); + +private: + + virtual ~nsPicoService(); + + void Init(); + + void RegisterVoices(); + + bool GetVoiceFileLanguage(const nsACString& aFileName, nsAString& aLang); + + void LoadEngine(PicoVoice* aVoice); + + void UnloadEngine(); + + PicoVoice* CurrentVoice(); + + bool mInitialized; + + nsCOMPtr mThread; + + nsRefPtrHashtable mVoices; + + Monitor mVoicesMonitor; + + PicoVoice* mCurrentVoice; + + Atomic mCurrentTask; + + pico_System mPicoSystem; + + pico_Engine mPicoEngine; + + pico_Resource mSgResource; + + pico_Resource mTaResource; + + mozilla::UniquePtr mPicoMemArea; + + static StaticRefPtr sSingleton; +}; + +} // namespace dom +} // namespace mozilla + +#endif diff --git a/dom/media/webspeech/synth/speechd/SpeechDispatcherModule.cpp b/dom/media/webspeech/synth/speechd/SpeechDispatcherModule.cpp new file mode 100644 index 000000000..a7f7ad535 --- /dev/null +++ b/dom/media/webspeech/synth/speechd/SpeechDispatcherModule.cpp @@ -0,0 +1,56 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "mozilla/ModuleUtils.h" +#include "nsIClassInfoImpl.h" +#include "SpeechDispatcherService.h" + +using namespace mozilla::dom; + +#define SPEECHDISPATCHERSERVICE_CID \ + {0x8817b1cf, 0x5ada, 0x43bf, {0xbd, 0x73, 0x60, 0x76, 0x57, 0x70, 0x3d, 0x0d}} + +#define SPEECHDISPATCHERSERVICE_CONTRACTID "@mozilla.org/synthspeechdispatcher;1" + +// Defines SpeechDispatcherServiceConstructor +NS_GENERIC_FACTORY_SINGLETON_CONSTRUCTOR(SpeechDispatcherService, + SpeechDispatcherService::GetInstanceForService) + +// Defines kSPEECHDISPATCHERSERVICE_CID +NS_DEFINE_NAMED_CID(SPEECHDISPATCHERSERVICE_CID); + +static const mozilla::Module::CIDEntry kCIDs[] = { + { &kSPEECHDISPATCHERSERVICE_CID, true, nullptr, SpeechDispatcherServiceConstructor }, + { nullptr } +}; + +static const mozilla::Module::ContractIDEntry kContracts[] = { + { SPEECHDISPATCHERSERVICE_CONTRACTID, &kSPEECHDISPATCHERSERVICE_CID }, + { nullptr } +}; + +static const mozilla::Module::CategoryEntry kCategories[] = { + { "speech-synth-started", "SpeechDispatcher Speech Synth", SPEECHDISPATCHERSERVICE_CONTRACTID }, + { nullptr } +}; + +static void +UnloadSpeechDispatcherModule() +{ + SpeechDispatcherService::Shutdown(); +} + +static const mozilla::Module kModule = { + mozilla::Module::kVersion, + kCIDs, + kContracts, + kCategories, + nullptr, + nullptr, + UnloadSpeechDispatcherModule +}; + +NSMODULE_DEFN(synthspeechdispatcher) = &kModule; diff --git a/dom/media/webspeech/synth/speechd/SpeechDispatcherService.cpp b/dom/media/webspeech/synth/speechd/SpeechDispatcherService.cpp new file mode 100644 index 000000000..77a8f7cd9 --- /dev/null +++ b/dom/media/webspeech/synth/speechd/SpeechDispatcherService.cpp @@ -0,0 +1,593 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "SpeechDispatcherService.h" + +#include "mozilla/dom/nsSpeechTask.h" +#include "mozilla/dom/nsSynthVoiceRegistry.h" +#include "mozilla/Preferences.h" +#include "nsEscape.h" +#include "nsISupports.h" +#include "nsPrintfCString.h" +#include "nsReadableUtils.h" +#include "nsServiceManagerUtils.h" +#include "nsThreadUtils.h" +#include "prlink.h" + +#include +#include + +#define URI_PREFIX "urn:moz-tts:speechd:" + +#define MAX_RATE static_cast(2.5) +#define MIN_RATE static_cast(0.5) + +// Some structures for libspeechd +typedef enum { + SPD_EVENT_BEGIN, + SPD_EVENT_END, + SPD_EVENT_INDEX_MARK, + SPD_EVENT_CANCEL, + SPD_EVENT_PAUSE, + SPD_EVENT_RESUME +} SPDNotificationType; + +typedef enum { + SPD_BEGIN = 1, + SPD_END = 2, + SPD_INDEX_MARKS = 4, + SPD_CANCEL = 8, + SPD_PAUSE = 16, + SPD_RESUME = 32, + + SPD_ALL = 0x3f +} SPDNotification; + +typedef enum { + SPD_MODE_SINGLE = 0, + SPD_MODE_THREADED = 1 +} SPDConnectionMode; + +typedef void (*SPDCallback) (size_t msg_id, size_t client_id, + SPDNotificationType state); + +typedef void (*SPDCallbackIM) (size_t msg_id, size_t client_id, + SPDNotificationType state, char* index_mark); + +struct SPDConnection +{ + SPDCallback callback_begin; + SPDCallback callback_end; + SPDCallback callback_cancel; + SPDCallback callback_pause; + SPDCallback callback_resume; + SPDCallbackIM callback_im; + + /* partial, more private fields in structure */ +}; + +struct SPDVoice +{ + char* name; + char* language; + char* variant; +}; + +typedef enum { + SPD_IMPORTANT = 1, + SPD_MESSAGE = 2, + SPD_TEXT = 3, + SPD_NOTIFICATION = 4, + SPD_PROGRESS = 5 +} SPDPriority; + +#define SPEECHD_FUNCTIONS \ + FUNC(spd_open, SPDConnection*, (const char*, const char*, const char*, SPDConnectionMode)) \ + FUNC(spd_close, void, (SPDConnection*)) \ + FUNC(spd_list_synthesis_voices, SPDVoice**, (SPDConnection*)) \ + FUNC(spd_say, int, (SPDConnection*, SPDPriority, const char*)) \ + FUNC(spd_cancel, int, (SPDConnection*)) \ + FUNC(spd_set_volume, int, (SPDConnection*, int)) \ + FUNC(spd_set_voice_rate, int, (SPDConnection*, int)) \ + FUNC(spd_set_voice_pitch, int, (SPDConnection*, int)) \ + FUNC(spd_set_synthesis_voice, int, (SPDConnection*, const char*)) \ + FUNC(spd_set_notification_on, int, (SPDConnection*, SPDNotification)) + +#define FUNC(name, type, params) \ + typedef type (*_##name##_fn) params; \ + static _##name##_fn _##name; + +SPEECHD_FUNCTIONS + +#undef FUNC + +#define spd_open _spd_open +#define spd_close _spd_close +#define spd_list_synthesis_voices _spd_list_synthesis_voices +#define spd_say _spd_say +#define spd_cancel _spd_cancel +#define spd_set_volume _spd_set_volume +#define spd_set_voice_rate _spd_set_voice_rate +#define spd_set_voice_pitch _spd_set_voice_pitch +#define spd_set_synthesis_voice _spd_set_synthesis_voice +#define spd_set_notification_on _spd_set_notification_on + +static PRLibrary* speechdLib = nullptr; + +typedef void (*nsSpeechDispatcherFunc)(); +struct nsSpeechDispatcherDynamicFunction +{ + const char* functionName; + nsSpeechDispatcherFunc* function; +}; + +namespace mozilla { +namespace dom { + +StaticRefPtr SpeechDispatcherService::sSingleton; + +class SpeechDispatcherVoice +{ +public: + + SpeechDispatcherVoice(const nsAString& aName, const nsAString& aLanguage) + : mName(aName), mLanguage(aLanguage) {} + + NS_INLINE_DECL_THREADSAFE_REFCOUNTING(SpeechDispatcherVoice) + + // Voice name + nsString mName; + + // Voice language, in BCP-47 syntax + nsString mLanguage; + +private: + ~SpeechDispatcherVoice() {} +}; + + +class 
SpeechDispatcherCallback final : public nsISpeechTaskCallback +{ +public: + SpeechDispatcherCallback(nsISpeechTask* aTask, SpeechDispatcherService* aService) + : mTask(aTask) + , mService(aService) {} + + NS_DECL_CYCLE_COLLECTING_ISUPPORTS + NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(SpeechDispatcherCallback, nsISpeechTaskCallback) + + NS_DECL_NSISPEECHTASKCALLBACK + + bool OnSpeechEvent(SPDNotificationType state); + +private: + ~SpeechDispatcherCallback() { } + + // This pointer is used to dispatch events + nsCOMPtr mTask; + + // By holding a strong reference to the service we guarantee that it won't be + // destroyed before this runnable. + RefPtr mService; + + TimeStamp mStartTime; +}; + +NS_IMPL_CYCLE_COLLECTION(SpeechDispatcherCallback, mTask); + +NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(SpeechDispatcherCallback) + NS_INTERFACE_MAP_ENTRY(nsISpeechTaskCallback) + NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechTaskCallback) +NS_INTERFACE_MAP_END + +NS_IMPL_CYCLE_COLLECTING_ADDREF(SpeechDispatcherCallback) +NS_IMPL_CYCLE_COLLECTING_RELEASE(SpeechDispatcherCallback) + +NS_IMETHODIMP +SpeechDispatcherCallback::OnPause() +{ + // XXX: Speech dispatcher does not pause immediately, but waits for the speech + // to reach an index mark so that it could resume from that offset. + // There is no support for word or sentence boundaries, so index marks would + // only occur in explicit SSML marks, and we don't support that yet. + // What in actuality happens, is that if you call spd_pause(), it will speak + // the utterance in its entirety, dispatch an end event, and then put speechd + // in a 'paused' state. Since it is after the utterance ended, we don't get + // that state change, and our speech api is in an unrecoverable state. + // So, since it is useless anyway, I am not implementing pause. + return NS_OK; +} + +NS_IMETHODIMP +SpeechDispatcherCallback::OnResume() +{ + // XXX: Unsupported, see OnPause(). + return NS_OK; +} + +NS_IMETHODIMP +SpeechDispatcherCallback::OnCancel() +{ + if (spd_cancel(mService->mSpeechdClient) < 0) { + return NS_ERROR_FAILURE; + } + + return NS_OK; +} + +NS_IMETHODIMP +SpeechDispatcherCallback::OnVolumeChanged(float aVolume) +{ + // XXX: This currently does not change the volume mid-utterance, but it + // doesn't do anything bad either. So we could put this here with the hopes + // that speechd supports this in the future. 
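
The call below scales the Web Speech API's 0.0-1.0 volume onto
speech-dispatcher's integer 0-100 scale. A one-function sketch of the
conversion (an editorial illustration, not part of the patch):

// Sketch: Web Speech volume (0.0 .. 1.0) -> speechd volume (0 .. 100).
static int ToSpeechdVolume(float aVolume)
{
  return static_cast<int>(aVolume * 100);
}
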
+ if (spd_set_volume(mService->mSpeechdClient, static_cast(aVolume * 100)) < 0) { + return NS_ERROR_FAILURE; + } + + return NS_OK; +} + +bool +SpeechDispatcherCallback::OnSpeechEvent(SPDNotificationType state) +{ + bool remove = false; + + switch (state) { + case SPD_EVENT_BEGIN: + mStartTime = TimeStamp::Now(); + mTask->DispatchStart(); + break; + + case SPD_EVENT_PAUSE: + mTask->DispatchPause((TimeStamp::Now() - mStartTime).ToSeconds(), 0); + break; + + case SPD_EVENT_RESUME: + mTask->DispatchResume((TimeStamp::Now() - mStartTime).ToSeconds(), 0); + break; + + case SPD_EVENT_CANCEL: + case SPD_EVENT_END: + mTask->DispatchEnd((TimeStamp::Now() - mStartTime).ToSeconds(), 0); + remove = true; + break; + + case SPD_EVENT_INDEX_MARK: + // Not yet supported + break; + + default: + break; + } + + return remove; +} + +static void +speechd_cb(size_t msg_id, size_t client_id, SPDNotificationType state) +{ + SpeechDispatcherService* service = SpeechDispatcherService::GetInstance(false); + + if (service) { + NS_DispatchToMainThread( + NewRunnableMethod( + service, &SpeechDispatcherService::EventNotify, + static_cast(msg_id), state)); + } +} + + +NS_INTERFACE_MAP_BEGIN(SpeechDispatcherService) + NS_INTERFACE_MAP_ENTRY(nsISpeechService) + NS_INTERFACE_MAP_ENTRY(nsIObserver) + NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsIObserver) +NS_INTERFACE_MAP_END + +NS_IMPL_ADDREF(SpeechDispatcherService) +NS_IMPL_RELEASE(SpeechDispatcherService) + +SpeechDispatcherService::SpeechDispatcherService() + : mInitialized(false) + , mSpeechdClient(nullptr) +{ +} + +void +SpeechDispatcherService::Init() +{ + if (!Preferences::GetBool("media.webspeech.synth.enabled") || + Preferences::GetBool("media.webspeech.synth.test")) { + return; + } + + // While speech dispatcher has a "threaded" mode, only spd_say() is async. + // Since synchronous socket i/o could impact startup time, we do + // initialization in a separate thread. + DebugOnly rv = NS_NewNamedThread("speechd init", + getter_AddRefs(mInitThread)); + MOZ_ASSERT(NS_SUCCEEDED(rv)); + rv = mInitThread->Dispatch( + NewRunnableMethod(this, &SpeechDispatcherService::Setup), NS_DISPATCH_NORMAL); + MOZ_ASSERT(NS_SUCCEEDED(rv)); +} + +SpeechDispatcherService::~SpeechDispatcherService() +{ + if (mInitThread) { + mInitThread->Shutdown(); + } + + if (mSpeechdClient) { + spd_close(mSpeechdClient); + } +} + +void +SpeechDispatcherService::Setup() +{ +#define FUNC(name, type, params) { #name, (nsSpeechDispatcherFunc *)&_##name }, + static const nsSpeechDispatcherDynamicFunction kSpeechDispatcherSymbols[] = { + SPEECHD_FUNCTIONS + }; +#undef FUNC + + MOZ_ASSERT(!mInitialized); + + speechdLib = PR_LoadLibrary("libspeechd.so.2"); + + if (!speechdLib) { + NS_WARNING("Failed to load speechd library"); + return; + } + + if (!PR_FindFunctionSymbol(speechdLib, "spd_get_volume")) { + // There is no version getter function, so we rely on a symbol that was + // introduced in release 0.8.2 in order to check for ABI compatibility. 
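
Probing for a symbol that first appeared in the oldest supported release is a
common stand-in for a missing version getter. A minimal sketch of the same
check using POSIX dlopen/dlsym (the patch itself uses the NSPR equivalents
PR_LoadLibrary/PR_FindFunctionSymbol; link with -ldl):

#include <dlfcn.h>

// Sketch: treat the presence of spd_get_volume (added in speechd 0.8.2)
// as proof that the installed library is new enough to use.
static bool IsSpeechdUsable()
{
  void* lib = dlopen("libspeechd.so.2", RTLD_LAZY);
  if (!lib) {
    return false;
  }
  bool ok = dlsym(lib, "spd_get_volume") != nullptr;
  dlclose(lib);
  return ok;
}
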
+ NS_WARNING("Unsupported version of speechd detected"); + return; + } + + for (uint32_t i = 0; i < ArrayLength(kSpeechDispatcherSymbols); i++) { + *kSpeechDispatcherSymbols[i].function = + PR_FindFunctionSymbol(speechdLib, kSpeechDispatcherSymbols[i].functionName); + + if (!*kSpeechDispatcherSymbols[i].function) { + NS_WARNING(nsPrintfCString("Failed to find speechd symbol for'%s'", + kSpeechDispatcherSymbols[i].functionName).get()); + return; + } + } + + mSpeechdClient = spd_open("firefox", "web speech api", "who", SPD_MODE_THREADED); + if (!mSpeechdClient) { + NS_WARNING("Failed to call spd_open"); + return; + } + + // Get all the voices from sapi and register in the SynthVoiceRegistry + SPDVoice** list = spd_list_synthesis_voices(mSpeechdClient); + + mSpeechdClient->callback_begin = speechd_cb; + mSpeechdClient->callback_end = speechd_cb; + mSpeechdClient->callback_cancel = speechd_cb; + mSpeechdClient->callback_pause = speechd_cb; + mSpeechdClient->callback_resume = speechd_cb; + + spd_set_notification_on(mSpeechdClient, SPD_BEGIN); + spd_set_notification_on(mSpeechdClient, SPD_END); + spd_set_notification_on(mSpeechdClient, SPD_CANCEL); + + if (list != NULL) { + for (int i = 0; list[i]; i++) { + nsAutoString uri; + + uri.AssignLiteral(URI_PREFIX); + nsAutoCString name; + NS_EscapeURL(list[i]->name, -1, esc_OnlyNonASCII | esc_AlwaysCopy, name); + uri.Append(NS_ConvertUTF8toUTF16(name));; + uri.AppendLiteral("?"); + + nsAutoCString lang(list[i]->language); + + if (strcmp(list[i]->variant, "none") != 0) { + // In speech dispatcher, the variant will usually be the locale subtag + // with another, non-standard suptag after it. We keep the first one + // and convert it to uppercase. + const char* v = list[i]->variant; + const char* hyphen = strchr(v, '-'); + nsDependentCSubstring variant(v, hyphen ? hyphen - v : strlen(v)); + ToUpperCase(variant); + + // eSpeak uses UK which is not a valid region subtag in BCP47. + if (variant.Equals("UK")) { + variant.AssignLiteral("GB"); + } + + lang.AppendLiteral("-"); + lang.Append(variant); + } + + uri.Append(NS_ConvertUTF8toUTF16(lang)); + + mVoices.Put(uri, new SpeechDispatcherVoice( + NS_ConvertUTF8toUTF16(list[i]->name), + NS_ConvertUTF8toUTF16(lang))); + } + } + + NS_DispatchToMainThread(NewRunnableMethod(this, &SpeechDispatcherService::RegisterVoices)); + + //mInitialized = true; +} + +// private methods + +void +SpeechDispatcherService::RegisterVoices() +{ + RefPtr registry = nsSynthVoiceRegistry::GetInstance(); + for (auto iter = mVoices.Iter(); !iter.Done(); iter.Next()) { + RefPtr& voice = iter.Data(); + + // This service can only speak one utterance at a time, so we set + // aQueuesUtterances to true in order to track global state and schedule + // access to this service. 
+ DebugOnly rv = + registry->AddVoice(this, iter.Key(), voice->mName, voice->mLanguage, + voice->mName.EqualsLiteral("default"), true); + + NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "Failed to add voice"); + } + + mInitThread->Shutdown(); + mInitThread = nullptr; + + mInitialized = true; + + registry->NotifyVoicesChanged(); +} + +// nsIObserver + +NS_IMETHODIMP +SpeechDispatcherService::Observe(nsISupports* aSubject, const char* aTopic, + const char16_t* aData) +{ + return NS_OK; +} + +// nsISpeechService + +// TODO: Support SSML +NS_IMETHODIMP +SpeechDispatcherService::Speak(const nsAString& aText, const nsAString& aUri, + float aVolume, float aRate, float aPitch, + nsISpeechTask* aTask) +{ + if (NS_WARN_IF(!mInitialized)) { + return NS_ERROR_NOT_AVAILABLE; + } + + RefPtr callback = + new SpeechDispatcherCallback(aTask, this); + + bool found = false; + SpeechDispatcherVoice* voice = mVoices.GetWeak(aUri, &found); + + if(NS_WARN_IF(!(found))) { + return NS_ERROR_NOT_AVAILABLE; + } + + spd_set_synthesis_voice(mSpeechdClient, + NS_ConvertUTF16toUTF8(voice->mName).get()); + + // We provide a volume of 0.0 to 1.0, speech-dispatcher expects 0 - 100. + spd_set_volume(mSpeechdClient, static_cast(aVolume * 100)); + + // aRate is a value of 0.1 (0.1x) to 10 (10x) with 1 (1x) being normal rate. + // speechd expects -100 to 100 with 0 being normal rate. + float rate = 0; + if (aRate > 1) { + // Each step to 100 is logarithmically distributed up to 2.5x. + rate = log10(std::min(aRate, MAX_RATE)) / log10(MAX_RATE) * 100; + } else if (aRate < 1) { + // Each step to -100 is logarithmically distributed down to 0.5x. + rate = log10(std::max(aRate, MIN_RATE)) / log10(MIN_RATE) * -100; + } + + spd_set_voice_rate(mSpeechdClient, static_cast(rate)); + + // We provide a pitch of 0 to 2 with 1 being the default. + // speech-dispatcher expects -100 to 100 with 0 being default. + spd_set_voice_pitch(mSpeechdClient, static_cast((aPitch - 1) * 100)); + + // The last three parameters don't matter for an indirect service + nsresult rv = aTask->Setup(callback, 0, 0, 0); + + if (NS_FAILED(rv)) { + return rv; + } + + if (aText.Length()) { + int msg_id = spd_say( + mSpeechdClient, SPD_MESSAGE, NS_ConvertUTF16toUTF8(aText).get()); + + if (msg_id < 0) { + return NS_ERROR_FAILURE; + } + + mCallbacks.Put(msg_id, callback); + } else { + // Speech dispatcher does not work well with empty strings. + // In that case, don't send empty string to speechd, + // and just emulate a speechd start and end event. 
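
To make the rate mapping in Speak() above concrete: aRate = 2.0 yields
log10(2.0)/log10(2.5) * 100 ≈ 76, and any rate at or below 0.5 saturates at
-100. A standalone sketch of the same formula (editorial illustration,
mirroring the MAX_RATE/MIN_RATE bounds defined at the top of this file):

#include <algorithm>
#include <cmath>

// Web Speech rate (0.1x .. 10x, 1x = normal) -> speechd rate (-100 .. 100).
static int ToSpeechdRate(float aRate)
{
  const float kMaxRate = 2.5f;  // MAX_RATE
  const float kMinRate = 0.5f;  // MIN_RATE
  float rate = 0;
  if (aRate > 1) {
    rate = std::log10(std::min(aRate, kMaxRate)) / std::log10(kMaxRate) * 100;
  } else if (aRate < 1) {
    rate = std::log10(std::max(aRate, kMinRate)) / std::log10(kMinRate) * -100;
  }
  return static_cast<int>(rate);
}
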
+ NS_DispatchToMainThread(NewRunnableMethod( + callback, &SpeechDispatcherCallback::OnSpeechEvent, SPD_EVENT_BEGIN)); + + NS_DispatchToMainThread(NewRunnableMethod( + callback, &SpeechDispatcherCallback::OnSpeechEvent, SPD_EVENT_END)); + } + + return NS_OK; +} + +NS_IMETHODIMP +SpeechDispatcherService::GetServiceType(SpeechServiceType* aServiceType) +{ + *aServiceType = nsISpeechService::SERVICETYPE_INDIRECT_AUDIO; + return NS_OK; +} + +SpeechDispatcherService* +SpeechDispatcherService::GetInstance(bool create) +{ + if (XRE_GetProcessType() != GeckoProcessType_Default) { + MOZ_ASSERT(false, + "SpeechDispatcherService can only be started on main gecko process"); + return nullptr; + } + + if (!sSingleton && create) { + sSingleton = new SpeechDispatcherService(); + sSingleton->Init(); + } + + return sSingleton; +} + +already_AddRefed +SpeechDispatcherService::GetInstanceForService() +{ + MOZ_ASSERT(NS_IsMainThread()); + RefPtr sapiService = GetInstance(); + return sapiService.forget(); +} + +void +SpeechDispatcherService::EventNotify(uint32_t aMsgId, uint32_t aState) +{ + SpeechDispatcherCallback* callback = mCallbacks.GetWeak(aMsgId); + + if (callback) { + if (callback->OnSpeechEvent((SPDNotificationType)aState)) { + mCallbacks.Remove(aMsgId); + } + } +} + +void +SpeechDispatcherService::Shutdown() +{ + if (!sSingleton) { + return; + } + + sSingleton = nullptr; +} + +} // namespace dom +} // namespace mozilla diff --git a/dom/media/webspeech/synth/speechd/SpeechDispatcherService.h b/dom/media/webspeech/synth/speechd/SpeechDispatcherService.h new file mode 100644 index 000000000..07798ceda --- /dev/null +++ b/dom/media/webspeech/synth/speechd/SpeechDispatcherService.h @@ -0,0 +1,67 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef mozilla_dom_SpeechDispatcherService_h +#define mozilla_dom_SpeechDispatcherService_h + +#include "mozilla/StaticPtr.h" +#include "nsIObserver.h" +#include "nsISpeechService.h" +#include "nsIThread.h" +#include "nsRefPtrHashtable.h" +#include "nsTArray.h" + +struct SPDConnection; + +namespace mozilla { +namespace dom { + +class SpeechDispatcherCallback; +class SpeechDispatcherVoice; + +class SpeechDispatcherService final : public nsIObserver, + public nsISpeechService +{ + friend class SpeechDispatcherCallback; +public: + NS_DECL_THREADSAFE_ISUPPORTS + NS_DECL_NSIOBSERVER + NS_DECL_NSISPEECHSERVICE + + SpeechDispatcherService(); + + void Init(); + + void Setup(); + + void EventNotify(uint32_t aMsgId, uint32_t aState); + + static SpeechDispatcherService* GetInstance(bool create = true); + static already_AddRefed GetInstanceForService(); + + static void Shutdown(); + + static StaticRefPtr sSingleton; + +private: + virtual ~SpeechDispatcherService(); + + void RegisterVoices(); + + bool mInitialized; + + SPDConnection* mSpeechdClient; + + nsRefPtrHashtable mCallbacks; + + nsCOMPtr mInitThread; + + nsRefPtrHashtable mVoices; +}; + +} // namespace dom +} // namespace mozilla +#endif diff --git a/dom/media/webspeech/synth/speechd/moz.build b/dom/media/webspeech/synth/speechd/moz.build new file mode 100644 index 000000000..51d675c10 --- /dev/null +++ b/dom/media/webspeech/synth/speechd/moz.build @@ -0,0 +1,13 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +UNIFIED_SOURCES += [ + 'SpeechDispatcherModule.cpp', + 'SpeechDispatcherService.cpp' +] +include('/ipc/chromium/chromium-config.mozbuild') + +FINAL_LIBRARY = 'xul' diff --git a/dom/media/webspeech/synth/test/FakeSynthModule.cpp b/dom/media/webspeech/synth/test/FakeSynthModule.cpp new file mode 100644 index 000000000..5621ab78a --- /dev/null +++ b/dom/media/webspeech/synth/test/FakeSynthModule.cpp @@ -0,0 +1,55 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "mozilla/ModuleUtils.h" +#include "nsIClassInfoImpl.h" + +#include "nsFakeSynthServices.h" + +using namespace mozilla::dom; + +#define FAKESYNTHSERVICE_CID \ + {0xe7d52d9e, 0xc148, 0x47d8, {0xab, 0x2a, 0x95, 0xd7, 0xf4, 0x0e, 0xa5, 0x3d}} + +#define FAKESYNTHSERVICE_CONTRACTID "@mozilla.org/fakesynth;1" + +// Defines nsFakeSynthServicesConstructor +NS_GENERIC_FACTORY_SINGLETON_CONSTRUCTOR(nsFakeSynthServices, + nsFakeSynthServices::GetInstanceForService) + +// Defines kFAKESYNTHSERVICE_CID +NS_DEFINE_NAMED_CID(FAKESYNTHSERVICE_CID); + +static const mozilla::Module::CIDEntry kCIDs[] = { + { &kFAKESYNTHSERVICE_CID, true, nullptr, nsFakeSynthServicesConstructor }, + { nullptr } +}; + +static const mozilla::Module::ContractIDEntry kContracts[] = { + { FAKESYNTHSERVICE_CONTRACTID, &kFAKESYNTHSERVICE_CID }, + { nullptr } +}; + +static const mozilla::Module::CategoryEntry kCategories[] = { + { "speech-synth-started", "Fake Speech Synth", FAKESYNTHSERVICE_CONTRACTID }, + { nullptr } +}; + +static void +UnloadFakeSynthmodule() +{ + nsFakeSynthServices::Shutdown(); +} + +static const mozilla::Module kModule = { + mozilla::Module::kVersion, + kCIDs, + kContracts, + kCategories, + nullptr, + nullptr, + UnloadFakeSynthmodule +}; + +NSMODULE_DEFN(fakesynth) = &kModule; diff --git a/dom/media/webspeech/synth/test/common.js b/dom/media/webspeech/synth/test/common.js new file mode 100644 index 000000000..0ce9ec51b --- /dev/null +++ b/dom/media/webspeech/synth/test/common.js @@ -0,0 +1,91 @@ +function synthTestQueue(aTestArgs, aEndFunc) { + var utterances = []; + for (var i in aTestArgs) { + var uargs = aTestArgs[i][0]; + var win = uargs.win || window; + var u = new win.SpeechSynthesisUtterance(uargs.text); + + if (uargs.args) { + for (var attr in uargs.args) + u[attr] = uargs.args[attr]; + } + + function onend_handler(e) { + is(e.target, utterances.shift(), "Target matches utterances"); + ok(!speechSynthesis.speaking, "speechSynthesis is not speaking."); + + if (utterances.length) { + ok(speechSynthesis.pending, "other utterances queued"); + } else { + ok(!speechSynthesis.pending, "queue is empty, nothing pending."); + if (aEndFunc) + aEndFunc(); + } + } + + u.addEventListener('start', + (function (expectedUri) { + return function (e) { + if (expectedUri) { + var chosenVoice = SpecialPowers.wrap(e).target.chosenVoiceURI; + is(chosenVoice, expectedUri, "Incorrect URI is used"); + } + }; + })(aTestArgs[i][1] ? aTestArgs[i][1].uri : null)); + + u.addEventListener('end', onend_handler); + u.addEventListener('error', onend_handler); + + u.addEventListener('error', + (function (expectedError) { + return function onerror_handler(e) { + ok(expectedError, "Error in speech utterance '" + e.target.text + "'"); + }; + })(aTestArgs[i][1] ? 
aTestArgs[i][1].err : false));
+
+    utterances.push(u);
+    win.speechSynthesis.speak(u);
+  }
+
+  ok(!speechSynthesis.speaking, "speechSynthesis is not speaking yet.");
+  ok(speechSynthesis.pending, "speechSynthesis has an utterance queued.");
+}
+
+function loadFrame(frameId) {
+  return new Promise(function(resolve, reject) {
+    var frame = document.getElementById(frameId);
+    frame.addEventListener('load', function (e) {
+      frame.contentWindow.document.title = frameId;
+      resolve(frame);
+    });
+    frame.src = 'data:text/html,' + encodeURI('<html><body></body></html>');
+  });
+}
+
+function waitForVoices(win) {
+  return new Promise(resolve => {
+    function resolver() {
+      if (win.speechSynthesis.getVoices().length) {
+        win.speechSynthesis.removeEventListener('voiceschanged', resolver);
+        resolve();
+      }
+    }
+
+    win.speechSynthesis.addEventListener('voiceschanged', resolver);
+    resolver();
+  });
+}
+
+function loadSpeechTest(fileName, prefs, frameId="testFrame") {
+  loadFrame(frameId).then(frame => {
+    waitForVoices(frame.contentWindow).then(
+      () => document.getElementById(frameId).src = fileName);
+  });
+}
+
+function testSynthState(win, expectedState) {
+  for (var attr in expectedState) {
+    is(win.speechSynthesis[attr], expectedState[attr],
+       win.document.title + ": '" + attr + "' does not match");
+  }
+}
\ No newline at end of file
diff --git a/dom/media/webspeech/synth/test/file_bfcache_frame.html b/dom/media/webspeech/synth/test/file_bfcache_frame.html new file mode 100644 index 000000000..c8663b7fb --- /dev/null +++ b/dom/media/webspeech/synth/test/file_bfcache_frame.html @@ -0,0 +1,28 @@ + + + + + + + + + diff --git a/dom/media/webspeech/synth/test/file_global_queue.html b/dom/media/webspeech/synth/test/file_global_queue.html new file mode 100644 index 000000000..5d762c0d5 --- /dev/null +++ b/dom/media/webspeech/synth/test/file_global_queue.html @@ -0,0 +1,69 @@ + + + + + + Test for Bug 1188099: Global queue should correctly schedule utterances + + + + +Mozilla Bug 1188099 + + + +
+
+
+ + diff --git a/dom/media/webspeech/synth/test/file_global_queue_cancel.html b/dom/media/webspeech/synth/test/file_global_queue_cancel.html new file mode 100644 index 000000000..03b77ba2f --- /dev/null +++ b/dom/media/webspeech/synth/test/file_global_queue_cancel.html @@ -0,0 +1,88 @@ + + + + + + Test for Bug 1188099: Calling cancel() should work correctly with global queue + + + + +Mozilla Bug 1188099 + + + +
+
+
+ + diff --git a/dom/media/webspeech/synth/test/file_global_queue_pause.html b/dom/media/webspeech/synth/test/file_global_queue_pause.html new file mode 100644 index 000000000..7fd562133 --- /dev/null +++ b/dom/media/webspeech/synth/test/file_global_queue_pause.html @@ -0,0 +1,131 @@ + + + + + + Test for Bug 1188099: Calling pause() should work correctly with global queue + + + + +Mozilla Bug 1188099 + + + +
+
+
+ + diff --git a/dom/media/webspeech/synth/test/file_indirect_service_events.html b/dom/media/webspeech/synth/test/file_indirect_service_events.html new file mode 100644 index 000000000..fb4e31244 --- /dev/null +++ b/dom/media/webspeech/synth/test/file_indirect_service_events.html @@ -0,0 +1,102 @@ + + + + + + Test for Bug 1155034: Check that indirect audio services dispatch their own events + + + + +Mozilla Bug 1155034 +

+ +
+
+
+ + diff --git a/dom/media/webspeech/synth/test/file_setup.html b/dom/media/webspeech/synth/test/file_setup.html new file mode 100644 index 000000000..4c1020505 --- /dev/null +++ b/dom/media/webspeech/synth/test/file_setup.html @@ -0,0 +1,95 @@ + + + + + + Test for Bug 525444: Web Speech API check all classes are present + + + + +Mozilla Bug 650295 +

+ +
+
+
+ + diff --git a/dom/media/webspeech/synth/test/file_speech_cancel.html b/dom/media/webspeech/synth/test/file_speech_cancel.html new file mode 100644 index 000000000..2ab0e1d0a --- /dev/null +++ b/dom/media/webspeech/synth/test/file_speech_cancel.html @@ -0,0 +1,100 @@ + + + + + + Test for Bug 1150315: Check that successive cancel/speak calls work + + + + +Mozilla Bug 1150315 +

+ +
+
+
+ + diff --git a/dom/media/webspeech/synth/test/file_speech_error.html b/dom/media/webspeech/synth/test/file_speech_error.html new file mode 100644 index 000000000..b98ec2fac --- /dev/null +++ b/dom/media/webspeech/synth/test/file_speech_error.html @@ -0,0 +1,46 @@ + + + + + + Test for Bug 1226015 + + + + +Mozilla Bug 1226015 +

+ +
+
+
+ + diff --git a/dom/media/webspeech/synth/test/file_speech_queue.html b/dom/media/webspeech/synth/test/file_speech_queue.html new file mode 100644 index 000000000..e308f35e5 --- /dev/null +++ b/dom/media/webspeech/synth/test/file_speech_queue.html @@ -0,0 +1,85 @@ + + + + + + Test for Bug 525444: Web Speech API, check speech synth queue + + + + +Mozilla Bug 525444 +

+ +
+
+
+ + diff --git a/dom/media/webspeech/synth/test/file_speech_simple.html b/dom/media/webspeech/synth/test/file_speech_simple.html new file mode 100644 index 000000000..c3f240ccd --- /dev/null +++ b/dom/media/webspeech/synth/test/file_speech_simple.html @@ -0,0 +1,53 @@ + + + + + + Test for Bug 650295: Web Speech API check all classes are present + + + + +Mozilla Bug 650295 +

+ +
+
+
+
+
diff --git a/dom/media/webspeech/synth/test/mochitest.ini b/dom/media/webspeech/synth/test/mochitest.ini
new file mode 100644
index 000000000..f27cd4e1b
--- /dev/null
+++ b/dom/media/webspeech/synth/test/mochitest.ini
@@ -0,0 +1,26 @@
+[DEFAULT]
+tags=msg
+subsuite = media
+support-files =
+  common.js
+  file_bfcache_frame.html
+  file_setup.html
+  file_speech_queue.html
+  file_speech_simple.html
+  file_speech_cancel.html
+  file_speech_error.html
+  file_indirect_service_events.html
+  file_global_queue.html
+  file_global_queue_cancel.html
+  file_global_queue_pause.html
+
+[test_setup.html]
+[test_speech_queue.html]
+[test_speech_simple.html]
+[test_speech_cancel.html]
+[test_speech_error.html]
+[test_indirect_service_events.html]
+[test_global_queue.html]
+[test_global_queue_cancel.html]
+[test_global_queue_pause.html]
+[test_bfcache.html]
diff --git a/dom/media/webspeech/synth/test/nsFakeSynthServices.cpp b/dom/media/webspeech/synth/test/nsFakeSynthServices.cpp
new file mode 100644
index 000000000..582ff3551
--- /dev/null
+++ b/dom/media/webspeech/synth/test/nsFakeSynthServices.cpp
@@ -0,0 +1,401 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsISupports.h"
+#include "nsFakeSynthServices.h"
+#include "nsPrintfCString.h"
+#include "nsIWeakReferenceUtils.h"
+#include "SharedBuffer.h"
+#include "nsISimpleEnumerator.h"
+
+#include "mozilla/dom/nsSynthVoiceRegistry.h"
+#include "mozilla/dom/nsSpeechTask.h"
+
+#include "nsThreadUtils.h"
+#include "prenv.h"
+#include "mozilla/Preferences.h"
+#include "mozilla/DebugOnly.h"
+
+#define CHANNELS 1
+#define SAMPLERATE 1600
+
+namespace mozilla {
+namespace dom {
+
+StaticRefPtr<nsFakeSynthServices> nsFakeSynthServices::sSingleton;
+
+enum VoiceFlags
+{
+  eSuppressEvents = 1,
+  eSuppressEnd = 2,
+  eFailAtStart = 4,
+  eFail = 8
+};
+
+struct VoiceDetails
+{
+  const char* uri;
+  const char* name;
+  const char* lang;
+  bool defaultVoice;
+  uint32_t flags;
+};
+
+static const VoiceDetails sDirectVoices[] = {
+  {"urn:moz-tts:fake-direct:bob", "Bob Marley", "en-JM", true, 0},
+  {"urn:moz-tts:fake-direct:amy", "Amy Winehouse", "en-GB", false, 0},
+  {"urn:moz-tts:fake-direct:lenny", "Leonard Cohen", "en-CA", false, 0},
+  {"urn:moz-tts:fake-direct:celine", "Celine Dion", "fr-CA", false, 0},
+  {"urn:moz-tts:fake-direct:julie", "Julieta Venegas", "es-MX", false, 0},
+};
+
+static const VoiceDetails sIndirectVoices[] = {
+  {"urn:moz-tts:fake-indirect:zanetta", "Zanetta Farussi", "it-IT", false, 0},
+  {"urn:moz-tts:fake-indirect:margherita", "Margherita Durastanti", "it-IT-noevents-noend", false, eSuppressEvents | eSuppressEnd},
+  {"urn:moz-tts:fake-indirect:teresa", "Teresa Cornelys", "it-IT-noend", false, eSuppressEnd},
+  {"urn:moz-tts:fake-indirect:cecilia", "Cecilia Bartoli", "it-IT-failatstart", false, eFailAtStart},
+  {"urn:moz-tts:fake-indirect:gottardo", "Gottardo Aldighieri", "it-IT-fail", false, eFail},
+};
+
+// FakeSynthCallback
+class FakeSynthCallback : public nsISpeechTaskCallback
+{
+public:
+  explicit FakeSynthCallback(nsISpeechTask* aTask) : mTask(aTask) { }
+  NS_DECL_CYCLE_COLLECTING_ISUPPORTS
+  NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(FakeSynthCallback, nsISpeechTaskCallback)
+
+  NS_IMETHOD OnPause() override
+  {
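+    // The fake service has no real audio clock, so it reports a fixed
+    // elapsed time (1.5s) and char index (1) that the tests can assert on.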
+    if (mTask) {
+      mTask->DispatchPause(1.5, 1);
+    }
+
+    return NS_OK;
+  }
+
+  NS_IMETHOD OnResume() override
+  {
+    if (mTask) {
+      mTask->DispatchResume(1.5, 1);
+    }
+
+    return NS_OK;
+  }
+
+  NS_IMETHOD OnCancel() override
+  {
+    if (mTask) {
+      mTask->DispatchEnd(1.5, 1);
+    }
+
+    return NS_OK;
+  }
+
+  NS_IMETHOD OnVolumeChanged(float aVolume) override
+  {
+    return NS_OK;
+  }
+
+private:
+  virtual ~FakeSynthCallback() { }
+
+  nsCOMPtr<nsISpeechTask> mTask;
+};
+
+NS_IMPL_CYCLE_COLLECTION(FakeSynthCallback, mTask);
+
+NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(FakeSynthCallback)
+  NS_INTERFACE_MAP_ENTRY(nsISpeechTaskCallback)
+  NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechTaskCallback)
+NS_INTERFACE_MAP_END
+
+NS_IMPL_CYCLE_COLLECTING_ADDREF(FakeSynthCallback)
+NS_IMPL_CYCLE_COLLECTING_RELEASE(FakeSynthCallback)
+
+// FakeDirectAudioSynth
+
+class FakeDirectAudioSynth : public nsISpeechService
+{
+
+public:
+  FakeDirectAudioSynth() { }
+
+  NS_DECL_ISUPPORTS
+  NS_DECL_NSISPEECHSERVICE
+
+private:
+  virtual ~FakeDirectAudioSynth() { }
+};
+
+NS_IMPL_ISUPPORTS(FakeDirectAudioSynth, nsISpeechService)
+
+NS_IMETHODIMP
+FakeDirectAudioSynth::Speak(const nsAString& aText, const nsAString& aUri,
+                            float aVolume, float aRate, float aPitch,
+                            nsISpeechTask* aTask)
+{
+  class Runnable final : public mozilla::Runnable
+  {
+  public:
+    Runnable(nsISpeechTask* aTask, const nsAString& aText) :
+      mTask(aTask), mText(aText)
+    {
+    }
+
+    NS_IMETHOD Run() override
+    {
+      RefPtr<FakeSynthCallback> cb = new FakeSynthCallback(nullptr);
+      mTask->Setup(cb, CHANNELS, SAMPLERATE, 2);
+
+      // Just an arbitrary multiplier. Pretend that each character is
+      // synthesized to 40 frames.
+      uint32_t frames_length = 40 * mText.Length();
+      auto frames = MakeUnique<int16_t[]>(frames_length);
+      mTask->SendAudioNative(frames.get(), frames_length);
+
+      mTask->SendAudioNative(nullptr, 0);
+
+      return NS_OK;
+    }
+
+  private:
+    nsCOMPtr<nsISpeechTask> mTask;
+    nsString mText;
+  };
+
+  nsCOMPtr<nsIRunnable> runnable = new Runnable(aTask, aText);
+  NS_DispatchToMainThread(runnable);
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+FakeDirectAudioSynth::GetServiceType(SpeechServiceType* aServiceType)
+{
+  *aServiceType = nsISpeechService::SERVICETYPE_DIRECT_AUDIO;
+  return NS_OK;
+}
+
+// FakeIndirectAudioSynth
+
+class FakeIndirectAudioSynth : public nsISpeechService
+{
+
+public:
+  FakeIndirectAudioSynth() {}
+
+  NS_DECL_ISUPPORTS
+  NS_DECL_NSISPEECHSERVICE
+
+private:
+  virtual ~FakeIndirectAudioSynth() { }
+};
+
+NS_IMPL_ISUPPORTS(FakeIndirectAudioSynth, nsISpeechService)
+
+NS_IMETHODIMP
+FakeIndirectAudioSynth::Speak(const nsAString& aText, const nsAString& aUri,
+                              float aVolume, float aRate, float aPitch,
+                              nsISpeechTask* aTask)
+{
+  class DispatchStart final : public Runnable
+  {
+  public:
+    explicit DispatchStart(nsISpeechTask* aTask) :
+      mTask(aTask)
+    {
+    }
+
+    NS_IMETHOD Run() override
+    {
+      mTask->DispatchStart();
+
+      return NS_OK;
+    }
+
+  private:
+    nsCOMPtr<nsISpeechTask> mTask;
+  };
+
+  class DispatchEnd final : public Runnable
+  {
+  public:
+    DispatchEnd(nsISpeechTask* aTask, const nsAString& aText) :
+      mTask(aTask), mText(aText)
+    {
+    }
+
+    NS_IMETHOD Run() override
+    {
+      mTask->DispatchEnd(mText.Length()/2, mText.Length());
+
+      return NS_OK;
+    }
+
+  private:
+    nsCOMPtr<nsISpeechTask> mTask;
+    nsString mText;
+  };
+
+  class DispatchError final : public Runnable
+  {
+  public:
+    DispatchError(nsISpeechTask* aTask, const nsAString& aText) :
+      mTask(aTask), mText(aText)
+    {
+    }
+
+    NS_IMETHOD Run() override
+    {
+      mTask->DispatchError(mText.Length()/2, mText.Length());
+
+      return NS_OK;
+    }
+
+  private:
+    nsCOMPtr<nsISpeechTask> mTask;
+    nsString mText;
+  };
+
+  uint32_t flags = 0;
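+  // Look up the requested voice URI in the fake voice table to pick up its
+  // test flags (suppress events, suppress end, fail at start, or fail).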
+  for (uint32_t i = 0; i < ArrayLength(sIndirectVoices); i++) {
+    if (aUri.EqualsASCII(sIndirectVoices[i].uri)) {
+      flags = sIndirectVoices[i].flags;
+    }
+  }
+
+  if (flags & eFailAtStart) {
+    return NS_ERROR_FAILURE;
+  }
+
+  RefPtr<FakeSynthCallback> cb = new FakeSynthCallback(
+    (flags & eSuppressEvents) ? nullptr : aTask);
+
+  aTask->Setup(cb, 0, 0, 0);
+
+  nsCOMPtr<nsIRunnable> runnable = new DispatchStart(aTask);
+  NS_DispatchToMainThread(runnable);
+
+  if (flags & eFail) {
+    runnable = new DispatchError(aTask, aText);
+    NS_DispatchToMainThread(runnable);
+  } else if ((flags & eSuppressEnd) == 0) {
+    runnable = new DispatchEnd(aTask, aText);
+    NS_DispatchToMainThread(runnable);
+  }
+
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+FakeIndirectAudioSynth::GetServiceType(SpeechServiceType* aServiceType)
+{
+  *aServiceType = nsISpeechService::SERVICETYPE_INDIRECT_AUDIO;
+  return NS_OK;
+}
+
+// nsFakeSynthService
+
+NS_INTERFACE_MAP_BEGIN(nsFakeSynthServices)
+  NS_INTERFACE_MAP_ENTRY(nsIObserver)
+  NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsIObserver)
+NS_INTERFACE_MAP_END
+
+NS_IMPL_ADDREF(nsFakeSynthServices)
+NS_IMPL_RELEASE(nsFakeSynthServices)
+
+nsFakeSynthServices::nsFakeSynthServices()
+{
+}
+
+nsFakeSynthServices::~nsFakeSynthServices()
+{
+}
+
+static void
+AddVoices(nsISpeechService* aService, const VoiceDetails* aVoices, uint32_t aLength)
+{
+  RefPtr<nsSynthVoiceRegistry> registry = nsSynthVoiceRegistry::GetInstance();
+  for (uint32_t i = 0; i < aLength; i++) {
+    NS_ConvertUTF8toUTF16 name(aVoices[i].name);
+    NS_ConvertUTF8toUTF16 uri(aVoices[i].uri);
+    NS_ConvertUTF8toUTF16 lang(aVoices[i].lang);
+    // These services can handle more than one utterance at a time and have
+    // several speaking simultaneously. So, aQueuesUtterances == false
+    registry->AddVoice(aService, uri, name, lang, true, false);
+    if (aVoices[i].defaultVoice) {
+      registry->SetDefaultVoice(uri, true);
+    }
+  }
+
+  registry->NotifyVoicesChanged();
+}
+
+void
+nsFakeSynthServices::Init()
+{
+  mDirectService = new FakeDirectAudioSynth();
+  AddVoices(mDirectService, sDirectVoices, ArrayLength(sDirectVoices));
+
+  mIndirectService = new FakeIndirectAudioSynth();
+  AddVoices(mIndirectService, sIndirectVoices, ArrayLength(sIndirectVoices));
+}
+
+// nsIObserver
+
+NS_IMETHODIMP
+nsFakeSynthServices::Observe(nsISupports* aSubject, const char* aTopic,
+                             const char16_t* aData)
+{
+  MOZ_ASSERT(NS_IsMainThread());
+  if (NS_WARN_IF(strcmp(aTopic, "speech-synth-started") != 0)) {
+    return NS_ERROR_UNEXPECTED;
+  }
+
+  if (Preferences::GetBool("media.webspeech.synth.test")) {
+    NS_DispatchToMainThread(NewRunnableMethod(this, &nsFakeSynthServices::Init));
+  }
+
+  return NS_OK;
+}
+
+// static methods
+
+nsFakeSynthServices*
+nsFakeSynthServices::GetInstance()
+{
+  MOZ_ASSERT(NS_IsMainThread());
+  if (!XRE_IsParentProcess()) {
+    MOZ_ASSERT(false, "nsFakeSynthServices can only be started on main gecko process");
+    return nullptr;
+  }
+
+  if (!sSingleton) {
+    sSingleton = new nsFakeSynthServices();
+  }
+
+  return sSingleton;
+}
+
+already_AddRefed<nsFakeSynthServices>
+nsFakeSynthServices::GetInstanceForService()
+{
+  RefPtr<nsFakeSynthServices> services = GetInstance();
+  return services.forget();
+}
+
+void
+nsFakeSynthServices::Shutdown()
+{
+  if (!sSingleton) {
+    return;
+  }
+
+  sSingleton = nullptr;
+}
+
+} // namespace dom
+} // namespace mozilla
diff --git a/dom/media/webspeech/synth/test/nsFakeSynthServices.h b/dom/media/webspeech/synth/test/nsFakeSynthServices.h
new file mode 100644
index 000000000..bab93e779
--- /dev/null
+++ b/dom/media/webspeech/synth/test/nsFakeSynthServices.h
@@ -0,0 +1,52 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef nsFakeSynthServices_h
+#define nsFakeSynthServices_h
+
+#include "nsTArray.h"
+#include "nsIObserver.h"
+#include "nsIThread.h"
+#include "nsISpeechService.h"
+#include "nsRefPtrHashtable.h"
+#include "mozilla/StaticPtr.h"
+#include "mozilla/Monitor.h"
+
+namespace mozilla {
+namespace dom {
+
+class nsFakeSynthServices : public nsIObserver
+{
+
+public:
+  NS_DECL_ISUPPORTS
+  NS_DECL_NSIOBSERVER
+
+  nsFakeSynthServices();
+
+  static nsFakeSynthServices* GetInstance();
+
+  static already_AddRefed<nsFakeSynthServices> GetInstanceForService();
+
+  static void Shutdown();
+
+private:
+
+  virtual ~nsFakeSynthServices();
+
+  void Init();
+
+  nsCOMPtr<nsISpeechService> mDirectService;
+
+  nsCOMPtr<nsISpeechService> mIndirectService;
+
+  static StaticRefPtr<nsFakeSynthServices> sSingleton;
+};
+
+} // namespace dom
+} // namespace mozilla
+
+#endif
diff --git a/dom/media/webspeech/synth/test/startup/file_voiceschanged.html b/dom/media/webspeech/synth/test/startup/file_voiceschanged.html
new file mode 100644
index 000000000..6bb25462e
--- /dev/null
+++ b/dom/media/webspeech/synth/test/startup/file_voiceschanged.html
@@ -0,0 +1,32 @@
+
+
+
+
+
+  Test for Bug 1254378: Web Speech API check all classes are present
+
+
+
+
+
+
diff --git a/dom/media/webspeech/synth/test/startup/mochitest.ini b/dom/media/webspeech/synth/test/startup/mochitest.ini
new file mode 100644
index 000000000..7312a71eb
--- /dev/null
+++ b/dom/media/webspeech/synth/test/startup/mochitest.ini
@@ -0,0 +1,7 @@
+[DEFAULT]
+tags=msg
+subsuite = media
+support-files =
+  file_voiceschanged.html
+
+[test_voiceschanged.html]
diff --git a/dom/media/webspeech/synth/test/startup/test_voiceschanged.html b/dom/media/webspeech/synth/test/startup/test_voiceschanged.html
new file mode 100644
index 000000000..079938c35
--- /dev/null
+++ b/dom/media/webspeech/synth/test/startup/test_voiceschanged.html
@@ -0,0 +1,32 @@
+
+
+
+
+
+  Test for Bug 1254378: Emit onvoiceschanged when voices first added
+
+
+
+
+Mozilla Bug 1254378
+

+ + +
+
+
+ + diff --git a/dom/media/webspeech/synth/test/test_bfcache.html b/dom/media/webspeech/synth/test/test_bfcache.html new file mode 100644 index 000000000..8681def3f --- /dev/null +++ b/dom/media/webspeech/synth/test/test_bfcache.html @@ -0,0 +1,45 @@ + + + + + + Test for Bug 1230533: Test speech is stopped from a window when unloaded + + + + + +Mozilla Bug 1230533 +

+ + +
+
+
+ + diff --git a/dom/media/webspeech/synth/test/test_global_queue.html b/dom/media/webspeech/synth/test/test_global_queue.html new file mode 100644 index 000000000..34d3a4ed3 --- /dev/null +++ b/dom/media/webspeech/synth/test/test_global_queue.html @@ -0,0 +1,35 @@ + + + + + + Test for Bug 1188099: Global queue should correctly schedule utterances + + + + + +Mozilla Bug 1188099 +

+ + +
+
+
+ + \ No newline at end of file diff --git a/dom/media/webspeech/synth/test/test_global_queue_cancel.html b/dom/media/webspeech/synth/test/test_global_queue_cancel.html new file mode 100644 index 000000000..4c5c11634 --- /dev/null +++ b/dom/media/webspeech/synth/test/test_global_queue_cancel.html @@ -0,0 +1,35 @@ + + + + + + Test for Bug 1188099: Calling cancel() should work correctly with global queue + + + + + +Mozilla Bug 1188099 +

+ + +
+
+
+ + \ No newline at end of file diff --git a/dom/media/webspeech/synth/test/test_global_queue_pause.html b/dom/media/webspeech/synth/test/test_global_queue_pause.html new file mode 100644 index 000000000..f5ac1b98c --- /dev/null +++ b/dom/media/webspeech/synth/test/test_global_queue_pause.html @@ -0,0 +1,35 @@ + + + + + + Test for Bug 1188099: Calling pause() should work correctly with global queue + + + + + +Mozilla Bug 1188099 +

+ + +
+
+
+ + \ No newline at end of file diff --git a/dom/media/webspeech/synth/test/test_indirect_service_events.html b/dom/media/webspeech/synth/test/test_indirect_service_events.html new file mode 100644 index 000000000..d7f5ec424 --- /dev/null +++ b/dom/media/webspeech/synth/test/test_indirect_service_events.html @@ -0,0 +1,36 @@ + + + + + + Test for Bug 1155034: Check that indirect audio services dispatch their own events + + + + + +Mozilla Bug 1155034 +

+ + +
+
+
+ + diff --git a/dom/media/webspeech/synth/test/test_setup.html b/dom/media/webspeech/synth/test/test_setup.html new file mode 100644 index 000000000..6286b6c2e --- /dev/null +++ b/dom/media/webspeech/synth/test/test_setup.html @@ -0,0 +1,32 @@ + + + + + + Test for Bug 525444: Web Speech API check all classes are present + + + + +Mozilla Bug 650295 +

+ + +
+
+
+ + diff --git a/dom/media/webspeech/synth/test/test_speech_cancel.html b/dom/media/webspeech/synth/test/test_speech_cancel.html new file mode 100644 index 000000000..e7cf051ef --- /dev/null +++ b/dom/media/webspeech/synth/test/test_speech_cancel.html @@ -0,0 +1,35 @@ + + + + + + Test for Bug 1150315: Web Speech API check all classes are present + + + + + +Mozilla Bug 1150315 +

+ + +
+
+
+ + diff --git a/dom/media/webspeech/synth/test/test_speech_error.html b/dom/media/webspeech/synth/test/test_speech_error.html new file mode 100644 index 000000000..c4bfdc6c4 --- /dev/null +++ b/dom/media/webspeech/synth/test/test_speech_error.html @@ -0,0 +1,35 @@ + + + + + + Test for Bug 1150315: Web Speech API check all classes are present + + + + + +Mozilla Bug 1226015 +

+ + +
+
+
+ + diff --git a/dom/media/webspeech/synth/test/test_speech_queue.html b/dom/media/webspeech/synth/test/test_speech_queue.html new file mode 100644 index 000000000..ca652b243 --- /dev/null +++ b/dom/media/webspeech/synth/test/test_speech_queue.html @@ -0,0 +1,37 @@ + + + + + + Test for Bug 525444: Web Speech API, check speech synth queue + + + + + +Mozilla Bug 525444 +

+ + +
+
+
+ + diff --git a/dom/media/webspeech/synth/test/test_speech_simple.html b/dom/media/webspeech/synth/test/test_speech_simple.html new file mode 100644 index 000000000..2eb75af43 --- /dev/null +++ b/dom/media/webspeech/synth/test/test_speech_simple.html @@ -0,0 +1,34 @@ + + + + + + Test for Bug 650295: Web Speech API check all classes are present + + + + + +Mozilla Bug 650295 +

+ + +
+
+
+ + diff --git a/dom/media/webspeech/synth/windows/SapiModule.cpp b/dom/media/webspeech/synth/windows/SapiModule.cpp new file mode 100644 index 000000000..f9d7c9a89 --- /dev/null +++ b/dom/media/webspeech/synth/windows/SapiModule.cpp @@ -0,0 +1,57 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "mozilla/ModuleUtils.h" +#include "nsIClassInfoImpl.h" + +#include "SapiService.h" + +using namespace mozilla::dom; + +#define SAPISERVICE_CID \ + {0x21b4a45b, 0x9806, 0x4021, {0xa7, 0x06, 0xd7, 0x68, 0xab, 0x05, 0x48, 0xf9}} + +#define SAPISERVICE_CONTRACTID "@mozilla.org/synthsapi;1" + +// Defines SapiServiceConstructor +NS_GENERIC_FACTORY_SINGLETON_CONSTRUCTOR(SapiService, + SapiService::GetInstanceForService) + +// Defines kSAPISERVICE_CID +NS_DEFINE_NAMED_CID(SAPISERVICE_CID); + +static const mozilla::Module::CIDEntry kCIDs[] = { + { &kSAPISERVICE_CID, true, nullptr, SapiServiceConstructor }, + { nullptr } +}; + +static const mozilla::Module::ContractIDEntry kContracts[] = { + { SAPISERVICE_CONTRACTID, &kSAPISERVICE_CID }, + { nullptr } +}; + +static const mozilla::Module::CategoryEntry kCategories[] = { + { "speech-synth-started", "Sapi Speech Synth", SAPISERVICE_CONTRACTID }, + { nullptr } +}; + +static void +UnloadSapiModule() +{ + SapiService::Shutdown(); +} + +static const mozilla::Module kModule = { + mozilla::Module::kVersion, + kCIDs, + kContracts, + kCategories, + nullptr, + nullptr, + UnloadSapiModule +}; + +NSMODULE_DEFN(synthsapi) = &kModule; diff --git a/dom/media/webspeech/synth/windows/SapiService.cpp b/dom/media/webspeech/synth/windows/SapiService.cpp new file mode 100644 index 000000000..95f35ebff --- /dev/null +++ b/dom/media/webspeech/synth/windows/SapiService.cpp @@ -0,0 +1,470 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/
+
+#include "nsISupports.h"
+#include "SapiService.h"
+#include "nsServiceManagerUtils.h"
+#include "nsWin32Locale.h"
+#include "GeckoProfiler.h"
+#include "nsEscape.h"
+
+#include "mozilla/dom/nsSynthVoiceRegistry.h"
+#include "mozilla/dom/nsSpeechTask.h"
+#include "mozilla/Preferences.h"
+
+namespace mozilla {
+namespace dom {
+
+StaticRefPtr<SapiService> SapiService::sSingleton;
+
+class SapiCallback final : public nsISpeechTaskCallback
+{
+public:
+  SapiCallback(nsISpeechTask* aTask, ISpVoice* aSapiClient,
+               uint32_t aTextOffset, uint32_t aSpeakTextLen)
+    : mTask(aTask)
+    , mSapiClient(aSapiClient)
+    , mTextOffset(aTextOffset)
+    , mSpeakTextLen(aSpeakTextLen)
+    , mCurrentIndex(0)
+    , mStreamNum(0)
+  {
+    mStartingTime = GetTickCount();
+  }
+
+  NS_DECL_CYCLE_COLLECTING_ISUPPORTS
+  NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(SapiCallback, nsISpeechTaskCallback)
+
+  NS_DECL_NSISPEECHTASKCALLBACK
+
+  ULONG GetStreamNum() const { return mStreamNum; }
+  void SetStreamNum(ULONG aValue) { mStreamNum = aValue; }
+
+  void OnSpeechEvent(const SPEVENT& speechEvent);
+
+private:
+  ~SapiCallback() { }
+
+  // This pointer is used to dispatch events
+  nsCOMPtr<nsISpeechTask> mTask;
+  RefPtr<ISpVoice> mSapiClient;
+
+  uint32_t mTextOffset;
+  uint32_t mSpeakTextLen;
+
+  // Used for calculating the time taken to speak the utterance
+  double mStartingTime;
+  uint32_t mCurrentIndex;
+
+  ULONG mStreamNum;
+};
+
+NS_IMPL_CYCLE_COLLECTION(SapiCallback, mTask);
+
+NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(SapiCallback)
+  NS_INTERFACE_MAP_ENTRY(nsISpeechTaskCallback)
+  NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechTaskCallback)
+NS_INTERFACE_MAP_END
+
+NS_IMPL_CYCLE_COLLECTING_ADDREF(SapiCallback)
+NS_IMPL_CYCLE_COLLECTING_RELEASE(SapiCallback)
+
+NS_IMETHODIMP
+SapiCallback::OnPause()
+{
+  if (FAILED(mSapiClient->Pause())) {
+    return NS_ERROR_FAILURE;
+  }
+  if (!mTask) {
+    // When pause() is called in the child process, the end event from the
+    // chrome process may not have been received yet.
+    return NS_ERROR_FAILURE;
+  }
+  mTask->DispatchPause(GetTickCount() - mStartingTime, mCurrentIndex);
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+SapiCallback::OnResume()
+{
+  if (FAILED(mSapiClient->Resume())) {
+    return NS_ERROR_FAILURE;
+  }
+  if (!mTask) {
+    // When resume() is called in the child process, the end event from the
+    // chrome process may not have been received yet.
+    return NS_ERROR_FAILURE;
+  }
+  mTask->DispatchResume(GetTickCount() - mStartingTime, mCurrentIndex);
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+SapiCallback::OnCancel()
+{
+  // After a cancel, mCurrentIndex may still be updated; in the cancel case,
+  // DispatchEnd should use mCurrentIndex rather than the full text length.
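+  // Zeroing mSpeakTextLen keeps the SPEI_END_INPUT_STREAM handler from
+  // overwriting mCurrentIndex, so the end event reports where speech stopped.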
+  mSpeakTextLen = 0;
+  // Purge all the previous utterances and speak an empty string
+  if (FAILED(mSapiClient->Speak(nullptr, SPF_PURGEBEFORESPEAK, nullptr))) {
+    return NS_ERROR_FAILURE;
+  }
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+SapiCallback::OnVolumeChanged(float aVolume)
+{
+  mSapiClient->SetVolume(static_cast<USHORT>(aVolume * 100));
+  return NS_OK;
+}
+
+void
+SapiCallback::OnSpeechEvent(const SPEVENT& speechEvent)
+{
+  switch (speechEvent.eEventId) {
+  case SPEI_START_INPUT_STREAM:
+    mTask->DispatchStart();
+    break;
+  case SPEI_END_INPUT_STREAM:
+    if (mSpeakTextLen) {
+      mCurrentIndex = mSpeakTextLen;
+    }
+    mTask->DispatchEnd(GetTickCount() - mStartingTime, mCurrentIndex);
+    mTask = nullptr;
+    break;
+  case SPEI_TTS_BOOKMARK:
+    mCurrentIndex = static_cast<uint32_t>(speechEvent.lParam) - mTextOffset;
+    mTask->DispatchBoundary(NS_LITERAL_STRING("mark"),
+                            GetTickCount() - mStartingTime, mCurrentIndex);
+    break;
+  case SPEI_WORD_BOUNDARY:
+    mCurrentIndex = static_cast<uint32_t>(speechEvent.lParam) - mTextOffset;
+    mTask->DispatchBoundary(NS_LITERAL_STRING("word"),
+                            GetTickCount() - mStartingTime, mCurrentIndex);
+    break;
+  case SPEI_SENTENCE_BOUNDARY:
+    mCurrentIndex = static_cast<uint32_t>(speechEvent.lParam) - mTextOffset;
+    mTask->DispatchBoundary(NS_LITERAL_STRING("sentence"),
+                            GetTickCount() - mStartingTime, mCurrentIndex);
+    break;
+  default:
+    break;
+  }
+}
+
+// static
+void __stdcall
+SapiService::SpeechEventCallback(WPARAM aWParam, LPARAM aLParam)
+{
+  RefPtr<ISpVoice> spVoice = (ISpVoice*) aWParam;
+  RefPtr<SapiService> service = (SapiService*) aLParam;
+
+  SPEVENT speechEvent;
+  while (spVoice->GetEvents(1, &speechEvent, nullptr) == S_OK) {
+    for (size_t i = 0; i < service->mCallbacks.Length(); i++) {
+      RefPtr<SapiCallback> callback = service->mCallbacks[i];
+      if (callback->GetStreamNum() == speechEvent.ulStreamNum) {
+        callback->OnSpeechEvent(speechEvent);
+        if (speechEvent.eEventId == SPEI_END_INPUT_STREAM) {
+          service->mCallbacks.RemoveElementAt(i);
+        }
+        break;
+      }
+    }
+  }
+}
+
+NS_INTERFACE_MAP_BEGIN(SapiService)
+  NS_INTERFACE_MAP_ENTRY(nsISpeechService)
+  NS_INTERFACE_MAP_ENTRY(nsIObserver)
+  NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechService)
+NS_INTERFACE_MAP_END
+
+NS_IMPL_ADDREF(SapiService)
+NS_IMPL_RELEASE(SapiService)
+
+SapiService::SapiService()
+  : mInitialized(false)
+{
+}
+
+SapiService::~SapiService()
+{
+}
+
+bool
+SapiService::Init()
+{
+  PROFILER_LABEL_FUNC(js::ProfileEntry::Category::OTHER);
+
+  MOZ_ASSERT(!mInitialized);
+
+  if (Preferences::GetBool("media.webspeech.synth.test") ||
+      !Preferences::GetBool("media.webspeech.synth.enabled")) {
+    // When the test service is enabled, or synthesis is disabled entirely,
+    // we shouldn't add the OS backend (Bug 1160844)
+    return false;
+  }
+
+  // Get all the voices from sapi and register in the SynthVoiceRegistry
+  if (!RegisterVoices()) {
+    return false;
+  }
+
+  mInitialized = true;
+  return true;
+}
+
+already_AddRefed<ISpVoice>
+SapiService::InitSapiInstance()
+{
+  RefPtr<ISpVoice> spVoice;
+  if (FAILED(CoCreateInstance(CLSID_SpVoice, nullptr, CLSCTX_ALL, IID_ISpVoice,
+                              getter_AddRefs(spVoice)))) {
+    return nullptr;
+  }
+
+  // Set interest for all the events we are interested in
+  ULONGLONG eventMask =
+    SPFEI(SPEI_START_INPUT_STREAM) |
+    SPFEI(SPEI_TTS_BOOKMARK) |
+    SPFEI(SPEI_WORD_BOUNDARY) |
+    SPFEI(SPEI_SENTENCE_BOUNDARY) |
+    SPFEI(SPEI_END_INPUT_STREAM);
+
+  if (FAILED(spVoice->SetInterest(eventMask, eventMask))) {
+    return nullptr;
+  }
+
+  // Set the callback function for receiving the events
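+  // The raw ISpVoice and SapiService pointers are passed through the
+  // notification as WPARAM/LPARAM and recovered in SpeechEventCallback.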
+  spVoice->SetNotifyCallbackFunction(
+    (SPNOTIFYCALLBACK*) SapiService::SpeechEventCallback,
+    (WPARAM) spVoice.get(), (LPARAM) this);
+
+  return spVoice.forget();
+}
+
+bool
+SapiService::RegisterVoices()
+{
+  nsresult rv;
+
+  nsCOMPtr<nsISynthVoiceRegistry> registry =
+    do_GetService(NS_SYNTHVOICEREGISTRY_CONTRACTID);
+  if (!registry) {
+    return false;
+  }
+
+  RefPtr<ISpObjectTokenCategory> category;
+  if (FAILED(CoCreateInstance(CLSID_SpObjectTokenCategory, nullptr, CLSCTX_ALL,
+                              IID_ISpObjectTokenCategory,
+                              getter_AddRefs(category)))) {
+    return false;
+  }
+  if (FAILED(category->SetId(SPCAT_VOICES, FALSE))) {
+    return false;
+  }
+
+  RefPtr<IEnumSpObjectTokens> voiceTokens;
+  if (FAILED(category->EnumTokens(nullptr, nullptr,
+                                  getter_AddRefs(voiceTokens)))) {
+    return false;
+  }
+
+  while (true) {
+    RefPtr<ISpObjectToken> voiceToken;
+    if (voiceTokens->Next(1, getter_AddRefs(voiceToken), nullptr) != S_OK) {
+      break;
+    }
+
+    RefPtr<ISpDataKey> attributes;
+    if (FAILED(voiceToken->OpenKey(L"Attributes",
+                                   getter_AddRefs(attributes)))) {
+      continue;
+    }
+
+    WCHAR* language = nullptr;
+    if (FAILED(attributes->GetStringValue(L"Language", &language))) {
+      continue;
+    }
+
+    // The Language attribute is an LCID in hex, so we need to convert it
+    // to a locale name.
+    nsAutoString hexLcid;
+    LCID lcid = wcstol(language, nullptr, 16);
+    CoTaskMemFree(language);
+    nsAutoString locale;
+    nsWin32Locale::GetXPLocale(lcid, locale);
+
+    WCHAR* description = nullptr;
+    if (FAILED(voiceToken->GetStringValue(nullptr, &description))) {
+      continue;
+    }
+
+    nsAutoString uri;
+    uri.AssignLiteral("urn:moz-tts:sapi:");
+    uri.Append(description);
+    uri.AppendLiteral("?");
+    uri.Append(locale);
+
+    // This service can only speak one utterance at a time, so we set
+    // aQueuesUtterances to true in order to track global state and schedule
+    // access to this service.
+    rv = registry->AddVoice(this, uri, nsDependentString(description), locale,
+                            true, true);
+    CoTaskMemFree(description);
+    if (NS_FAILED(rv)) {
+      continue;
+    }
+
+    mVoices.Put(uri, voiceToken);
+  }
+
+  registry->NotifyVoicesChanged();
+
+  return true;
+}
+
+NS_IMETHODIMP
+SapiService::Speak(const nsAString& aText, const nsAString& aUri,
+                   float aVolume, float aRate, float aPitch,
+                   nsISpeechTask* aTask)
+{
+  NS_ENSURE_TRUE(mInitialized, NS_ERROR_NOT_AVAILABLE);
+
+  RefPtr<ISpObjectToken> voiceToken;
+  if (!mVoices.Get(aUri, getter_AddRefs(voiceToken))) {
+    return NS_ERROR_NOT_AVAILABLE;
+  }
+
+  RefPtr<ISpVoice> spVoice = InitSapiInstance();
+  if (!spVoice) {
+    return NS_ERROR_FAILURE;
+  }
+
+  if (FAILED(spVoice->SetVoice(voiceToken))) {
+    return NS_ERROR_FAILURE;
+  }
+
+  if (FAILED(spVoice->SetVolume(static_cast<USHORT>(aVolume * 100)))) {
+    return NS_ERROR_FAILURE;
+  }
+
+  // The max supported rate in SAPI engines is 3x, and the min is 1/3x. It is
+  // expressed by an integer: 0 is the normal rate, -10 is 1/3x and 10 is 3x.
+  // Values below and above that are allowed, but the engine may clip the rate
+  // to its maximum capable value.
+  // "Each increment between -10 and +10 is logarithmically distributed such
+  // that incrementing or decrementing by 1 is multiplying or dividing the
+  // rate by the 10th root of 3"
+  // https://msdn.microsoft.com/en-us/library/ee431826(v=vs.85).aspx
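+  // For example: aRate == 3.0 maps to 10 * log10(3)/log10(3) == 10 (3x);
+  // aRate == 1/3 maps to -10 (1/3x); and aRate == 1.0 maps to 0 (normal).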
+  long rate = aRate != 0 ? static_cast<long>(10 * log10(aRate) / log10(3)) : 0;
+  if (FAILED(spVoice->SetRate(rate))) {
+    return NS_ERROR_FAILURE;
+  }
+
+  // Set the pitch using xml
+  nsAutoString xml;
+  xml.AssignLiteral("<pitch absmiddle=\"");
+  xml.AppendInt(static_cast<int32_t>(aPitch * 10.0f - 10.0f));
+  xml.AppendLiteral("\">");
+  uint32_t textOffset = xml.Length();
+
+  for (size_t i = 0; i < aText.Length(); i++) {
+    switch (aText[i]) {
+      case '&':
+        xml.AppendLiteral("&amp;");
+        break;
+      case '<':
+        xml.AppendLiteral("&lt;");
+        break;
+      case '>':
+        xml.AppendLiteral("&gt;");
+        break;
+      default:
+        xml.Append(aText[i]);
+        break;
+    }
+  }
+
+  xml.AppendLiteral("</pitch>");
+
+  RefPtr<SapiCallback> callback =
+    new SapiCallback(aTask, spVoice, textOffset, aText.Length());
+
+  // The last three parameters don't matter for an indirect service
+  nsresult rv = aTask->Setup(callback, 0, 0, 0);
+  if (NS_FAILED(rv)) {
+    return rv;
+  }
+
+  ULONG streamNum;
+  if (FAILED(spVoice->Speak(xml.get(), SPF_ASYNC, &streamNum))) {
+    aTask->Setup(nullptr, 0, 0, 0);
+    return NS_ERROR_FAILURE;
+  }
+
+  callback->SetStreamNum(streamNum);
+  // streamNum may be reused once the last stream finishes, even if its
+  // stream-end callback hasn't fired yet, so we cannot key callbacks by
+  // stream number in a hashtable; append them to the array instead.
+  mCallbacks.AppendElement(callback);
+
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+SapiService::GetServiceType(SpeechServiceType* aServiceType)
+{
+  *aServiceType = nsISpeechService::SERVICETYPE_INDIRECT_AUDIO;
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+SapiService::Observe(nsISupports* aSubject, const char* aTopic,
+                     const char16_t* aData)
+{
+  return NS_OK;
+}
+
+SapiService*
+SapiService::GetInstance()
+{
+  MOZ_ASSERT(NS_IsMainThread());
+  if (XRE_GetProcessType() != GeckoProcessType_Default) {
+    MOZ_ASSERT(false,
+               "SapiService can only be started on main gecko process");
+    return nullptr;
+  }
+
+  if (!sSingleton) {
+    RefPtr<SapiService> service = new SapiService();
+    if (service->Init()) {
+      sSingleton = service;
+    }
+  }
+  return sSingleton;
+}
+
+already_AddRefed<SapiService>
+SapiService::GetInstanceForService()
+{
+  RefPtr<SapiService> sapiService = GetInstance();
+  return sapiService.forget();
+}
+
+void
+SapiService::Shutdown()
+{
+  if (!sSingleton) {
+    return;
+  }
+  sSingleton = nullptr;
+}
+
+} // namespace dom
+} // namespace mozilla
diff --git a/dom/media/webspeech/synth/windows/SapiService.h b/dom/media/webspeech/synth/windows/SapiService.h
new file mode 100644
index 000000000..cde743cc2
--- /dev/null
+++ b/dom/media/webspeech/synth/windows/SapiService.h
@@ -0,0 +1,59 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
+
+#ifndef mozilla_dom_SapiService_h
+#define mozilla_dom_SapiService_h
+
+#include "nsISpeechService.h"
+#include "nsIObserver.h"
+#include "nsRefPtrHashtable.h"
+#include "nsTArray.h"
+#include "mozilla/StaticPtr.h"
+
+#include <windows.h>
+#include <sapi.h>
+
+namespace mozilla {
+namespace dom {
+
+class SapiCallback;
+
+class SapiService final : public nsISpeechService
+                        , public nsIObserver
+{
+public:
+  NS_DECL_ISUPPORTS
+  NS_DECL_NSISPEECHSERVICE
+  NS_DECL_NSIOBSERVER
+
+  SapiService();
+  bool Init();
+
+  static SapiService* GetInstance();
+  static already_AddRefed<SapiService> GetInstanceForService();
+
+  static void Shutdown();
+
+  static void __stdcall SpeechEventCallback(WPARAM aWParam, LPARAM aLParam);
+
+private:
+  virtual ~SapiService();
+
+  already_AddRefed<ISpVoice> InitSapiInstance();
+  bool RegisterVoices();
+
+  nsRefPtrHashtable<nsStringHashKey, ISpObjectToken> mVoices;
+  nsTArray<RefPtr<SapiCallback>> mCallbacks;
+
+  bool mInitialized;
+
+  static StaticRefPtr<SapiService> sSingleton;
+};
+
+} // namespace dom
+} // namespace mozilla
+
+#endif
diff --git a/dom/media/webspeech/synth/windows/moz.build b/dom/media/webspeech/synth/windows/moz.build
new file mode 100644
index 000000000..f0ff9f2c9
--- /dev/null
+++ b/dom/media/webspeech/synth/windows/moz.build
@@ -0,0 +1,13 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+UNIFIED_SOURCES += [
+    'SapiModule.cpp',
+    'SapiService.cpp'
+]
+include('/ipc/chromium/chromium-config.mozbuild')
+
+FINAL_LIBRARY = 'xul'
-- 
cgit v1.2.3