diff options
Diffstat (limited to 'dom/media/webspeech/synth/windows')
-rw-r--r-- | dom/media/webspeech/synth/windows/SapiModule.cpp | 57 | ||||
-rw-r--r-- | dom/media/webspeech/synth/windows/SapiService.cpp | 470 | ||||
-rw-r--r-- | dom/media/webspeech/synth/windows/SapiService.h | 59 | ||||
-rw-r--r-- | dom/media/webspeech/synth/windows/moz.build | 13 |
4 files changed, 599 insertions, 0 deletions
diff --git a/dom/media/webspeech/synth/windows/SapiModule.cpp b/dom/media/webspeech/synth/windows/SapiModule.cpp new file mode 100644 index 000000000..f9d7c9a89 --- /dev/null +++ b/dom/media/webspeech/synth/windows/SapiModule.cpp @@ -0,0 +1,57 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "mozilla/ModuleUtils.h" +#include "nsIClassInfoImpl.h" + +#include "SapiService.h" + +using namespace mozilla::dom; + +#define SAPISERVICE_CID \ + {0x21b4a45b, 0x9806, 0x4021, {0xa7, 0x06, 0xd7, 0x68, 0xab, 0x05, 0x48, 0xf9}} + +#define SAPISERVICE_CONTRACTID "@mozilla.org/synthsapi;1" + +// Defines SapiServiceConstructor +NS_GENERIC_FACTORY_SINGLETON_CONSTRUCTOR(SapiService, + SapiService::GetInstanceForService) + +// Defines kSAPISERVICE_CID +NS_DEFINE_NAMED_CID(SAPISERVICE_CID); + +static const mozilla::Module::CIDEntry kCIDs[] = { + { &kSAPISERVICE_CID, true, nullptr, SapiServiceConstructor }, + { nullptr } +}; + +static const mozilla::Module::ContractIDEntry kContracts[] = { + { SAPISERVICE_CONTRACTID, &kSAPISERVICE_CID }, + { nullptr } +}; + +static const mozilla::Module::CategoryEntry kCategories[] = { + { "speech-synth-started", "Sapi Speech Synth", SAPISERVICE_CONTRACTID }, + { nullptr } +}; + +static void +UnloadSapiModule() +{ + SapiService::Shutdown(); +} + +static const mozilla::Module kModule = { + mozilla::Module::kVersion, + kCIDs, + kContracts, + kCategories, + nullptr, + nullptr, + UnloadSapiModule +}; + +NSMODULE_DEFN(synthsapi) = &kModule; diff --git a/dom/media/webspeech/synth/windows/SapiService.cpp b/dom/media/webspeech/synth/windows/SapiService.cpp new file mode 100644 index 000000000..95f35ebff --- /dev/null +++ b/dom/media/webspeech/synth/windows/SapiService.cpp @@ -0,0 +1,470 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsISupports.h" +#include "SapiService.h" +#include "nsServiceManagerUtils.h" +#include "nsWin32Locale.h" +#include "GeckoProfiler.h" +#include "nsEscape.h" + +#include "mozilla/dom/nsSynthVoiceRegistry.h" +#include "mozilla/dom/nsSpeechTask.h" +#include "mozilla/Preferences.h" + +namespace mozilla { +namespace dom { + +StaticRefPtr<SapiService> SapiService::sSingleton; + +class SapiCallback final : public nsISpeechTaskCallback +{ +public: + SapiCallback(nsISpeechTask* aTask, ISpVoice* aSapiClient, + uint32_t aTextOffset, uint32_t aSpeakTextLen) + : mTask(aTask) + , mSapiClient(aSapiClient) + , mTextOffset(aTextOffset) + , mSpeakTextLen(aSpeakTextLen) + , mCurrentIndex(0) + , mStreamNum(0) + { + mStartingTime = GetTickCount(); + } + + NS_DECL_CYCLE_COLLECTING_ISUPPORTS + NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(SapiCallback, nsISpeechTaskCallback) + + NS_DECL_NSISPEECHTASKCALLBACK + + ULONG GetStreamNum() const { return mStreamNum; } + void SetStreamNum(ULONG aValue) { mStreamNum = aValue; } + + void OnSpeechEvent(const SPEVENT& speechEvent); + +private: + ~SapiCallback() { } + + // This pointer is used to dispatch events + nsCOMPtr<nsISpeechTask> mTask; + RefPtr<ISpVoice> mSapiClient; + + uint32_t mTextOffset; + uint32_t mSpeakTextLen; + + // Used for calculating the time taken to speak the utterance + double mStartingTime; + uint32_t mCurrentIndex; + + ULONG mStreamNum; +}; + +NS_IMPL_CYCLE_COLLECTION(SapiCallback, mTask); + +NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(SapiCallback) + NS_INTERFACE_MAP_ENTRY(nsISpeechTaskCallback) + NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechTaskCallback) +NS_INTERFACE_MAP_END + +NS_IMPL_CYCLE_COLLECTING_ADDREF(SapiCallback) +NS_IMPL_CYCLE_COLLECTING_RELEASE(SapiCallback) + +NS_IMETHODIMP +SapiCallback::OnPause() +{ + if (FAILED(mSapiClient->Pause())) { + return NS_ERROR_FAILURE; + } + if (!mTask) { + // When calling pause() on child porcess, it may not receive end event + // from chrome process yet. + return NS_ERROR_FAILURE; + } + mTask->DispatchPause(GetTickCount() - mStartingTime, mCurrentIndex); + return NS_OK; +} + +NS_IMETHODIMP +SapiCallback::OnResume() +{ + if (FAILED(mSapiClient->Resume())) { + return NS_ERROR_FAILURE; + } + if (!mTask) { + // When calling resume() on child porcess, it may not receive end event + // from chrome process yet. + return NS_ERROR_FAILURE; + } + mTask->DispatchResume(GetTickCount() - mStartingTime, mCurrentIndex); + return NS_OK; +} + +NS_IMETHODIMP +SapiCallback::OnCancel() +{ + // After cancel, mCurrentIndex may be updated. + // At cancel case, use mCurrentIndex for DispatchEnd. + mSpeakTextLen = 0; + // Purge all the previous utterances and speak an empty string + if (FAILED(mSapiClient->Speak(nullptr, SPF_PURGEBEFORESPEAK, nullptr))) { + return NS_ERROR_FAILURE; + } + return NS_OK; +} + +NS_IMETHODIMP +SapiCallback::OnVolumeChanged(float aVolume) +{ + mSapiClient->SetVolume(static_cast<USHORT>(aVolume * 100)); + return NS_OK; +} + +void +SapiCallback::OnSpeechEvent(const SPEVENT& speechEvent) +{ + switch (speechEvent.eEventId) { + case SPEI_START_INPUT_STREAM: + mTask->DispatchStart(); + break; + case SPEI_END_INPUT_STREAM: + if (mSpeakTextLen) { + mCurrentIndex = mSpeakTextLen; + } + mTask->DispatchEnd(GetTickCount() - mStartingTime, mCurrentIndex); + mTask = nullptr; + break; + case SPEI_TTS_BOOKMARK: + mCurrentIndex = static_cast<ULONG>(speechEvent.lParam) - mTextOffset; + mTask->DispatchBoundary(NS_LITERAL_STRING("mark"), + GetTickCount() - mStartingTime, mCurrentIndex); + break; + case SPEI_WORD_BOUNDARY: + mCurrentIndex = static_cast<ULONG>(speechEvent.lParam) - mTextOffset; + mTask->DispatchBoundary(NS_LITERAL_STRING("word"), + GetTickCount() - mStartingTime, mCurrentIndex); + break; + case SPEI_SENTENCE_BOUNDARY: + mCurrentIndex = static_cast<ULONG>(speechEvent.lParam) - mTextOffset; + mTask->DispatchBoundary(NS_LITERAL_STRING("sentence"), + GetTickCount() - mStartingTime, mCurrentIndex); + break; + default: + break; + } +} + +// static +void __stdcall +SapiService::SpeechEventCallback(WPARAM aWParam, LPARAM aLParam) +{ + RefPtr<ISpVoice> spVoice = (ISpVoice*) aWParam; + RefPtr<SapiService> service = (SapiService*) aLParam; + + SPEVENT speechEvent; + while (spVoice->GetEvents(1, &speechEvent, nullptr) == S_OK) { + for (size_t i = 0; i < service->mCallbacks.Length(); i++) { + RefPtr<SapiCallback> callback = service->mCallbacks[i]; + if (callback->GetStreamNum() == speechEvent.ulStreamNum) { + callback->OnSpeechEvent(speechEvent); + if (speechEvent.eEventId == SPEI_END_INPUT_STREAM) { + service->mCallbacks.RemoveElementAt(i); + } + break; + } + } + } +} + +NS_INTERFACE_MAP_BEGIN(SapiService) + NS_INTERFACE_MAP_ENTRY(nsISpeechService) + NS_INTERFACE_MAP_ENTRY(nsIObserver) + NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechService) +NS_INTERFACE_MAP_END + +NS_IMPL_ADDREF(SapiService) +NS_IMPL_RELEASE(SapiService) + +SapiService::SapiService() + : mInitialized(false) +{ +} + +SapiService::~SapiService() +{ +} + +bool +SapiService::Init() +{ + PROFILER_LABEL_FUNC(js::ProfileEntry::Category::OTHER); + + MOZ_ASSERT(!mInitialized); + + if (Preferences::GetBool("media.webspeech.synth.test") || + !Preferences::GetBool("media.webspeech.synth.enabled")) { + // When enabled, we shouldn't add OS backend (Bug 1160844) + return false; + } + + // Get all the voices from sapi and register in the SynthVoiceRegistry + if (!RegisterVoices()) { + return false; + } + + mInitialized = true; + return true; +} + +already_AddRefed<ISpVoice> +SapiService::InitSapiInstance() +{ + RefPtr<ISpVoice> spVoice; + if (FAILED(CoCreateInstance(CLSID_SpVoice, nullptr, CLSCTX_ALL, IID_ISpVoice, + getter_AddRefs(spVoice)))) { + return nullptr; + } + + // Set interest for all the events we are interested in + ULONGLONG eventMask = + SPFEI(SPEI_START_INPUT_STREAM) | + SPFEI(SPEI_TTS_BOOKMARK) | + SPFEI(SPEI_WORD_BOUNDARY) | + SPFEI(SPEI_SENTENCE_BOUNDARY) | + SPFEI(SPEI_END_INPUT_STREAM); + + if (FAILED(spVoice->SetInterest(eventMask, eventMask))) { + return nullptr; + } + + // Set the callback function for receiving the events + spVoice->SetNotifyCallbackFunction( + (SPNOTIFYCALLBACK*) SapiService::SpeechEventCallback, + (WPARAM) spVoice.get(), (LPARAM) this); + + return spVoice.forget(); +} + +bool +SapiService::RegisterVoices() +{ + nsresult rv; + + nsCOMPtr<nsISynthVoiceRegistry> registry = + do_GetService(NS_SYNTHVOICEREGISTRY_CONTRACTID); + if (!registry) { + return false; + } + + RefPtr<ISpObjectTokenCategory> category; + if (FAILED(CoCreateInstance(CLSID_SpObjectTokenCategory, nullptr, CLSCTX_ALL, + IID_ISpObjectTokenCategory, + getter_AddRefs(category)))) { + return false; + } + if (FAILED(category->SetId(SPCAT_VOICES, FALSE))) { + return false; + } + + RefPtr<IEnumSpObjectTokens> voiceTokens; + if (FAILED(category->EnumTokens(nullptr, nullptr, + getter_AddRefs(voiceTokens)))) { + return false; + } + + while (true) { + RefPtr<ISpObjectToken> voiceToken; + if (voiceTokens->Next(1, getter_AddRefs(voiceToken), nullptr) != S_OK) { + break; + } + + RefPtr<ISpDataKey> attributes; + if (FAILED(voiceToken->OpenKey(L"Attributes", + getter_AddRefs(attributes)))) { + continue; + } + + WCHAR* language = nullptr; + if (FAILED(attributes->GetStringValue(L"Language", &language))) { + continue; + } + + // Language attribute is LCID by hex. So we need convert to locale + // name. + nsAutoString hexLcid; + LCID lcid = wcstol(language, nullptr, 16); + CoTaskMemFree(language); + nsAutoString locale; + nsWin32Locale::GetXPLocale(lcid, locale); + + WCHAR* description = nullptr; + if (FAILED(voiceToken->GetStringValue(nullptr, &description))) { + continue; + } + + nsAutoString uri; + uri.AssignLiteral("urn:moz-tts:sapi:"); + uri.Append(description); + uri.AppendLiteral("?"); + uri.Append(locale); + + // This service can only speak one utterance at a time, se we set + // aQueuesUtterances to true in order to track global state and schedule + // access to this service. + rv = registry->AddVoice(this, uri, nsDependentString(description), locale, + true, true); + CoTaskMemFree(description); + if (NS_FAILED(rv)) { + continue; + } + + mVoices.Put(uri, voiceToken); + } + + registry->NotifyVoicesChanged(); + + return true; +} + +NS_IMETHODIMP +SapiService::Speak(const nsAString& aText, const nsAString& aUri, + float aVolume, float aRate, float aPitch, + nsISpeechTask* aTask) +{ + NS_ENSURE_TRUE(mInitialized, NS_ERROR_NOT_AVAILABLE); + + RefPtr<ISpObjectToken> voiceToken; + if (!mVoices.Get(aUri, getter_AddRefs(voiceToken))) { + return NS_ERROR_NOT_AVAILABLE; + } + + RefPtr<ISpVoice> spVoice = InitSapiInstance(); + if (!spVoice) { + return NS_ERROR_FAILURE; + } + + if (FAILED(spVoice->SetVoice(voiceToken))) { + return NS_ERROR_FAILURE; + } + + if (FAILED(spVoice->SetVolume(static_cast<USHORT>(aVolume * 100)))) { + return NS_ERROR_FAILURE; + } + + // The max supported rate in SAPI engines is 3x, and the min is 1/3x. It is + // expressed by an integer. 0 being normal rate, -10 is 1/3 and 10 is 3x. + // Values below and above that are allowed, but the engine may clip the rate + // to its maximum capable value. + // "Each increment between -10 and +10 is logarithmically distributed such + // that incrementing or decrementing by 1 is multiplying or dividing the + // rate by the 10th root of 3" + // https://msdn.microsoft.com/en-us/library/ee431826(v=vs.85).aspx + long rate = aRate != 0 ? static_cast<long>(10 * log10(aRate) / log10(3)) : 0; + if (FAILED(spVoice->SetRate(rate))) { + return NS_ERROR_FAILURE; + } + + // Set the pitch using xml + nsAutoString xml; + xml.AssignLiteral("<pitch absmiddle=\""); + // absmiddle doesn't allow float type + xml.AppendInt(static_cast<int32_t>(aPitch * 10.0f - 10.0f)); + xml.AppendLiteral("\">"); + uint32_t textOffset = xml.Length(); + + for (size_t i = 0; i < aText.Length(); i++) { + switch (aText[i]) { + case '&': + xml.AppendLiteral("&"); + break; + case '<': + xml.AppendLiteral("<"); + break; + case '>': + xml.AppendLiteral(">"); + break; + default: + xml.Append(aText[i]); + break; + } + } + + xml.AppendLiteral("</pitch>"); + + RefPtr<SapiCallback> callback = + new SapiCallback(aTask, spVoice, textOffset, aText.Length()); + + // The last three parameters doesn't matter for an indirect service + nsresult rv = aTask->Setup(callback, 0, 0, 0); + if (NS_FAILED(rv)) { + return rv; + } + + ULONG streamNum; + if (FAILED(spVoice->Speak(xml.get(), SPF_ASYNC, &streamNum))) { + aTask->Setup(nullptr, 0, 0, 0); + return NS_ERROR_FAILURE; + } + + callback->SetStreamNum(streamNum); + // streamNum reassigns same value when last stream is finished even if + // callback for stream end isn't called + // So we cannot use data hashtable and has to add it to vector at last. + mCallbacks.AppendElement(callback); + + return NS_OK; +} + +NS_IMETHODIMP +SapiService::GetServiceType(SpeechServiceType* aServiceType) +{ + *aServiceType = nsISpeechService::SERVICETYPE_INDIRECT_AUDIO; + return NS_OK; +} + +NS_IMETHODIMP +SapiService::Observe(nsISupports* aSubject, const char* aTopic, + const char16_t* aData) +{ + return NS_OK; +} + +SapiService* +SapiService::GetInstance() +{ + MOZ_ASSERT(NS_IsMainThread()); + if (XRE_GetProcessType() != GeckoProcessType_Default) { + MOZ_ASSERT(false, + "SapiService can only be started on main gecko process"); + return nullptr; + } + + if (!sSingleton) { + RefPtr<SapiService> service = new SapiService(); + if (service->Init()) { + sSingleton = service; + } + } + return sSingleton; +} + +already_AddRefed<SapiService> +SapiService::GetInstanceForService() +{ + RefPtr<SapiService> sapiService = GetInstance(); + return sapiService.forget(); +} + +void +SapiService::Shutdown() +{ + if (!sSingleton) { + return; + } + sSingleton = nullptr; +} + +} // namespace dom +} // namespace mozilla diff --git a/dom/media/webspeech/synth/windows/SapiService.h b/dom/media/webspeech/synth/windows/SapiService.h new file mode 100644 index 000000000..cde743cc2 --- /dev/null +++ b/dom/media/webspeech/synth/windows/SapiService.h @@ -0,0 +1,59 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozilla_dom_SapiService_h +#define mozilla_dom_SapiService_h + +#include "nsISpeechService.h" +#include "nsIObserver.h" +#include "nsRefPtrHashtable.h" +#include "nsTArray.h" +#include "mozilla/StaticPtr.h" + +#include <windows.h> +#include <sapi.h> + +namespace mozilla { +namespace dom { + +class SapiCallback; + +class SapiService final : public nsISpeechService + , public nsIObserver +{ +public: + NS_DECL_ISUPPORTS + NS_DECL_NSISPEECHSERVICE + NS_DECL_NSIOBSERVER + + SapiService(); + bool Init(); + + static SapiService* GetInstance(); + static already_AddRefed<SapiService> GetInstanceForService(); + + static void Shutdown(); + + static void __stdcall SpeechEventCallback(WPARAM aWParam, LPARAM aLParam); + +private: + virtual ~SapiService(); + + already_AddRefed<ISpVoice> InitSapiInstance(); + bool RegisterVoices(); + + nsRefPtrHashtable<nsStringHashKey, ISpObjectToken> mVoices; + nsTArray<RefPtr<SapiCallback>> mCallbacks; + + bool mInitialized; + + static StaticRefPtr<SapiService> sSingleton; +}; + +} // namespace dom +} // namespace mozilla + +#endif diff --git a/dom/media/webspeech/synth/windows/moz.build b/dom/media/webspeech/synth/windows/moz.build new file mode 100644 index 000000000..f0ff9f2c9 --- /dev/null +++ b/dom/media/webspeech/synth/windows/moz.build @@ -0,0 +1,13 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +UNIFIED_SOURCES += [ + 'SapiModule.cpp', + 'SapiService.cpp' +] +include('/ipc/chromium/chromium-config.mozbuild') + +FINAL_LIBRARY = 'xul' |