From 5f8de423f190bbb79a62f804151bc24824fa32d8 Mon Sep 17 00:00:00 2001 From: "Matt A. Tobin" Date: Fri, 2 Feb 2018 04:16:08 -0500 Subject: Add m-esr52 at 52.6.0 --- .../synth/speechd/SpeechDispatcherService.cpp | 593 +++++++++++++++++++++ 1 file changed, 593 insertions(+) create mode 100644 dom/media/webspeech/synth/speechd/SpeechDispatcherService.cpp (limited to 'dom/media/webspeech/synth/speechd/SpeechDispatcherService.cpp') diff --git a/dom/media/webspeech/synth/speechd/SpeechDispatcherService.cpp b/dom/media/webspeech/synth/speechd/SpeechDispatcherService.cpp new file mode 100644 index 000000000..77a8f7cd9 --- /dev/null +++ b/dom/media/webspeech/synth/speechd/SpeechDispatcherService.cpp @@ -0,0 +1,593 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "SpeechDispatcherService.h" + +#include "mozilla/dom/nsSpeechTask.h" +#include "mozilla/dom/nsSynthVoiceRegistry.h" +#include "mozilla/Preferences.h" +#include "nsEscape.h" +#include "nsISupports.h" +#include "nsPrintfCString.h" +#include "nsReadableUtils.h" +#include "nsServiceManagerUtils.h" +#include "nsThreadUtils.h" +#include "prlink.h" + +#include +#include + +#define URI_PREFIX "urn:moz-tts:speechd:" + +#define MAX_RATE static_cast(2.5) +#define MIN_RATE static_cast(0.5) + +// Some structures for libspeechd +typedef enum { + SPD_EVENT_BEGIN, + SPD_EVENT_END, + SPD_EVENT_INDEX_MARK, + SPD_EVENT_CANCEL, + SPD_EVENT_PAUSE, + SPD_EVENT_RESUME +} SPDNotificationType; + +typedef enum { + SPD_BEGIN = 1, + SPD_END = 2, + SPD_INDEX_MARKS = 4, + SPD_CANCEL = 8, + SPD_PAUSE = 16, + SPD_RESUME = 32, + + SPD_ALL = 0x3f +} SPDNotification; + +typedef enum { + SPD_MODE_SINGLE = 0, + SPD_MODE_THREADED = 1 +} SPDConnectionMode; + +typedef void (*SPDCallback) (size_t msg_id, size_t client_id, + SPDNotificationType state); + +typedef void (*SPDCallbackIM) (size_t msg_id, size_t client_id, + SPDNotificationType state, char* index_mark); + +struct SPDConnection +{ + SPDCallback callback_begin; + SPDCallback callback_end; + SPDCallback callback_cancel; + SPDCallback callback_pause; + SPDCallback callback_resume; + SPDCallbackIM callback_im; + + /* partial, more private fields in structure */ +}; + +struct SPDVoice +{ + char* name; + char* language; + char* variant; +}; + +typedef enum { + SPD_IMPORTANT = 1, + SPD_MESSAGE = 2, + SPD_TEXT = 3, + SPD_NOTIFICATION = 4, + SPD_PROGRESS = 5 +} SPDPriority; + +#define SPEECHD_FUNCTIONS \ + FUNC(spd_open, SPDConnection*, (const char*, const char*, const char*, SPDConnectionMode)) \ + FUNC(spd_close, void, (SPDConnection*)) \ + FUNC(spd_list_synthesis_voices, SPDVoice**, (SPDConnection*)) \ + FUNC(spd_say, int, (SPDConnection*, SPDPriority, const char*)) \ + FUNC(spd_cancel, int, (SPDConnection*)) \ + FUNC(spd_set_volume, int, (SPDConnection*, int)) \ + FUNC(spd_set_voice_rate, int, (SPDConnection*, int)) \ + FUNC(spd_set_voice_pitch, int, (SPDConnection*, int)) \ + FUNC(spd_set_synthesis_voice, int, (SPDConnection*, const char*)) \ + FUNC(spd_set_notification_on, int, (SPDConnection*, SPDNotification)) + +#define FUNC(name, type, params) \ + typedef type (*_##name##_fn) params; \ + static _##name##_fn _##name; + +SPEECHD_FUNCTIONS + +#undef FUNC + +#define spd_open _spd_open +#define spd_close _spd_close +#define spd_list_synthesis_voices _spd_list_synthesis_voices +#define spd_say _spd_say +#define spd_cancel _spd_cancel +#define spd_set_volume _spd_set_volume +#define spd_set_voice_rate _spd_set_voice_rate +#define spd_set_voice_pitch _spd_set_voice_pitch +#define spd_set_synthesis_voice _spd_set_synthesis_voice +#define spd_set_notification_on _spd_set_notification_on + +static PRLibrary* speechdLib = nullptr; + +typedef void (*nsSpeechDispatcherFunc)(); +struct nsSpeechDispatcherDynamicFunction +{ + const char* functionName; + nsSpeechDispatcherFunc* function; +}; + +namespace mozilla { +namespace dom { + +StaticRefPtr SpeechDispatcherService::sSingleton; + +class SpeechDispatcherVoice +{ +public: + + SpeechDispatcherVoice(const nsAString& aName, const nsAString& aLanguage) + : mName(aName), mLanguage(aLanguage) {} + + NS_INLINE_DECL_THREADSAFE_REFCOUNTING(SpeechDispatcherVoice) + + // Voice name + nsString mName; + + // Voice language, in BCP-47 syntax + nsString mLanguage; + +private: + ~SpeechDispatcherVoice() {} +}; + + +class SpeechDispatcherCallback final : public nsISpeechTaskCallback +{ +public: + SpeechDispatcherCallback(nsISpeechTask* aTask, SpeechDispatcherService* aService) + : mTask(aTask) + , mService(aService) {} + + NS_DECL_CYCLE_COLLECTING_ISUPPORTS + NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(SpeechDispatcherCallback, nsISpeechTaskCallback) + + NS_DECL_NSISPEECHTASKCALLBACK + + bool OnSpeechEvent(SPDNotificationType state); + +private: + ~SpeechDispatcherCallback() { } + + // This pointer is used to dispatch events + nsCOMPtr mTask; + + // By holding a strong reference to the service we guarantee that it won't be + // destroyed before this runnable. + RefPtr mService; + + TimeStamp mStartTime; +}; + +NS_IMPL_CYCLE_COLLECTION(SpeechDispatcherCallback, mTask); + +NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(SpeechDispatcherCallback) + NS_INTERFACE_MAP_ENTRY(nsISpeechTaskCallback) + NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechTaskCallback) +NS_INTERFACE_MAP_END + +NS_IMPL_CYCLE_COLLECTING_ADDREF(SpeechDispatcherCallback) +NS_IMPL_CYCLE_COLLECTING_RELEASE(SpeechDispatcherCallback) + +NS_IMETHODIMP +SpeechDispatcherCallback::OnPause() +{ + // XXX: Speech dispatcher does not pause immediately, but waits for the speech + // to reach an index mark so that it could resume from that offset. + // There is no support for word or sentence boundaries, so index marks would + // only occur in explicit SSML marks, and we don't support that yet. + // What in actuality happens, is that if you call spd_pause(), it will speak + // the utterance in its entirety, dispatch an end event, and then put speechd + // in a 'paused' state. Since it is after the utterance ended, we don't get + // that state change, and our speech api is in an unrecoverable state. + // So, since it is useless anyway, I am not implementing pause. + return NS_OK; +} + +NS_IMETHODIMP +SpeechDispatcherCallback::OnResume() +{ + // XXX: Unsupported, see OnPause(). + return NS_OK; +} + +NS_IMETHODIMP +SpeechDispatcherCallback::OnCancel() +{ + if (spd_cancel(mService->mSpeechdClient) < 0) { + return NS_ERROR_FAILURE; + } + + return NS_OK; +} + +NS_IMETHODIMP +SpeechDispatcherCallback::OnVolumeChanged(float aVolume) +{ + // XXX: This currently does not change the volume mid-utterance, but it + // doesn't do anything bad either. So we could put this here with the hopes + // that speechd supports this in the future. + if (spd_set_volume(mService->mSpeechdClient, static_cast(aVolume * 100)) < 0) { + return NS_ERROR_FAILURE; + } + + return NS_OK; +} + +bool +SpeechDispatcherCallback::OnSpeechEvent(SPDNotificationType state) +{ + bool remove = false; + + switch (state) { + case SPD_EVENT_BEGIN: + mStartTime = TimeStamp::Now(); + mTask->DispatchStart(); + break; + + case SPD_EVENT_PAUSE: + mTask->DispatchPause((TimeStamp::Now() - mStartTime).ToSeconds(), 0); + break; + + case SPD_EVENT_RESUME: + mTask->DispatchResume((TimeStamp::Now() - mStartTime).ToSeconds(), 0); + break; + + case SPD_EVENT_CANCEL: + case SPD_EVENT_END: + mTask->DispatchEnd((TimeStamp::Now() - mStartTime).ToSeconds(), 0); + remove = true; + break; + + case SPD_EVENT_INDEX_MARK: + // Not yet supported + break; + + default: + break; + } + + return remove; +} + +static void +speechd_cb(size_t msg_id, size_t client_id, SPDNotificationType state) +{ + SpeechDispatcherService* service = SpeechDispatcherService::GetInstance(false); + + if (service) { + NS_DispatchToMainThread( + NewRunnableMethod( + service, &SpeechDispatcherService::EventNotify, + static_cast(msg_id), state)); + } +} + + +NS_INTERFACE_MAP_BEGIN(SpeechDispatcherService) + NS_INTERFACE_MAP_ENTRY(nsISpeechService) + NS_INTERFACE_MAP_ENTRY(nsIObserver) + NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsIObserver) +NS_INTERFACE_MAP_END + +NS_IMPL_ADDREF(SpeechDispatcherService) +NS_IMPL_RELEASE(SpeechDispatcherService) + +SpeechDispatcherService::SpeechDispatcherService() + : mInitialized(false) + , mSpeechdClient(nullptr) +{ +} + +void +SpeechDispatcherService::Init() +{ + if (!Preferences::GetBool("media.webspeech.synth.enabled") || + Preferences::GetBool("media.webspeech.synth.test")) { + return; + } + + // While speech dispatcher has a "threaded" mode, only spd_say() is async. + // Since synchronous socket i/o could impact startup time, we do + // initialization in a separate thread. + DebugOnly rv = NS_NewNamedThread("speechd init", + getter_AddRefs(mInitThread)); + MOZ_ASSERT(NS_SUCCEEDED(rv)); + rv = mInitThread->Dispatch( + NewRunnableMethod(this, &SpeechDispatcherService::Setup), NS_DISPATCH_NORMAL); + MOZ_ASSERT(NS_SUCCEEDED(rv)); +} + +SpeechDispatcherService::~SpeechDispatcherService() +{ + if (mInitThread) { + mInitThread->Shutdown(); + } + + if (mSpeechdClient) { + spd_close(mSpeechdClient); + } +} + +void +SpeechDispatcherService::Setup() +{ +#define FUNC(name, type, params) { #name, (nsSpeechDispatcherFunc *)&_##name }, + static const nsSpeechDispatcherDynamicFunction kSpeechDispatcherSymbols[] = { + SPEECHD_FUNCTIONS + }; +#undef FUNC + + MOZ_ASSERT(!mInitialized); + + speechdLib = PR_LoadLibrary("libspeechd.so.2"); + + if (!speechdLib) { + NS_WARNING("Failed to load speechd library"); + return; + } + + if (!PR_FindFunctionSymbol(speechdLib, "spd_get_volume")) { + // There is no version getter function, so we rely on a symbol that was + // introduced in release 0.8.2 in order to check for ABI compatibility. + NS_WARNING("Unsupported version of speechd detected"); + return; + } + + for (uint32_t i = 0; i < ArrayLength(kSpeechDispatcherSymbols); i++) { + *kSpeechDispatcherSymbols[i].function = + PR_FindFunctionSymbol(speechdLib, kSpeechDispatcherSymbols[i].functionName); + + if (!*kSpeechDispatcherSymbols[i].function) { + NS_WARNING(nsPrintfCString("Failed to find speechd symbol for'%s'", + kSpeechDispatcherSymbols[i].functionName).get()); + return; + } + } + + mSpeechdClient = spd_open("firefox", "web speech api", "who", SPD_MODE_THREADED); + if (!mSpeechdClient) { + NS_WARNING("Failed to call spd_open"); + return; + } + + // Get all the voices from sapi and register in the SynthVoiceRegistry + SPDVoice** list = spd_list_synthesis_voices(mSpeechdClient); + + mSpeechdClient->callback_begin = speechd_cb; + mSpeechdClient->callback_end = speechd_cb; + mSpeechdClient->callback_cancel = speechd_cb; + mSpeechdClient->callback_pause = speechd_cb; + mSpeechdClient->callback_resume = speechd_cb; + + spd_set_notification_on(mSpeechdClient, SPD_BEGIN); + spd_set_notification_on(mSpeechdClient, SPD_END); + spd_set_notification_on(mSpeechdClient, SPD_CANCEL); + + if (list != NULL) { + for (int i = 0; list[i]; i++) { + nsAutoString uri; + + uri.AssignLiteral(URI_PREFIX); + nsAutoCString name; + NS_EscapeURL(list[i]->name, -1, esc_OnlyNonASCII | esc_AlwaysCopy, name); + uri.Append(NS_ConvertUTF8toUTF16(name));; + uri.AppendLiteral("?"); + + nsAutoCString lang(list[i]->language); + + if (strcmp(list[i]->variant, "none") != 0) { + // In speech dispatcher, the variant will usually be the locale subtag + // with another, non-standard suptag after it. We keep the first one + // and convert it to uppercase. + const char* v = list[i]->variant; + const char* hyphen = strchr(v, '-'); + nsDependentCSubstring variant(v, hyphen ? hyphen - v : strlen(v)); + ToUpperCase(variant); + + // eSpeak uses UK which is not a valid region subtag in BCP47. + if (variant.Equals("UK")) { + variant.AssignLiteral("GB"); + } + + lang.AppendLiteral("-"); + lang.Append(variant); + } + + uri.Append(NS_ConvertUTF8toUTF16(lang)); + + mVoices.Put(uri, new SpeechDispatcherVoice( + NS_ConvertUTF8toUTF16(list[i]->name), + NS_ConvertUTF8toUTF16(lang))); + } + } + + NS_DispatchToMainThread(NewRunnableMethod(this, &SpeechDispatcherService::RegisterVoices)); + + //mInitialized = true; +} + +// private methods + +void +SpeechDispatcherService::RegisterVoices() +{ + RefPtr registry = nsSynthVoiceRegistry::GetInstance(); + for (auto iter = mVoices.Iter(); !iter.Done(); iter.Next()) { + RefPtr& voice = iter.Data(); + + // This service can only speak one utterance at a time, so we set + // aQueuesUtterances to true in order to track global state and schedule + // access to this service. + DebugOnly rv = + registry->AddVoice(this, iter.Key(), voice->mName, voice->mLanguage, + voice->mName.EqualsLiteral("default"), true); + + NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "Failed to add voice"); + } + + mInitThread->Shutdown(); + mInitThread = nullptr; + + mInitialized = true; + + registry->NotifyVoicesChanged(); +} + +// nsIObserver + +NS_IMETHODIMP +SpeechDispatcherService::Observe(nsISupports* aSubject, const char* aTopic, + const char16_t* aData) +{ + return NS_OK; +} + +// nsISpeechService + +// TODO: Support SSML +NS_IMETHODIMP +SpeechDispatcherService::Speak(const nsAString& aText, const nsAString& aUri, + float aVolume, float aRate, float aPitch, + nsISpeechTask* aTask) +{ + if (NS_WARN_IF(!mInitialized)) { + return NS_ERROR_NOT_AVAILABLE; + } + + RefPtr callback = + new SpeechDispatcherCallback(aTask, this); + + bool found = false; + SpeechDispatcherVoice* voice = mVoices.GetWeak(aUri, &found); + + if(NS_WARN_IF(!(found))) { + return NS_ERROR_NOT_AVAILABLE; + } + + spd_set_synthesis_voice(mSpeechdClient, + NS_ConvertUTF16toUTF8(voice->mName).get()); + + // We provide a volume of 0.0 to 1.0, speech-dispatcher expects 0 - 100. + spd_set_volume(mSpeechdClient, static_cast(aVolume * 100)); + + // aRate is a value of 0.1 (0.1x) to 10 (10x) with 1 (1x) being normal rate. + // speechd expects -100 to 100 with 0 being normal rate. + float rate = 0; + if (aRate > 1) { + // Each step to 100 is logarithmically distributed up to 2.5x. + rate = log10(std::min(aRate, MAX_RATE)) / log10(MAX_RATE) * 100; + } else if (aRate < 1) { + // Each step to -100 is logarithmically distributed down to 0.5x. + rate = log10(std::max(aRate, MIN_RATE)) / log10(MIN_RATE) * -100; + } + + spd_set_voice_rate(mSpeechdClient, static_cast(rate)); + + // We provide a pitch of 0 to 2 with 1 being the default. + // speech-dispatcher expects -100 to 100 with 0 being default. + spd_set_voice_pitch(mSpeechdClient, static_cast((aPitch - 1) * 100)); + + // The last three parameters don't matter for an indirect service + nsresult rv = aTask->Setup(callback, 0, 0, 0); + + if (NS_FAILED(rv)) { + return rv; + } + + if (aText.Length()) { + int msg_id = spd_say( + mSpeechdClient, SPD_MESSAGE, NS_ConvertUTF16toUTF8(aText).get()); + + if (msg_id < 0) { + return NS_ERROR_FAILURE; + } + + mCallbacks.Put(msg_id, callback); + } else { + // Speech dispatcher does not work well with empty strings. + // In that case, don't send empty string to speechd, + // and just emulate a speechd start and end event. + NS_DispatchToMainThread(NewRunnableMethod( + callback, &SpeechDispatcherCallback::OnSpeechEvent, SPD_EVENT_BEGIN)); + + NS_DispatchToMainThread(NewRunnableMethod( + callback, &SpeechDispatcherCallback::OnSpeechEvent, SPD_EVENT_END)); + } + + return NS_OK; +} + +NS_IMETHODIMP +SpeechDispatcherService::GetServiceType(SpeechServiceType* aServiceType) +{ + *aServiceType = nsISpeechService::SERVICETYPE_INDIRECT_AUDIO; + return NS_OK; +} + +SpeechDispatcherService* +SpeechDispatcherService::GetInstance(bool create) +{ + if (XRE_GetProcessType() != GeckoProcessType_Default) { + MOZ_ASSERT(false, + "SpeechDispatcherService can only be started on main gecko process"); + return nullptr; + } + + if (!sSingleton && create) { + sSingleton = new SpeechDispatcherService(); + sSingleton->Init(); + } + + return sSingleton; +} + +already_AddRefed +SpeechDispatcherService::GetInstanceForService() +{ + MOZ_ASSERT(NS_IsMainThread()); + RefPtr sapiService = GetInstance(); + return sapiService.forget(); +} + +void +SpeechDispatcherService::EventNotify(uint32_t aMsgId, uint32_t aState) +{ + SpeechDispatcherCallback* callback = mCallbacks.GetWeak(aMsgId); + + if (callback) { + if (callback->OnSpeechEvent((SPDNotificationType)aState)) { + mCallbacks.Remove(aMsgId); + } + } +} + +void +SpeechDispatcherService::Shutdown() +{ + if (!sSingleton) { + return; + } + + sSingleton = nullptr; +} + +} // namespace dom +} // namespace mozilla -- cgit v1.2.3