summaryrefslogtreecommitdiffstats
path: root/dom/media/webspeech/synth/cocoa/OSXSpeechSynthesizerService.mm
diff options
context:
space:
mode:
Diffstat (limited to 'dom/media/webspeech/synth/cocoa/OSXSpeechSynthesizerService.mm')
-rw-r--r--dom/media/webspeech/synth/cocoa/OSXSpeechSynthesizerService.mm499
1 files changed, 499 insertions, 0 deletions
diff --git a/dom/media/webspeech/synth/cocoa/OSXSpeechSynthesizerService.mm b/dom/media/webspeech/synth/cocoa/OSXSpeechSynthesizerService.mm
new file mode 100644
index 000000000..df6c5eaa0
--- /dev/null
+++ b/dom/media/webspeech/synth/cocoa/OSXSpeechSynthesizerService.mm
@@ -0,0 +1,499 @@
+/* -*- Mode: Objective-C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=2 sw=2 et tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsISupports.h"
+#include "nsServiceManagerUtils.h"
+#include "nsObjCExceptions.h"
+#include "nsCocoaUtils.h"
+#include "nsThreadUtils.h"
+#include "mozilla/dom/nsSynthVoiceRegistry.h"
+#include "mozilla/dom/nsSpeechTask.h"
+#include "mozilla/Preferences.h"
+#include "mozilla/Assertions.h"
+#include "OSXSpeechSynthesizerService.h"
+
+#import <Cocoa/Cocoa.h>
+
+// We can escape the default delimiters ("[[" and "]]") by temporarily
+// changing the delimiters just before they appear, and changing them back
+// just after.
+#define DLIM_ESCAPE_START "[[dlim (( ))]]"
+#define DLIM_ESCAPE_END "((dlim [[ ]]))"
+
+using namespace mozilla;
+
+class SpeechTaskCallback final : public nsISpeechTaskCallback
+{
+public:
+ SpeechTaskCallback(nsISpeechTask* aTask,
+ NSSpeechSynthesizer* aSynth,
+ const nsTArray<size_t>& aOffsets)
+ : mTask(aTask)
+ , mSpeechSynthesizer(aSynth)
+ , mOffsets(aOffsets)
+ {
+ mStartingTime = TimeStamp::Now();
+ }
+
+ NS_DECL_CYCLE_COLLECTING_ISUPPORTS
+ NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(SpeechTaskCallback, nsISpeechTaskCallback)
+
+ NS_DECL_NSISPEECHTASKCALLBACK
+
+ void OnWillSpeakWord(uint32_t aIndex);
+ void OnError(uint32_t aIndex);
+ void OnDidFinishSpeaking();
+
+private:
+ virtual ~SpeechTaskCallback()
+ {
+ [mSpeechSynthesizer release];
+ }
+
+ float GetTimeDurationFromStart();
+
+ nsCOMPtr<nsISpeechTask> mTask;
+ NSSpeechSynthesizer* mSpeechSynthesizer;
+ TimeStamp mStartingTime;
+ uint32_t mCurrentIndex;
+ nsTArray<size_t> mOffsets;
+};
+
+NS_IMPL_CYCLE_COLLECTION(SpeechTaskCallback, mTask);
+
+NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(SpeechTaskCallback)
+ NS_INTERFACE_MAP_ENTRY(nsISpeechTaskCallback)
+ NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechTaskCallback)
+NS_INTERFACE_MAP_END
+
+NS_IMPL_CYCLE_COLLECTING_ADDREF(SpeechTaskCallback)
+NS_IMPL_CYCLE_COLLECTING_RELEASE(SpeechTaskCallback)
+
+NS_IMETHODIMP
+SpeechTaskCallback::OnCancel()
+{
+ NS_OBJC_BEGIN_TRY_ABORT_BLOCK_NSRESULT;
+
+ [mSpeechSynthesizer stopSpeaking];
+ return NS_OK;
+
+ NS_OBJC_END_TRY_ABORT_BLOCK_NSRESULT;
+}
+
+NS_IMETHODIMP
+SpeechTaskCallback::OnPause()
+{
+ NS_OBJC_BEGIN_TRY_ABORT_BLOCK_NSRESULT;
+
+ [mSpeechSynthesizer pauseSpeakingAtBoundary:NSSpeechImmediateBoundary];
+ if (!mTask) {
+ // When calling pause() on child porcess, it may not receive end event
+ // from chrome process yet.
+ return NS_ERROR_FAILURE;
+ }
+ mTask->DispatchPause(GetTimeDurationFromStart(), mCurrentIndex);
+ return NS_OK;
+
+ NS_OBJC_END_TRY_ABORT_BLOCK_NSRESULT;
+}
+
+NS_IMETHODIMP
+SpeechTaskCallback::OnResume()
+{
+ NS_OBJC_BEGIN_TRY_ABORT_BLOCK_NSRESULT;
+
+ [mSpeechSynthesizer continueSpeaking];
+ if (!mTask) {
+ // When calling resume() on child porcess, it may not receive end event
+ // from chrome process yet.
+ return NS_ERROR_FAILURE;
+ }
+ mTask->DispatchResume(GetTimeDurationFromStart(), mCurrentIndex);
+ return NS_OK;
+
+ NS_OBJC_END_TRY_ABORT_BLOCK_NSRESULT;
+}
+
+NS_IMETHODIMP
+SpeechTaskCallback::OnVolumeChanged(float aVolume)
+{
+ NS_OBJC_BEGIN_TRY_ABORT_BLOCK_NSRESULT;
+
+ [mSpeechSynthesizer setObject:[NSNumber numberWithFloat:aVolume]
+ forProperty:NSSpeechVolumeProperty error:nil];
+ return NS_OK;
+
+ NS_OBJC_END_TRY_ABORT_BLOCK_NSRESULT;
+}
+
+float
+SpeechTaskCallback::GetTimeDurationFromStart()
+{
+ TimeDuration duration = TimeStamp::Now() - mStartingTime;
+ return duration.ToMilliseconds();
+}
+
+void
+SpeechTaskCallback::OnWillSpeakWord(uint32_t aIndex)
+{
+ mCurrentIndex = aIndex < mOffsets.Length() ? mOffsets[aIndex] : mCurrentIndex;
+ if (!mTask) {
+ return;
+ }
+ mTask->DispatchBoundary(NS_LITERAL_STRING("word"),
+ GetTimeDurationFromStart(), mCurrentIndex);
+}
+
+void
+SpeechTaskCallback::OnError(uint32_t aIndex)
+{
+ if (!mTask) {
+ return;
+ }
+ mTask->DispatchError(GetTimeDurationFromStart(), aIndex);
+}
+
+void
+SpeechTaskCallback::OnDidFinishSpeaking()
+{
+ mTask->DispatchEnd(GetTimeDurationFromStart(), mCurrentIndex);
+ // no longer needed
+ [mSpeechSynthesizer setDelegate:nil];
+ mTask = nullptr;
+}
+
+@interface SpeechDelegate : NSObject<NSSpeechSynthesizerDelegate>
+{
+@private
+ SpeechTaskCallback* mCallback;
+}
+
+ - (id)initWithCallback:(SpeechTaskCallback*)aCallback;
+@end
+
+@implementation SpeechDelegate
+- (id)initWithCallback:(SpeechTaskCallback*)aCallback
+{
+ [super init];
+ mCallback = aCallback;
+ return self;
+}
+
+- (void)speechSynthesizer:(NSSpeechSynthesizer *)aSender
+ willSpeakWord:(NSRange)aRange ofString:(NSString*)aString
+{
+ mCallback->OnWillSpeakWord(aRange.location);
+}
+
+- (void)speechSynthesizer:(NSSpeechSynthesizer *)aSender
+ didFinishSpeaking:(BOOL)aFinishedSpeaking
+{
+ mCallback->OnDidFinishSpeaking();
+}
+
+- (void)speechSynthesizer:(NSSpeechSynthesizer*)aSender
+ didEncounterErrorAtIndex:(NSUInteger)aCharacterIndex
+ ofString:(NSString*)aString
+ message:(NSString*)aMessage
+{
+ mCallback->OnError(aCharacterIndex);
+}
+@end
+
+namespace mozilla {
+namespace dom {
+
+struct OSXVoice
+{
+ OSXVoice() : mIsDefault(false)
+ {
+ }
+
+ nsString mUri;
+ nsString mName;
+ nsString mLocale;
+ bool mIsDefault;
+};
+
+class RegisterVoicesRunnable final : public Runnable
+{
+public:
+ RegisterVoicesRunnable(OSXSpeechSynthesizerService* aSpeechService,
+ nsTArray<OSXVoice>& aList)
+ : mSpeechService(aSpeechService)
+ , mVoices(aList)
+ {
+ }
+
+ NS_IMETHOD Run() override;
+
+private:
+ ~RegisterVoicesRunnable()
+ {
+ }
+
+ // This runnable always use sync mode. It is unnecesarry to reference object
+ OSXSpeechSynthesizerService* mSpeechService;
+ nsTArray<OSXVoice>& mVoices;
+};
+
+NS_IMETHODIMP
+RegisterVoicesRunnable::Run()
+{
+ nsresult rv;
+ nsCOMPtr<nsISynthVoiceRegistry> registry =
+ do_GetService(NS_SYNTHVOICEREGISTRY_CONTRACTID, &rv);
+ if (!registry) {
+ return rv;
+ }
+
+ for (OSXVoice voice : mVoices) {
+ rv = registry->AddVoice(mSpeechService, voice.mUri, voice.mName, voice.mLocale, true, false);
+ if (NS_WARN_IF(NS_FAILED(rv))) {
+ continue;
+ }
+
+ if (voice.mIsDefault) {
+ registry->SetDefaultVoice(voice.mUri, true);
+ }
+ }
+
+ registry->NotifyVoicesChanged();
+
+ return NS_OK;
+}
+
+class EnumVoicesRunnable final : public Runnable
+{
+public:
+ explicit EnumVoicesRunnable(OSXSpeechSynthesizerService* aSpeechService)
+ : mSpeechService(aSpeechService)
+ {
+ }
+
+ NS_IMETHOD Run() override;
+
+private:
+ ~EnumVoicesRunnable()
+ {
+ }
+
+ RefPtr<OSXSpeechSynthesizerService> mSpeechService;
+};
+
+NS_IMETHODIMP
+EnumVoicesRunnable::Run()
+{
+ NS_OBJC_BEGIN_TRY_ABORT_BLOCK_NSRESULT;
+
+ AutoTArray<OSXVoice, 64> list;
+
+ NSArray* voices = [NSSpeechSynthesizer availableVoices];
+ NSString* defaultVoice = [NSSpeechSynthesizer defaultVoice];
+
+ for (NSString* voice in voices) {
+ OSXVoice item;
+
+ NSDictionary* attr = [NSSpeechSynthesizer attributesForVoice:voice];
+
+ nsAutoString identifier;
+ nsCocoaUtils::GetStringForNSString([attr objectForKey:NSVoiceIdentifier],
+ identifier);
+
+ nsCocoaUtils::GetStringForNSString([attr objectForKey:NSVoiceName], item.mName);
+
+ nsCocoaUtils::GetStringForNSString(
+ [attr objectForKey:NSVoiceLocaleIdentifier], item.mLocale);
+ item.mLocale.ReplaceChar('_', '-');
+
+ item.mUri.AssignLiteral("urn:moz-tts:osx:");
+ item.mUri.Append(identifier);
+
+ if ([voice isEqualToString:defaultVoice]) {
+ item.mIsDefault = true;
+ }
+
+ list.AppendElement(item);
+ }
+
+ RefPtr<RegisterVoicesRunnable> runnable = new RegisterVoicesRunnable(mSpeechService, list);
+ NS_DispatchToMainThread(runnable, NS_DISPATCH_SYNC);
+
+ return NS_OK;
+
+ NS_OBJC_END_TRY_ABORT_BLOCK_NSRESULT;
+}
+
+StaticRefPtr<OSXSpeechSynthesizerService> OSXSpeechSynthesizerService::sSingleton;
+
+NS_INTERFACE_MAP_BEGIN(OSXSpeechSynthesizerService)
+ NS_INTERFACE_MAP_ENTRY(nsISpeechService)
+ NS_INTERFACE_MAP_ENTRY(nsIObserver)
+ NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechService)
+NS_INTERFACE_MAP_END
+
+NS_IMPL_ADDREF(OSXSpeechSynthesizerService)
+NS_IMPL_RELEASE(OSXSpeechSynthesizerService)
+
+OSXSpeechSynthesizerService::OSXSpeechSynthesizerService()
+ : mInitialized(false)
+{
+}
+
+OSXSpeechSynthesizerService::~OSXSpeechSynthesizerService()
+{
+}
+
+bool
+OSXSpeechSynthesizerService::Init()
+{
+ if (Preferences::GetBool("media.webspeech.synth.test") ||
+ !Preferences::GetBool("media.webspeech.synth.enabled")) {
+ // When test is enabled, we shouldn't add OS backend (Bug 1160844)
+ return false;
+ }
+
+ nsCOMPtr<nsIThread> thread;
+ if (NS_FAILED(NS_NewNamedThread("SpeechWorker", getter_AddRefs(thread)))) {
+ return false;
+ }
+
+ // Get all the voices and register in the SynthVoiceRegistry
+ nsCOMPtr<nsIRunnable> runnable = new EnumVoicesRunnable(this);
+ thread->Dispatch(runnable, NS_DISPATCH_NORMAL);
+
+ mInitialized = true;
+ return true;
+}
+
+NS_IMETHODIMP
+OSXSpeechSynthesizerService::Speak(const nsAString& aText,
+ const nsAString& aUri,
+ float aVolume,
+ float aRate,
+ float aPitch,
+ nsISpeechTask* aTask)
+{
+ NS_OBJC_BEGIN_TRY_ABORT_BLOCK_NSRESULT;
+
+ MOZ_ASSERT(StringBeginsWith(aUri, NS_LITERAL_STRING("urn:moz-tts:osx:")),
+ "OSXSpeechSynthesizerService doesn't allow this voice URI");
+
+ NSSpeechSynthesizer* synth = [[NSSpeechSynthesizer alloc] init];
+ // strlen("urn:moz-tts:osx:") == 16
+ NSString* identifier = nsCocoaUtils::ToNSString(Substring(aUri, 16));
+ [synth setVoice:identifier];
+
+ // default rate is 180-220
+ [synth setObject:[NSNumber numberWithInt:aRate * 200]
+ forProperty:NSSpeechRateProperty error:nil];
+ // volume allows 0.0-1.0
+ [synth setObject:[NSNumber numberWithFloat:aVolume]
+ forProperty:NSSpeechVolumeProperty error:nil];
+ // Use default pitch value to calculate this
+ NSNumber* defaultPitch =
+ [synth objectForProperty:NSSpeechPitchBaseProperty error:nil];
+ if (defaultPitch) {
+ int newPitch = [defaultPitch intValue] * (aPitch / 2 + 0.5);
+ [synth setObject:[NSNumber numberWithInt:newPitch]
+ forProperty:NSSpeechPitchBaseProperty error:nil];
+ }
+
+ nsAutoString escapedText;
+ // We need to map the the offsets from the given text to the escaped text.
+ // The index of the offsets array is the position in the escaped text,
+ // the element value is the position in the user-supplied text.
+ nsTArray<size_t> offsets;
+ offsets.SetCapacity(aText.Length());
+
+ // This loop looks for occurances of "[[" or "]]", escapes them, and
+ // populates the offsets array to supply a map to the original offsets.
+ for (size_t i = 0; i < aText.Length(); i++) {
+ if (aText.Length() > i + 1 &&
+ ((aText[i] == ']' && aText[i+1] == ']') ||
+ (aText[i] == '[' && aText[i+1] == '['))) {
+ escapedText.AppendLiteral(DLIM_ESCAPE_START);
+ offsets.AppendElements(strlen(DLIM_ESCAPE_START));
+ escapedText.Append(aText[i]);
+ offsets.AppendElement(i);
+ escapedText.Append(aText[++i]);
+ offsets.AppendElement(i);
+ escapedText.AppendLiteral(DLIM_ESCAPE_END);
+ offsets.AppendElements(strlen(DLIM_ESCAPE_END));
+ } else {
+ escapedText.Append(aText[i]);
+ offsets.AppendElement(i);
+ }
+ }
+
+ RefPtr<SpeechTaskCallback> callback = new SpeechTaskCallback(aTask, synth, offsets);
+ nsresult rv = aTask->Setup(callback, 0, 0, 0);
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ SpeechDelegate* delegate = [[SpeechDelegate alloc] initWithCallback:callback];
+ [synth setDelegate:delegate];
+ [delegate release ];
+
+ NSString* text = nsCocoaUtils::ToNSString(escapedText);
+ BOOL success = [synth startSpeakingString:text];
+ NS_ENSURE_TRUE(success, NS_ERROR_FAILURE);
+
+ aTask->DispatchStart();
+ return NS_OK;
+
+ NS_OBJC_END_TRY_ABORT_BLOCK_NSRESULT;
+}
+
+NS_IMETHODIMP
+OSXSpeechSynthesizerService::GetServiceType(SpeechServiceType* aServiceType)
+{
+ *aServiceType = nsISpeechService::SERVICETYPE_INDIRECT_AUDIO;
+ return NS_OK;
+}
+
+NS_IMETHODIMP
+OSXSpeechSynthesizerService::Observe(nsISupports* aSubject, const char* aTopic,
+ const char16_t* aData)
+{
+ return NS_OK;
+}
+
+OSXSpeechSynthesizerService*
+OSXSpeechSynthesizerService::GetInstance()
+{
+ MOZ_ASSERT(NS_IsMainThread());
+ if (XRE_GetProcessType() != GeckoProcessType_Default) {
+ return nullptr;
+ }
+
+ if (!sSingleton) {
+ RefPtr<OSXSpeechSynthesizerService> speechService =
+ new OSXSpeechSynthesizerService();
+ if (speechService->Init()) {
+ sSingleton = speechService;
+ }
+ }
+ return sSingleton;
+}
+
+already_AddRefed<OSXSpeechSynthesizerService>
+OSXSpeechSynthesizerService::GetInstanceForService()
+{
+ RefPtr<OSXSpeechSynthesizerService> speechService = GetInstance();
+ return speechService.forget();
+}
+
+void
+OSXSpeechSynthesizerService::Shutdown()
+{
+ if (!sSingleton) {
+ return;
+ }
+ sSingleton = nullptr;
+}
+
+} // namespace dom
+} // namespace mozilla