diff options
Diffstat (limited to 'dom/media/webspeech/synth/cocoa/OSXSpeechSynthesizerService.mm')
-rw-r--r-- | dom/media/webspeech/synth/cocoa/OSXSpeechSynthesizerService.mm | 499 |
1 files changed, 499 insertions, 0 deletions
diff --git a/dom/media/webspeech/synth/cocoa/OSXSpeechSynthesizerService.mm b/dom/media/webspeech/synth/cocoa/OSXSpeechSynthesizerService.mm new file mode 100644 index 000000000..df6c5eaa0 --- /dev/null +++ b/dom/media/webspeech/synth/cocoa/OSXSpeechSynthesizerService.mm @@ -0,0 +1,499 @@ +/* -*- Mode: Objective-C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=2 sw=2 et tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsISupports.h" +#include "nsServiceManagerUtils.h" +#include "nsObjCExceptions.h" +#include "nsCocoaUtils.h" +#include "nsThreadUtils.h" +#include "mozilla/dom/nsSynthVoiceRegistry.h" +#include "mozilla/dom/nsSpeechTask.h" +#include "mozilla/Preferences.h" +#include "mozilla/Assertions.h" +#include "OSXSpeechSynthesizerService.h" + +#import <Cocoa/Cocoa.h> + +// We can escape the default delimiters ("[[" and "]]") by temporarily +// changing the delimiters just before they appear, and changing them back +// just after. +#define DLIM_ESCAPE_START "[[dlim (( ))]]" +#define DLIM_ESCAPE_END "((dlim [[ ]]))" + +using namespace mozilla; + +class SpeechTaskCallback final : public nsISpeechTaskCallback +{ +public: + SpeechTaskCallback(nsISpeechTask* aTask, + NSSpeechSynthesizer* aSynth, + const nsTArray<size_t>& aOffsets) + : mTask(aTask) + , mSpeechSynthesizer(aSynth) + , mOffsets(aOffsets) + { + mStartingTime = TimeStamp::Now(); + } + + NS_DECL_CYCLE_COLLECTING_ISUPPORTS + NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(SpeechTaskCallback, nsISpeechTaskCallback) + + NS_DECL_NSISPEECHTASKCALLBACK + + void OnWillSpeakWord(uint32_t aIndex); + void OnError(uint32_t aIndex); + void OnDidFinishSpeaking(); + +private: + virtual ~SpeechTaskCallback() + { + [mSpeechSynthesizer release]; + } + + float GetTimeDurationFromStart(); + + nsCOMPtr<nsISpeechTask> mTask; + NSSpeechSynthesizer* mSpeechSynthesizer; + TimeStamp mStartingTime; + uint32_t mCurrentIndex; + nsTArray<size_t> mOffsets; +}; + +NS_IMPL_CYCLE_COLLECTION(SpeechTaskCallback, mTask); + +NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(SpeechTaskCallback) + NS_INTERFACE_MAP_ENTRY(nsISpeechTaskCallback) + NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechTaskCallback) +NS_INTERFACE_MAP_END + +NS_IMPL_CYCLE_COLLECTING_ADDREF(SpeechTaskCallback) +NS_IMPL_CYCLE_COLLECTING_RELEASE(SpeechTaskCallback) + +NS_IMETHODIMP +SpeechTaskCallback::OnCancel() +{ + NS_OBJC_BEGIN_TRY_ABORT_BLOCK_NSRESULT; + + [mSpeechSynthesizer stopSpeaking]; + return NS_OK; + + NS_OBJC_END_TRY_ABORT_BLOCK_NSRESULT; +} + +NS_IMETHODIMP +SpeechTaskCallback::OnPause() +{ + NS_OBJC_BEGIN_TRY_ABORT_BLOCK_NSRESULT; + + [mSpeechSynthesizer pauseSpeakingAtBoundary:NSSpeechImmediateBoundary]; + if (!mTask) { + // When calling pause() on child porcess, it may not receive end event + // from chrome process yet. + return NS_ERROR_FAILURE; + } + mTask->DispatchPause(GetTimeDurationFromStart(), mCurrentIndex); + return NS_OK; + + NS_OBJC_END_TRY_ABORT_BLOCK_NSRESULT; +} + +NS_IMETHODIMP +SpeechTaskCallback::OnResume() +{ + NS_OBJC_BEGIN_TRY_ABORT_BLOCK_NSRESULT; + + [mSpeechSynthesizer continueSpeaking]; + if (!mTask) { + // When calling resume() on child porcess, it may not receive end event + // from chrome process yet. + return NS_ERROR_FAILURE; + } + mTask->DispatchResume(GetTimeDurationFromStart(), mCurrentIndex); + return NS_OK; + + NS_OBJC_END_TRY_ABORT_BLOCK_NSRESULT; +} + +NS_IMETHODIMP +SpeechTaskCallback::OnVolumeChanged(float aVolume) +{ + NS_OBJC_BEGIN_TRY_ABORT_BLOCK_NSRESULT; + + [mSpeechSynthesizer setObject:[NSNumber numberWithFloat:aVolume] + forProperty:NSSpeechVolumeProperty error:nil]; + return NS_OK; + + NS_OBJC_END_TRY_ABORT_BLOCK_NSRESULT; +} + +float +SpeechTaskCallback::GetTimeDurationFromStart() +{ + TimeDuration duration = TimeStamp::Now() - mStartingTime; + return duration.ToMilliseconds(); +} + +void +SpeechTaskCallback::OnWillSpeakWord(uint32_t aIndex) +{ + mCurrentIndex = aIndex < mOffsets.Length() ? mOffsets[aIndex] : mCurrentIndex; + if (!mTask) { + return; + } + mTask->DispatchBoundary(NS_LITERAL_STRING("word"), + GetTimeDurationFromStart(), mCurrentIndex); +} + +void +SpeechTaskCallback::OnError(uint32_t aIndex) +{ + if (!mTask) { + return; + } + mTask->DispatchError(GetTimeDurationFromStart(), aIndex); +} + +void +SpeechTaskCallback::OnDidFinishSpeaking() +{ + mTask->DispatchEnd(GetTimeDurationFromStart(), mCurrentIndex); + // no longer needed + [mSpeechSynthesizer setDelegate:nil]; + mTask = nullptr; +} + +@interface SpeechDelegate : NSObject<NSSpeechSynthesizerDelegate> +{ +@private + SpeechTaskCallback* mCallback; +} + + - (id)initWithCallback:(SpeechTaskCallback*)aCallback; +@end + +@implementation SpeechDelegate +- (id)initWithCallback:(SpeechTaskCallback*)aCallback +{ + [super init]; + mCallback = aCallback; + return self; +} + +- (void)speechSynthesizer:(NSSpeechSynthesizer *)aSender + willSpeakWord:(NSRange)aRange ofString:(NSString*)aString +{ + mCallback->OnWillSpeakWord(aRange.location); +} + +- (void)speechSynthesizer:(NSSpeechSynthesizer *)aSender + didFinishSpeaking:(BOOL)aFinishedSpeaking +{ + mCallback->OnDidFinishSpeaking(); +} + +- (void)speechSynthesizer:(NSSpeechSynthesizer*)aSender + didEncounterErrorAtIndex:(NSUInteger)aCharacterIndex + ofString:(NSString*)aString + message:(NSString*)aMessage +{ + mCallback->OnError(aCharacterIndex); +} +@end + +namespace mozilla { +namespace dom { + +struct OSXVoice +{ + OSXVoice() : mIsDefault(false) + { + } + + nsString mUri; + nsString mName; + nsString mLocale; + bool mIsDefault; +}; + +class RegisterVoicesRunnable final : public Runnable +{ +public: + RegisterVoicesRunnable(OSXSpeechSynthesizerService* aSpeechService, + nsTArray<OSXVoice>& aList) + : mSpeechService(aSpeechService) + , mVoices(aList) + { + } + + NS_IMETHOD Run() override; + +private: + ~RegisterVoicesRunnable() + { + } + + // This runnable always use sync mode. It is unnecesarry to reference object + OSXSpeechSynthesizerService* mSpeechService; + nsTArray<OSXVoice>& mVoices; +}; + +NS_IMETHODIMP +RegisterVoicesRunnable::Run() +{ + nsresult rv; + nsCOMPtr<nsISynthVoiceRegistry> registry = + do_GetService(NS_SYNTHVOICEREGISTRY_CONTRACTID, &rv); + if (!registry) { + return rv; + } + + for (OSXVoice voice : mVoices) { + rv = registry->AddVoice(mSpeechService, voice.mUri, voice.mName, voice.mLocale, true, false); + if (NS_WARN_IF(NS_FAILED(rv))) { + continue; + } + + if (voice.mIsDefault) { + registry->SetDefaultVoice(voice.mUri, true); + } + } + + registry->NotifyVoicesChanged(); + + return NS_OK; +} + +class EnumVoicesRunnable final : public Runnable +{ +public: + explicit EnumVoicesRunnable(OSXSpeechSynthesizerService* aSpeechService) + : mSpeechService(aSpeechService) + { + } + + NS_IMETHOD Run() override; + +private: + ~EnumVoicesRunnable() + { + } + + RefPtr<OSXSpeechSynthesizerService> mSpeechService; +}; + +NS_IMETHODIMP +EnumVoicesRunnable::Run() +{ + NS_OBJC_BEGIN_TRY_ABORT_BLOCK_NSRESULT; + + AutoTArray<OSXVoice, 64> list; + + NSArray* voices = [NSSpeechSynthesizer availableVoices]; + NSString* defaultVoice = [NSSpeechSynthesizer defaultVoice]; + + for (NSString* voice in voices) { + OSXVoice item; + + NSDictionary* attr = [NSSpeechSynthesizer attributesForVoice:voice]; + + nsAutoString identifier; + nsCocoaUtils::GetStringForNSString([attr objectForKey:NSVoiceIdentifier], + identifier); + + nsCocoaUtils::GetStringForNSString([attr objectForKey:NSVoiceName], item.mName); + + nsCocoaUtils::GetStringForNSString( + [attr objectForKey:NSVoiceLocaleIdentifier], item.mLocale); + item.mLocale.ReplaceChar('_', '-'); + + item.mUri.AssignLiteral("urn:moz-tts:osx:"); + item.mUri.Append(identifier); + + if ([voice isEqualToString:defaultVoice]) { + item.mIsDefault = true; + } + + list.AppendElement(item); + } + + RefPtr<RegisterVoicesRunnable> runnable = new RegisterVoicesRunnable(mSpeechService, list); + NS_DispatchToMainThread(runnable, NS_DISPATCH_SYNC); + + return NS_OK; + + NS_OBJC_END_TRY_ABORT_BLOCK_NSRESULT; +} + +StaticRefPtr<OSXSpeechSynthesizerService> OSXSpeechSynthesizerService::sSingleton; + +NS_INTERFACE_MAP_BEGIN(OSXSpeechSynthesizerService) + NS_INTERFACE_MAP_ENTRY(nsISpeechService) + NS_INTERFACE_MAP_ENTRY(nsIObserver) + NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechService) +NS_INTERFACE_MAP_END + +NS_IMPL_ADDREF(OSXSpeechSynthesizerService) +NS_IMPL_RELEASE(OSXSpeechSynthesizerService) + +OSXSpeechSynthesizerService::OSXSpeechSynthesizerService() + : mInitialized(false) +{ +} + +OSXSpeechSynthesizerService::~OSXSpeechSynthesizerService() +{ +} + +bool +OSXSpeechSynthesizerService::Init() +{ + if (Preferences::GetBool("media.webspeech.synth.test") || + !Preferences::GetBool("media.webspeech.synth.enabled")) { + // When test is enabled, we shouldn't add OS backend (Bug 1160844) + return false; + } + + nsCOMPtr<nsIThread> thread; + if (NS_FAILED(NS_NewNamedThread("SpeechWorker", getter_AddRefs(thread)))) { + return false; + } + + // Get all the voices and register in the SynthVoiceRegistry + nsCOMPtr<nsIRunnable> runnable = new EnumVoicesRunnable(this); + thread->Dispatch(runnable, NS_DISPATCH_NORMAL); + + mInitialized = true; + return true; +} + +NS_IMETHODIMP +OSXSpeechSynthesizerService::Speak(const nsAString& aText, + const nsAString& aUri, + float aVolume, + float aRate, + float aPitch, + nsISpeechTask* aTask) +{ + NS_OBJC_BEGIN_TRY_ABORT_BLOCK_NSRESULT; + + MOZ_ASSERT(StringBeginsWith(aUri, NS_LITERAL_STRING("urn:moz-tts:osx:")), + "OSXSpeechSynthesizerService doesn't allow this voice URI"); + + NSSpeechSynthesizer* synth = [[NSSpeechSynthesizer alloc] init]; + // strlen("urn:moz-tts:osx:") == 16 + NSString* identifier = nsCocoaUtils::ToNSString(Substring(aUri, 16)); + [synth setVoice:identifier]; + + // default rate is 180-220 + [synth setObject:[NSNumber numberWithInt:aRate * 200] + forProperty:NSSpeechRateProperty error:nil]; + // volume allows 0.0-1.0 + [synth setObject:[NSNumber numberWithFloat:aVolume] + forProperty:NSSpeechVolumeProperty error:nil]; + // Use default pitch value to calculate this + NSNumber* defaultPitch = + [synth objectForProperty:NSSpeechPitchBaseProperty error:nil]; + if (defaultPitch) { + int newPitch = [defaultPitch intValue] * (aPitch / 2 + 0.5); + [synth setObject:[NSNumber numberWithInt:newPitch] + forProperty:NSSpeechPitchBaseProperty error:nil]; + } + + nsAutoString escapedText; + // We need to map the the offsets from the given text to the escaped text. + // The index of the offsets array is the position in the escaped text, + // the element value is the position in the user-supplied text. + nsTArray<size_t> offsets; + offsets.SetCapacity(aText.Length()); + + // This loop looks for occurances of "[[" or "]]", escapes them, and + // populates the offsets array to supply a map to the original offsets. + for (size_t i = 0; i < aText.Length(); i++) { + if (aText.Length() > i + 1 && + ((aText[i] == ']' && aText[i+1] == ']') || + (aText[i] == '[' && aText[i+1] == '['))) { + escapedText.AppendLiteral(DLIM_ESCAPE_START); + offsets.AppendElements(strlen(DLIM_ESCAPE_START)); + escapedText.Append(aText[i]); + offsets.AppendElement(i); + escapedText.Append(aText[++i]); + offsets.AppendElement(i); + escapedText.AppendLiteral(DLIM_ESCAPE_END); + offsets.AppendElements(strlen(DLIM_ESCAPE_END)); + } else { + escapedText.Append(aText[i]); + offsets.AppendElement(i); + } + } + + RefPtr<SpeechTaskCallback> callback = new SpeechTaskCallback(aTask, synth, offsets); + nsresult rv = aTask->Setup(callback, 0, 0, 0); + NS_ENSURE_SUCCESS(rv, rv); + + SpeechDelegate* delegate = [[SpeechDelegate alloc] initWithCallback:callback]; + [synth setDelegate:delegate]; + [delegate release ]; + + NSString* text = nsCocoaUtils::ToNSString(escapedText); + BOOL success = [synth startSpeakingString:text]; + NS_ENSURE_TRUE(success, NS_ERROR_FAILURE); + + aTask->DispatchStart(); + return NS_OK; + + NS_OBJC_END_TRY_ABORT_BLOCK_NSRESULT; +} + +NS_IMETHODIMP +OSXSpeechSynthesizerService::GetServiceType(SpeechServiceType* aServiceType) +{ + *aServiceType = nsISpeechService::SERVICETYPE_INDIRECT_AUDIO; + return NS_OK; +} + +NS_IMETHODIMP +OSXSpeechSynthesizerService::Observe(nsISupports* aSubject, const char* aTopic, + const char16_t* aData) +{ + return NS_OK; +} + +OSXSpeechSynthesizerService* +OSXSpeechSynthesizerService::GetInstance() +{ + MOZ_ASSERT(NS_IsMainThread()); + if (XRE_GetProcessType() != GeckoProcessType_Default) { + return nullptr; + } + + if (!sSingleton) { + RefPtr<OSXSpeechSynthesizerService> speechService = + new OSXSpeechSynthesizerService(); + if (speechService->Init()) { + sSingleton = speechService; + } + } + return sSingleton; +} + +already_AddRefed<OSXSpeechSynthesizerService> +OSXSpeechSynthesizerService::GetInstanceForService() +{ + RefPtr<OSXSpeechSynthesizerService> speechService = GetInstance(); + return speechService.forget(); +} + +void +OSXSpeechSynthesizerService::Shutdown() +{ + if (!sSingleton) { + return; + } + sSingleton = nullptr; +} + +} // namespace dom +} // namespace mozilla |