Diffstat (limited to 'dom/media/webspeech/recognition/SpeechRecognition.cpp')
-rw-r--r--  dom/media/webspeech/recognition/SpeechRecognition.cpp  1088
1 file changed, 0 insertions(+), 1088 deletions(-)
diff --git a/dom/media/webspeech/recognition/SpeechRecognition.cpp b/dom/media/webspeech/recognition/SpeechRecognition.cpp
deleted file mode 100644
index cd57f03ca..000000000
--- a/dom/media/webspeech/recognition/SpeechRecognition.cpp
+++ /dev/null
@@ -1,1088 +0,0 @@
-/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
-/* vim:set ts=2 sw=2 sts=2 et cindent: */
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-#include "SpeechRecognition.h"
-
-#include "nsCOMPtr.h"
-#include "nsCycleCollectionParticipant.h"
-
-#include "mozilla/dom/BindingUtils.h"
-#include "mozilla/dom/Element.h"
-#include "mozilla/dom/SpeechRecognitionBinding.h"
-#include "mozilla/dom/MediaStreamTrackBinding.h"
-#include "mozilla/dom/MediaStreamError.h"
-#include "mozilla/MediaManager.h"
-#include "mozilla/Preferences.h"
-#include "MediaPrefs.h"
-#include "mozilla/Services.h"
-
-#include "AudioSegment.h"
-#include "endpointer.h"
-
-#include "SpeechGrammar.h"
-#include "mozilla/dom/SpeechRecognitionEvent.h"
-#include "nsContentUtils.h"
-#include "nsIDocument.h"
-#include "nsIObserverService.h"
-#include "nsIPermissionManager.h"
-#include "nsIPrincipal.h"
-#include "nsPIDOMWindow.h"
-#include "nsServiceManagerUtils.h"
-#include "nsQueryObject.h"
-
-#include <algorithm>
-
-// Undo the windows.h damage
-#if defined(XP_WIN) && defined(GetMessage)
-#undef GetMessage
-#endif
-
-namespace mozilla {
-namespace dom {
-
-#define PREFERENCE_DEFAULT_RECOGNITION_SERVICE "media.webspeech.service.default"
-#define DEFAULT_RECOGNITION_SERVICE_PREFIX "pocketsphinx-"
-#define DEFAULT_RECOGNITION_SERVICE "pocketsphinx-en-US"
-
-#define PREFERENCE_ENDPOINTER_SILENCE_LENGTH "media.webspeech.silence_length"
-#define PREFERENCE_ENDPOINTER_LONG_SILENCE_LENGTH "media.webspeech.long_silence_length"
-#define PREFERENCE_ENDPOINTER_LONG_SPEECH_LENGTH "media.webspeech.long_speech_length"
-
-static const uint32_t kSAMPLE_RATE = 16000;
-static const uint32_t kSPEECH_DETECTION_TIMEOUT_MS = 10000;
-
-// number of samples corresponding to 300ms of audio to send to the endpointer
-// while it's in environment estimation mode
-// (kSAMPLE_RATE samples = 1s, so kESTIMATION_SAMPLES samples = 300ms)
-static const uint32_t kESTIMATION_SAMPLES = 300 * kSAMPLE_RATE / 1000;
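-// e.g. with kSAMPLE_RATE = 16000 as above, this works out to
-// kESTIMATION_SAMPLES = 300 * 16000 / 1000 = 4800 samples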
-
-LogModule*
-GetSpeechRecognitionLog()
-{
- static LazyLogModule sLog("SpeechRecognition");
- return sLog;
-}
-#define SR_LOG(...) MOZ_LOG(GetSpeechRecognitionLog(), mozilla::LogLevel::Debug, (__VA_ARGS__))
-
-already_AddRefed<nsISpeechRecognitionService>
-GetSpeechRecognitionService(const nsAString& aLang)
-{
- nsAutoCString speechRecognitionServiceCID;
-
- nsAdoptingCString prefValue =
- Preferences::GetCString(PREFERENCE_DEFAULT_RECOGNITION_SERVICE);
- nsAutoCString speechRecognitionService;
-
- if (!aLang.IsEmpty()) {
- speechRecognitionService =
- NS_LITERAL_CSTRING(DEFAULT_RECOGNITION_SERVICE_PREFIX) +
- NS_ConvertUTF16toUTF8(aLang);
- } else if (!prefValue.IsEmpty()) {
- speechRecognitionService = prefValue;
- } else {
- speechRecognitionService = DEFAULT_RECOGNITION_SERVICE;
- }
-
- if (MediaPrefs::WebSpeechFakeRecognitionService()) {
- speechRecognitionServiceCID =
- NS_SPEECH_RECOGNITION_SERVICE_CONTRACTID_PREFIX "fake";
- } else {
- speechRecognitionServiceCID =
- NS_LITERAL_CSTRING(NS_SPEECH_RECOGNITION_SERVICE_CONTRACTID_PREFIX) +
- speechRecognitionService;
- }
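-  // e.g. for aLang = "en-US" with no pref override this selects
-  // "pocketsphinx-en-US"; assuming the usual contract ID prefix of
-  // "@mozilla.org/webspeech/service;1?name=", the CID then becomes
-  // "@mozilla.org/webspeech/service;1?name=pocketsphinx-en-US"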
-
- nsresult rv;
- nsCOMPtr<nsISpeechRecognitionService> recognitionService;
- recognitionService = do_GetService(speechRecognitionServiceCID.get(), &rv);
- return recognitionService.forget();
-}
-
-NS_IMPL_CYCLE_COLLECTION_INHERITED(SpeechRecognition, DOMEventTargetHelper, mDOMStream, mSpeechGrammarList)
-
-NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION_INHERITED(SpeechRecognition)
- NS_INTERFACE_MAP_ENTRY(nsIObserver)
-NS_INTERFACE_MAP_END_INHERITING(DOMEventTargetHelper)
-
-NS_IMPL_ADDREF_INHERITED(SpeechRecognition, DOMEventTargetHelper)
-NS_IMPL_RELEASE_INHERITED(SpeechRecognition, DOMEventTargetHelper)
-
-SpeechRecognition::SpeechRecognition(nsPIDOMWindowInner* aOwnerWindow)
- : DOMEventTargetHelper(aOwnerWindow)
- , mEndpointer(kSAMPLE_RATE)
- , mAudioSamplesPerChunk(mEndpointer.FrameSize())
- , mSpeechDetectionTimer(do_CreateInstance(NS_TIMER_CONTRACTID))
- , mSpeechGrammarList(new SpeechGrammarList(GetParentObject()))
- , mInterimResults(false)
- , mMaxAlternatives(1)
-{
- SR_LOG("created SpeechRecognition");
-
- if (MediaPrefs::WebSpeechTestEnabled()) {
- nsCOMPtr<nsIObserverService> obs = services::GetObserverService();
- obs->AddObserver(this, SPEECH_RECOGNITION_TEST_EVENT_REQUEST_TOPIC, false);
- obs->AddObserver(this, SPEECH_RECOGNITION_TEST_END_TOPIC, false);
- }
-
- mEndpointer.set_speech_input_complete_silence_length(
- Preferences::GetInt(PREFERENCE_ENDPOINTER_SILENCE_LENGTH, 1250000));
- mEndpointer.set_long_speech_input_complete_silence_length(
- Preferences::GetInt(PREFERENCE_ENDPOINTER_LONG_SILENCE_LENGTH, 2500000));
-  mEndpointer.set_long_speech_length(
-    Preferences::GetInt(PREFERENCE_ENDPOINTER_LONG_SPEECH_LENGTH, 3 * 1000000));
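-  // NOTE: the endpointer lengths appear to be in microseconds, so the
-  // defaults above correspond to 1.25 s, 2.5 s and 3 s respectively.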
- Reset();
-}
-
-bool
-SpeechRecognition::StateBetween(FSMState begin, FSMState end)
-{
- return mCurrentState >= begin && mCurrentState <= end;
-}
-
-void
-SpeechRecognition::SetState(FSMState state)
-{
- mCurrentState = state;
- SR_LOG("Transitioned to state %s", GetName(mCurrentState));
-}
-
-JSObject*
-SpeechRecognition::WrapObject(JSContext* aCx, JS::Handle<JSObject*> aGivenProto)
-{
- return SpeechRecognitionBinding::Wrap(aCx, this, aGivenProto);
-}
-
-bool
-SpeechRecognition::IsAuthorized(JSContext* aCx, JSObject* aGlobal)
-{
- nsCOMPtr<nsIPrincipal> principal = nsContentUtils::ObjectPrincipal(aGlobal);
-
- nsresult rv;
- nsCOMPtr<nsIPermissionManager> mgr = do_GetService(NS_PERMISSIONMANAGER_CONTRACTID, &rv);
- if (NS_WARN_IF(NS_FAILED(rv))) {
- return false;
- }
-
- uint32_t speechRecognition = nsIPermissionManager::UNKNOWN_ACTION;
- rv = mgr->TestExactPermissionFromPrincipal(principal, "speech-recognition", &speechRecognition);
- if (NS_WARN_IF(NS_FAILED(rv))) {
- return false;
- }
-
- bool hasPermission = (speechRecognition == nsIPermissionManager::ALLOW_ACTION);
-
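-  // Recognition is available only when the API pref is enabled and, in
-  // addition, the page has the permission or a test/force pref is set.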
- return (hasPermission || MediaPrefs::WebSpeechRecognitionForceEnabled()
- || MediaPrefs::WebSpeechTestEnabled())
- && MediaPrefs::WebSpeechRecognitionEnabled();
-}
-
-already_AddRefed<SpeechRecognition>
-SpeechRecognition::Constructor(const GlobalObject& aGlobal,
- ErrorResult& aRv)
-{
- nsCOMPtr<nsPIDOMWindowInner> win = do_QueryInterface(aGlobal.GetAsSupports());
-  if (!win) {
-    aRv.Throw(NS_ERROR_FAILURE);
-    return nullptr;
-  }
-
- MOZ_ASSERT(win->IsInnerWindow());
- RefPtr<SpeechRecognition> object = new SpeechRecognition(win);
- return object.forget();
-}
-
-nsISupports*
-SpeechRecognition::GetParentObject() const
-{
- return GetOwner();
-}
-
-void
-SpeechRecognition::ProcessEvent(SpeechEvent* aEvent)
-{
- SR_LOG("Processing %s, current state is %s",
- GetName(aEvent),
- GetName(mCurrentState));
-
- if (mAborted && aEvent->mType != EVENT_ABORT) {
- // ignore all events while aborting
- return;
- }
-
- Transition(aEvent);
-}
-
-void
-SpeechRecognition::Transition(SpeechEvent* aEvent)
-{
- switch (mCurrentState) {
- case STATE_IDLE:
- switch (aEvent->mType) {
- case EVENT_START:
- // TODO: may want to time out if we wait too long
- // for user to approve
- WaitForAudioData(aEvent);
- break;
- case EVENT_STOP:
- case EVENT_ABORT:
- case EVENT_AUDIO_DATA:
- case EVENT_RECOGNITIONSERVICE_INTERMEDIATE_RESULT:
- case EVENT_RECOGNITIONSERVICE_FINAL_RESULT:
- DoNothing(aEvent);
- break;
- case EVENT_AUDIO_ERROR:
- case EVENT_RECOGNITIONSERVICE_ERROR:
- AbortError(aEvent);
- break;
- case EVENT_COUNT:
- MOZ_CRASH("Invalid event EVENT_COUNT");
- }
- break;
- case STATE_STARTING:
- switch (aEvent->mType) {
- case EVENT_AUDIO_DATA:
- StartedAudioCapture(aEvent);
- break;
- case EVENT_AUDIO_ERROR:
- case EVENT_RECOGNITIONSERVICE_ERROR:
- AbortError(aEvent);
- break;
- case EVENT_ABORT:
- AbortSilently(aEvent);
- break;
- case EVENT_STOP:
- Reset();
- break;
- case EVENT_RECOGNITIONSERVICE_INTERMEDIATE_RESULT:
- case EVENT_RECOGNITIONSERVICE_FINAL_RESULT:
- DoNothing(aEvent);
- break;
- case EVENT_START:
- SR_LOG("STATE_STARTING: Unhandled event %s", GetName(aEvent));
- MOZ_CRASH();
- case EVENT_COUNT:
- MOZ_CRASH("Invalid event EVENT_COUNT");
- }
- break;
- case STATE_ESTIMATING:
- switch (aEvent->mType) {
- case EVENT_AUDIO_DATA:
- WaitForEstimation(aEvent);
- break;
- case EVENT_STOP:
- StopRecordingAndRecognize(aEvent);
- break;
- case EVENT_ABORT:
- AbortSilently(aEvent);
- break;
- case EVENT_RECOGNITIONSERVICE_INTERMEDIATE_RESULT:
- case EVENT_RECOGNITIONSERVICE_FINAL_RESULT:
- case EVENT_RECOGNITIONSERVICE_ERROR:
- DoNothing(aEvent);
- break;
- case EVENT_AUDIO_ERROR:
- AbortError(aEvent);
- break;
- case EVENT_START:
- SR_LOG("STATE_ESTIMATING: Unhandled event %d", aEvent->mType);
- MOZ_CRASH();
- case EVENT_COUNT:
- MOZ_CRASH("Invalid event EVENT_COUNT");
- }
- break;
- case STATE_WAITING_FOR_SPEECH:
- switch (aEvent->mType) {
- case EVENT_AUDIO_DATA:
- DetectSpeech(aEvent);
- break;
- case EVENT_STOP:
- StopRecordingAndRecognize(aEvent);
- break;
- case EVENT_ABORT:
- AbortSilently(aEvent);
- break;
- case EVENT_AUDIO_ERROR:
- AbortError(aEvent);
- break;
- case EVENT_RECOGNITIONSERVICE_INTERMEDIATE_RESULT:
- case EVENT_RECOGNITIONSERVICE_FINAL_RESULT:
- case EVENT_RECOGNITIONSERVICE_ERROR:
- DoNothing(aEvent);
- break;
- case EVENT_START:
- SR_LOG("STATE_STARTING: Unhandled event %s", GetName(aEvent));
- MOZ_CRASH();
- case EVENT_COUNT:
- MOZ_CRASH("Invalid event EVENT_COUNT");
- }
- break;
- case STATE_RECOGNIZING:
- switch (aEvent->mType) {
- case EVENT_AUDIO_DATA:
- WaitForSpeechEnd(aEvent);
- break;
- case EVENT_STOP:
- StopRecordingAndRecognize(aEvent);
- break;
- case EVENT_AUDIO_ERROR:
- case EVENT_RECOGNITIONSERVICE_ERROR:
- AbortError(aEvent);
- break;
- case EVENT_ABORT:
- AbortSilently(aEvent);
- break;
- case EVENT_RECOGNITIONSERVICE_FINAL_RESULT:
- case EVENT_RECOGNITIONSERVICE_INTERMEDIATE_RESULT:
- DoNothing(aEvent);
- break;
- case EVENT_START:
- SR_LOG("STATE_RECOGNIZING: Unhandled aEvent %s", GetName(aEvent));
- MOZ_CRASH();
- case EVENT_COUNT:
- MOZ_CRASH("Invalid event EVENT_COUNT");
- }
- break;
- case STATE_WAITING_FOR_RESULT:
- switch (aEvent->mType) {
- case EVENT_STOP:
- DoNothing(aEvent);
- break;
- case EVENT_AUDIO_ERROR:
- case EVENT_RECOGNITIONSERVICE_ERROR:
- AbortError(aEvent);
- break;
- case EVENT_RECOGNITIONSERVICE_FINAL_RESULT:
- NotifyFinalResult(aEvent);
- break;
- case EVENT_AUDIO_DATA:
- DoNothing(aEvent);
- break;
- case EVENT_ABORT:
- AbortSilently(aEvent);
- break;
- case EVENT_START:
- case EVENT_RECOGNITIONSERVICE_INTERMEDIATE_RESULT:
- SR_LOG("STATE_WAITING_FOR_RESULT: Unhandled aEvent %s", GetName(aEvent));
- MOZ_CRASH();
- case EVENT_COUNT:
- MOZ_CRASH("Invalid event EVENT_COUNT");
- }
- break;
- case STATE_COUNT:
- MOZ_CRASH("Invalid state STATE_COUNT");
- }
-}
-
-/*
- * Handle a segment of recorded audio data.
- * Returns the number of samples that were processed.
- */
-uint32_t
-SpeechRecognition::ProcessAudioSegment(AudioSegment* aSegment, TrackRate aTrackRate)
-{
- AudioSegment::ChunkIterator iterator(*aSegment);
- uint32_t samples = 0;
- while (!iterator.IsEnded()) {
- float out;
- mEndpointer.ProcessAudio(*iterator, &out);
- samples += iterator->GetDuration();
- iterator.Next();
- }
-
- mRecognitionService->ProcessAudioSegment(aSegment, aTrackRate);
- return samples;
-}
-
-/****************************************************************************
- * FSM Transition functions
- *
- * If a transition function may cause a DOM event to be fired,
- * it may also be re-entered, since the event handler may cause the
- * event loop to spin and new SpeechEvents to be processed.
- *
- * Rules:
- * 1) These methods should call SetState as soon as possible.
- * 2) If these methods dispatch DOM events, or call methods that dispatch
- * DOM events, that should be done as late as possible.
- * 3) If anything must happen after dispatching a DOM event, make sure
- * the state is still what the method expected it to be.
- ****************************************************************************/
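-// For example, StartedAudioCapture() below sets STATE_ESTIMATING first, then
-// dispatches "audiostart" (which can run script), and re-checks mCurrentState
-// before dispatching "start", following rules 1-3 above.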
-
-void
-SpeechRecognition::Reset()
-{
- SetState(STATE_IDLE);
- mRecognitionService = nullptr;
- mEstimationSamples = 0;
- mBufferedSamples = 0;
- mSpeechDetectionTimer->Cancel();
- mAborted = false;
-}
-
-void
-SpeechRecognition::ResetAndEnd()
-{
- Reset();
- DispatchTrustedEvent(NS_LITERAL_STRING("end"));
-}
-
-void
-SpeechRecognition::WaitForAudioData(SpeechEvent* aEvent)
-{
- SetState(STATE_STARTING);
-}
-
-void
-SpeechRecognition::StartedAudioCapture(SpeechEvent* aEvent)
-{
- SetState(STATE_ESTIMATING);
-
- mEndpointer.SetEnvironmentEstimationMode();
- mEstimationSamples += ProcessAudioSegment(aEvent->mAudioSegment, aEvent->mTrackRate);
-
- DispatchTrustedEvent(NS_LITERAL_STRING("audiostart"));
- if (mCurrentState == STATE_ESTIMATING) {
- DispatchTrustedEvent(NS_LITERAL_STRING("start"));
- }
-}
-
-void
-SpeechRecognition::StopRecordingAndRecognize(SpeechEvent* aEvent)
-{
- SetState(STATE_WAITING_FOR_RESULT);
-
- MOZ_ASSERT(mRecognitionService, "Service deleted before recording done");
- mRecognitionService->SoundEnd();
-
- StopRecording();
-}
-
-void
-SpeechRecognition::WaitForEstimation(SpeechEvent* aEvent)
-{
- SetState(STATE_ESTIMATING);
-
- mEstimationSamples += ProcessAudioSegment(aEvent->mAudioSegment, aEvent->mTrackRate);
- if (mEstimationSamples > kESTIMATION_SAMPLES) {
- mEndpointer.SetUserInputMode();
- SetState(STATE_WAITING_FOR_SPEECH);
- }
-}
-
-void
-SpeechRecognition::DetectSpeech(SpeechEvent* aEvent)
-{
- SetState(STATE_WAITING_FOR_SPEECH);
-
- ProcessAudioSegment(aEvent->mAudioSegment, aEvent->mTrackRate);
- if (mEndpointer.DidStartReceivingSpeech()) {
- mSpeechDetectionTimer->Cancel();
- SetState(STATE_RECOGNIZING);
- DispatchTrustedEvent(NS_LITERAL_STRING("speechstart"));
- }
-}
-
-void
-SpeechRecognition::WaitForSpeechEnd(SpeechEvent* aEvent)
-{
- SetState(STATE_RECOGNIZING);
-
- ProcessAudioSegment(aEvent->mAudioSegment, aEvent->mTrackRate);
- if (mEndpointer.speech_input_complete()) {
- DispatchTrustedEvent(NS_LITERAL_STRING("speechend"));
-
- if (mCurrentState == STATE_RECOGNIZING) {
-      // FIXME: StopRecordingAndRecognize should only be called for single-
-      // shot services; for continuous ones we should just inform the service
- StopRecordingAndRecognize(aEvent);
- }
- }
-}
-
-void
-SpeechRecognition::NotifyFinalResult(SpeechEvent* aEvent)
-{
- ResetAndEnd();
-
- RootedDictionary<SpeechRecognitionEventInit> init(RootingCx());
- init.mBubbles = true;
- init.mCancelable = false;
- // init.mResultIndex = 0;
- init.mResults = aEvent->mRecognitionResultList;
- init.mInterpretation = JS::NullValue();
- // init.mEmma = nullptr;
-
- RefPtr<SpeechRecognitionEvent> event =
- SpeechRecognitionEvent::Constructor(this, NS_LITERAL_STRING("result"), init);
- event->SetTrusted(true);
-
- bool defaultActionEnabled;
- this->DispatchEvent(event, &defaultActionEnabled);
-}
-
-void
-SpeechRecognition::DoNothing(SpeechEvent* aEvent)
-{
-}
-
-void
-SpeechRecognition::AbortSilently(SpeechEvent* aEvent)
-{
- if (mRecognitionService) {
- mRecognitionService->Abort();
- }
-
- if (mDOMStream) {
- StopRecording();
- }
-
- ResetAndEnd();
-}
-
-void
-SpeechRecognition::AbortError(SpeechEvent* aEvent)
-{
- AbortSilently(aEvent);
- NotifyError(aEvent);
-}
-
-void
-SpeechRecognition::NotifyError(SpeechEvent* aEvent)
-{
- aEvent->mError->SetTrusted(true);
-
- bool defaultActionEnabled;
- this->DispatchEvent(aEvent->mError, &defaultActionEnabled);
-}
-
-/**************************************
- * Event triggers and other functions *
- **************************************/
-NS_IMETHODIMP
-SpeechRecognition::StartRecording(DOMMediaStream* aDOMStream)
-{
- // hold a reference so that the underlying stream
- // doesn't get Destroy()'ed
- mDOMStream = aDOMStream;
-
- if (NS_WARN_IF(!mDOMStream->GetPlaybackStream())) {
- return NS_ERROR_UNEXPECTED;
- }
- mSpeechListener = new SpeechStreamListener(this);
- mDOMStream->GetPlaybackStream()->AddListener(mSpeechListener);
-
- mEndpointer.StartSession();
-
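-  // The one-shot timer notifies us via Observe() with NS_TIMER_CALLBACK_TOPIC
-  // if no speech has been detected when the timeout fires.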
- return mSpeechDetectionTimer->Init(this, kSPEECH_DETECTION_TIMEOUT_MS,
- nsITimer::TYPE_ONE_SHOT);
-}
-
-NS_IMETHODIMP
-SpeechRecognition::StopRecording()
-{
-  // we only really need to remove the listener explicitly when testing, as
-  // our JS code still holds a reference to mDOMStream, and merely assigning
-  // it to nullptr isn't guaranteed to free the stream and the listener.
- mDOMStream->GetPlaybackStream()->RemoveListener(mSpeechListener);
- mSpeechListener = nullptr;
- mDOMStream = nullptr;
-
- mEndpointer.EndSession();
- DispatchTrustedEvent(NS_LITERAL_STRING("audioend"));
-
- return NS_OK;
-}
-
-NS_IMETHODIMP
-SpeechRecognition::Observe(nsISupports* aSubject, const char* aTopic,
- const char16_t* aData)
-{
- MOZ_ASSERT(NS_IsMainThread(), "Observer invoked off the main thread");
-
- if (!strcmp(aTopic, NS_TIMER_CALLBACK_TOPIC) &&
- StateBetween(STATE_IDLE, STATE_WAITING_FOR_SPEECH)) {
-
- DispatchError(SpeechRecognition::EVENT_AUDIO_ERROR,
- SpeechRecognitionErrorCode::No_speech,
- NS_LITERAL_STRING("No speech detected (timeout)"));
- } else if (!strcmp(aTopic, SPEECH_RECOGNITION_TEST_END_TOPIC)) {
- nsCOMPtr<nsIObserverService> obs = services::GetObserverService();
- obs->RemoveObserver(this, SPEECH_RECOGNITION_TEST_EVENT_REQUEST_TOPIC);
- obs->RemoveObserver(this, SPEECH_RECOGNITION_TEST_END_TOPIC);
- } else if (MediaPrefs::WebSpeechFakeFSMEvents() &&
- !strcmp(aTopic, SPEECH_RECOGNITION_TEST_EVENT_REQUEST_TOPIC)) {
- ProcessTestEventRequest(aSubject, nsDependentString(aData));
- }
-
- return NS_OK;
-}
-
-void
-SpeechRecognition::ProcessTestEventRequest(nsISupports* aSubject, const nsAString& aEventName)
-{
- if (aEventName.EqualsLiteral("EVENT_ABORT")) {
- Abort();
- } else if (aEventName.EqualsLiteral("EVENT_AUDIO_ERROR")) {
- DispatchError(SpeechRecognition::EVENT_AUDIO_ERROR,
- SpeechRecognitionErrorCode::Audio_capture, // TODO different codes?
- NS_LITERAL_STRING("AUDIO_ERROR test event"));
- } else {
- NS_ASSERTION(MediaPrefs::WebSpeechFakeRecognitionService(),
- "Got request for fake recognition service event, but "
- TEST_PREFERENCE_FAKE_RECOGNITION_SERVICE " is unset");
-
- // let the fake recognition service handle the request
- }
-}
-
-already_AddRefed<SpeechGrammarList>
-SpeechRecognition::Grammars() const
-{
- RefPtr<SpeechGrammarList> speechGrammarList = mSpeechGrammarList;
- return speechGrammarList.forget();
-}
-
-void
-SpeechRecognition::SetGrammars(SpeechGrammarList& aArg)
-{
- mSpeechGrammarList = &aArg;
-}
-
-void
-SpeechRecognition::GetLang(nsString& aRetVal) const
-{
- aRetVal = mLang;
-}
-
-void
-SpeechRecognition::SetLang(const nsAString& aArg)
-{
- mLang = aArg;
-}
-
-bool
-SpeechRecognition::GetContinuous(ErrorResult& aRv) const
-{
- aRv.Throw(NS_ERROR_NOT_IMPLEMENTED);
- return false;
-}
-
-void
-SpeechRecognition::SetContinuous(bool aArg, ErrorResult& aRv)
-{
- aRv.Throw(NS_ERROR_NOT_IMPLEMENTED);
-}
-
-bool
-SpeechRecognition::InterimResults() const
-{
- return mInterimResults;
-}
-
-void
-SpeechRecognition::SetInterimResults(bool aArg)
-{
- mInterimResults = aArg;
-}
-
-uint32_t
-SpeechRecognition::MaxAlternatives() const
-{
- return mMaxAlternatives;
-}
-
-void
-SpeechRecognition::SetMaxAlternatives(uint32_t aArg)
-{
- mMaxAlternatives = aArg;
-}
-
-void
-SpeechRecognition::GetServiceURI(nsString& aRetVal, ErrorResult& aRv) const
-{
- aRv.Throw(NS_ERROR_NOT_IMPLEMENTED);
-}
-
-void
-SpeechRecognition::SetServiceURI(const nsAString& aArg, ErrorResult& aRv)
-{
- aRv.Throw(NS_ERROR_NOT_IMPLEMENTED);
-}
-
-void
-SpeechRecognition::Start(const Optional<NonNull<DOMMediaStream>>& aStream, ErrorResult& aRv)
-{
- if (mCurrentState != STATE_IDLE) {
- aRv.Throw(NS_ERROR_DOM_INVALID_STATE_ERR);
- return;
- }
-
- if (!SetRecognitionService(aRv)) {
- return;
- }
-
- if (!ValidateAndSetGrammarList(aRv)) {
- return;
- }
-
-  nsresult rv = mRecognitionService->Initialize(this);
- if (NS_WARN_IF(NS_FAILED(rv))) {
- return;
- }
-
- MediaStreamConstraints constraints;
- constraints.mAudio.SetAsBoolean() = true;
-
- if (aStream.WasPassed()) {
- StartRecording(&aStream.Value());
- } else {
-    // a bare "AutoNoJSAPI();" would create and immediately destroy a
-    // temporary, so name the RAII guard to keep it alive for the whole block
-    AutoNoJSAPI nojsapi;
- MediaManager* manager = MediaManager::Get();
- manager->GetUserMedia(GetOwner(),
- constraints,
- new GetUserMediaSuccessCallback(this),
- new GetUserMediaErrorCallback(this));
- }
-
- RefPtr<SpeechEvent> event = new SpeechEvent(this, EVENT_START);
- NS_DispatchToMainThread(event);
-}
-
-bool
-SpeechRecognition::SetRecognitionService(ErrorResult& aRv)
-{
- // See: https://dvcs.w3.org/hg/speech-api/raw-file/tip/webspeechapi.html#dfn-lang
- if (!mLang.IsEmpty()) {
- mRecognitionService = GetSpeechRecognitionService(mLang);
-
- if (!mRecognitionService) {
- aRv.Throw(NS_ERROR_DOM_INVALID_STATE_ERR);
- return false;
- }
-
- return true;
- }
-
- nsCOMPtr<nsPIDOMWindowInner> window = GetOwner();
-  if (!window) {
- aRv.Throw(NS_ERROR_DOM_INVALID_STATE_ERR);
- return false;
- }
- nsCOMPtr<nsIDocument> document = window->GetExtantDoc();
-  if (!document) {
- aRv.Throw(NS_ERROR_DOM_INVALID_STATE_ERR);
- return false;
- }
- nsCOMPtr<Element> element = document->GetRootElement();
-  if (!element) {
- aRv.Throw(NS_ERROR_DOM_INVALID_STATE_ERR);
- return false;
- }
-
- nsAutoString lang;
- element->GetLang(lang);
- mRecognitionService = GetSpeechRecognitionService(lang);
-
- if (!mRecognitionService) {
- aRv.Throw(NS_ERROR_DOM_INVALID_STATE_ERR);
- return false;
- }
-
- return true;
-}
-
-bool
-SpeechRecognition::ValidateAndSetGrammarList(ErrorResult& aRv)
-{
- if (!mSpeechGrammarList) {
- aRv.Throw(NS_ERROR_DOM_INVALID_STATE_ERR);
- return false;
- }
-
- uint32_t grammarListLength = mSpeechGrammarList->Length();
- if (0 == grammarListLength) {
- aRv.Throw(NS_ERROR_DOM_INVALID_STATE_ERR);
- return false;
- }
-
- for (uint32_t count = 0; count < grammarListLength; ++count) {
- RefPtr<SpeechGrammar> speechGrammar = mSpeechGrammarList->Item(count, aRv);
- if (aRv.Failed()) {
- return false;
- }
- if (NS_FAILED(mRecognitionService->ValidateAndSetGrammarList(speechGrammar.get(), nullptr))) {
- aRv.Throw(NS_ERROR_DOM_INVALID_STATE_ERR);
- return false;
- }
- }
-
- return true;
-}
-
-void
-SpeechRecognition::Stop()
-{
- RefPtr<SpeechEvent> event = new SpeechEvent(this, EVENT_STOP);
- NS_DispatchToMainThread(event);
-}
-
-void
-SpeechRecognition::Abort()
-{
- if (mAborted) {
- return;
- }
-
- mAborted = true;
- RefPtr<SpeechEvent> event = new SpeechEvent(this, EVENT_ABORT);
- NS_DispatchToMainThread(event);
-}
-
-void
-SpeechRecognition::DispatchError(EventType aErrorType,
- SpeechRecognitionErrorCode aErrorCode,
- const nsAString& aMessage)
-{
- MOZ_ASSERT(NS_IsMainThread());
- MOZ_ASSERT(aErrorType == EVENT_RECOGNITIONSERVICE_ERROR ||
- aErrorType == EVENT_AUDIO_ERROR, "Invalid error type!");
-
- RefPtr<SpeechRecognitionError> srError =
- new SpeechRecognitionError(nullptr, nullptr, nullptr);
-
- srError->InitSpeechRecognitionError(NS_LITERAL_STRING("error"), true, false,
- aErrorCode, aMessage);
-
- RefPtr<SpeechEvent> event = new SpeechEvent(this, aErrorType);
- event->mError = srError;
- NS_DispatchToMainThread(event);
-}
-
-/*
- * Buffer audio samples in mAudioSamplesBuffer until it holds a full chunk
- * of mAudioSamplesPerChunk samples.
- * Updates mBufferedSamples and returns the number of samples that were
- * buffered.
- */
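-// A worked example with illustrative values: if mAudioSamplesPerChunk were
-// 160 and mBufferedSamples were 100, a call with aSampleCount = 500 would
-// copy 60 samples, leaving the buffer full, and return 60.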
-uint32_t
-SpeechRecognition::FillSamplesBuffer(const int16_t* aSamples,
- uint32_t aSampleCount)
-{
- MOZ_ASSERT(mBufferedSamples < mAudioSamplesPerChunk);
- MOZ_ASSERT(mAudioSamplesBuffer.get());
-
- int16_t* samplesBuffer = static_cast<int16_t*>(mAudioSamplesBuffer->Data());
- size_t samplesToCopy = std::min(aSampleCount,
- mAudioSamplesPerChunk - mBufferedSamples);
-
- memcpy(samplesBuffer + mBufferedSamples, aSamples,
- samplesToCopy * sizeof(int16_t));
-
- mBufferedSamples += samplesToCopy;
- return samplesToCopy;
-}
-
-/*
- * Split a buffer of samples into chunks of mAudioSamplesPerChunk samples
- * each. The chunks are appended to the array received as argument.
- * Returns the offset of the end of the last chunk that was created, i.e.
- * the number of samples consumed.
- */
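-// A worked example with illustrative values: with mAudioSamplesPerChunk =
-// 160 and aSampleCount = 500, three chunks (480 samples) are created and
-// 480 is returned; the caller buffers the remaining 20 samples.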
-uint32_t
-SpeechRecognition::SplitSamplesBuffer(const int16_t* aSamplesBuffer,
- uint32_t aSampleCount,
- nsTArray<RefPtr<SharedBuffer>>& aResult)
-{
- uint32_t chunkStart = 0;
-
- while (chunkStart + mAudioSamplesPerChunk <= aSampleCount) {
- RefPtr<SharedBuffer> chunk =
- SharedBuffer::Create(mAudioSamplesPerChunk * sizeof(int16_t));
-
- memcpy(chunk->Data(), aSamplesBuffer + chunkStart,
- mAudioSamplesPerChunk * sizeof(int16_t));
-
- aResult.AppendElement(chunk.forget());
- chunkStart += mAudioSamplesPerChunk;
- }
-
- return chunkStart;
-}
-
-AudioSegment*
-SpeechRecognition::CreateAudioSegment(nsTArray<RefPtr<SharedBuffer>>& aChunks)
-{
- AudioSegment* segment = new AudioSegment();
- for (uint32_t i = 0; i < aChunks.Length(); ++i) {
- RefPtr<SharedBuffer> buffer = aChunks[i];
- const int16_t* chunkData = static_cast<const int16_t*>(buffer->Data());
-
- AutoTArray<const int16_t*, 1> channels;
- channels.AppendElement(chunkData);
- segment->AppendFrames(buffer.forget(), channels, mAudioSamplesPerChunk,
- PRINCIPAL_HANDLE_NONE);
- }
-
- return segment;
-}
-
-void
-SpeechRecognition::FeedAudioData(already_AddRefed<SharedBuffer> aSamples,
- uint32_t aDuration,
- MediaStreamListener* aProvider, TrackRate aTrackRate)
-{
-  NS_ASSERTION(!NS_IsMainThread(),
-               "FeedAudioData should not be called on the main thread");
-
- // Endpointer expects to receive samples in chunks whose size is a
- // multiple of its frame size.
- // Since we can't assume we will receive the frames in appropriate-sized
- // chunks, we must buffer and split them in chunks of mAudioSamplesPerChunk
- // (a multiple of Endpointer's frame size) before feeding to Endpointer.
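-  //
-  // Putting it together with illustrative values (mAudioSamplesPerChunk =
-  // 160): with 100 samples buffered from a previous call and aDuration = 500,
-  // we top the buffer up with 60 samples (one full chunk), split the next
-  // 320 samples into two more chunks, and buffer the final 120 samples.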
-
- // ensure aSamples is deleted
- RefPtr<SharedBuffer> refSamples = aSamples;
-
- uint32_t samplesIndex = 0;
- const int16_t* samples = static_cast<int16_t*>(refSamples->Data());
- AutoTArray<RefPtr<SharedBuffer>, 5> chunksToSend;
-
- // fill up our buffer and make a chunk out of it, if possible
- if (mBufferedSamples > 0) {
- samplesIndex += FillSamplesBuffer(samples, aDuration);
-
- if (mBufferedSamples == mAudioSamplesPerChunk) {
- chunksToSend.AppendElement(mAudioSamplesBuffer.forget());
- mBufferedSamples = 0;
- }
- }
-
- // create sample chunks of correct size
- if (samplesIndex < aDuration) {
- samplesIndex += SplitSamplesBuffer(samples + samplesIndex,
- aDuration - samplesIndex,
- chunksToSend);
- }
-
- // buffer remaining samples
- if (samplesIndex < aDuration) {
- mBufferedSamples = 0;
- mAudioSamplesBuffer =
- SharedBuffer::Create(mAudioSamplesPerChunk * sizeof(int16_t));
-
- FillSamplesBuffer(samples + samplesIndex, aDuration - samplesIndex);
- }
-
- AudioSegment* segment = CreateAudioSegment(chunksToSend);
- RefPtr<SpeechEvent> event = new SpeechEvent(this, EVENT_AUDIO_DATA);
- event->mAudioSegment = segment;
- event->mProvider = aProvider;
- event->mTrackRate = aTrackRate;
- NS_DispatchToMainThread(event);
-}
-
-const char*
-SpeechRecognition::GetName(FSMState aId)
-{
- static const char* names[] = {
- "STATE_IDLE",
- "STATE_STARTING",
- "STATE_ESTIMATING",
- "STATE_WAITING_FOR_SPEECH",
- "STATE_RECOGNIZING",
- "STATE_WAITING_FOR_RESULT",
- };
-
- MOZ_ASSERT(aId < STATE_COUNT);
- MOZ_ASSERT(ArrayLength(names) == STATE_COUNT);
- return names[aId];
-}
-
-const char*
-SpeechRecognition::GetName(SpeechEvent* aEvent)
-{
- static const char* names[] = {
- "EVENT_START",
- "EVENT_STOP",
- "EVENT_ABORT",
- "EVENT_AUDIO_DATA",
- "EVENT_AUDIO_ERROR",
- "EVENT_RECOGNITIONSERVICE_INTERMEDIATE_RESULT",
- "EVENT_RECOGNITIONSERVICE_FINAL_RESULT",
- "EVENT_RECOGNITIONSERVICE_ERROR"
- };
-
- MOZ_ASSERT(aEvent->mType < EVENT_COUNT);
- MOZ_ASSERT(ArrayLength(names) == EVENT_COUNT);
- return names[aEvent->mType];
-}
-
-SpeechEvent::~SpeechEvent()
-{
- delete mAudioSegment;
-}
-
-NS_IMETHODIMP
-SpeechEvent::Run()
-{
- mRecognition->ProcessEvent(this);
- return NS_OK;
-}
-
-NS_IMPL_ISUPPORTS(SpeechRecognition::GetUserMediaSuccessCallback, nsIDOMGetUserMediaSuccessCallback)
-
-NS_IMETHODIMP
-SpeechRecognition::GetUserMediaSuccessCallback::OnSuccess(nsISupports* aStream)
-{
- RefPtr<DOMMediaStream> stream = do_QueryObject(aStream);
- if (!stream) {
- return NS_ERROR_NO_INTERFACE;
- }
- mRecognition->StartRecording(stream);
- return NS_OK;
-}
-
-NS_IMPL_ISUPPORTS(SpeechRecognition::GetUserMediaErrorCallback, nsIDOMGetUserMediaErrorCallback)
-
-NS_IMETHODIMP
-SpeechRecognition::GetUserMediaErrorCallback::OnError(nsISupports* aError)
-{
- RefPtr<MediaStreamError> error = do_QueryObject(aError);
- if (!error) {
- return NS_OK;
- }
- SpeechRecognitionErrorCode errorCode;
-
- nsAutoString name;
- error->GetName(name);
- if (name.EqualsLiteral("PERMISSION_DENIED")) {
- errorCode = SpeechRecognitionErrorCode::Not_allowed;
- } else {
- errorCode = SpeechRecognitionErrorCode::Audio_capture;
- }
-
- nsAutoString message;
- error->GetMessage(message);
- mRecognition->DispatchError(SpeechRecognition::EVENT_AUDIO_ERROR, errorCode,
- message);
- return NS_OK;
-}
-
-} // namespace dom
-} // namespace mozilla