diff options
Diffstat (limited to 'dom/media/webspeech/recognition/SpeechRecognition.h')
-rw-r--r-- | dom/media/webspeech/recognition/SpeechRecognition.h | 296 |
1 files changed, 0 insertions, 296 deletions
diff --git a/dom/media/webspeech/recognition/SpeechRecognition.h b/dom/media/webspeech/recognition/SpeechRecognition.h deleted file mode 100644 index 3f1ab7977..000000000 --- a/dom/media/webspeech/recognition/SpeechRecognition.h +++ /dev/null @@ -1,296 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* vim:set ts=2 sw=2 sts=2 et cindent: */ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ - -#ifndef mozilla_dom_SpeechRecognition_h -#define mozilla_dom_SpeechRecognition_h - -#include "mozilla/Attributes.h" -#include "mozilla/DOMEventTargetHelper.h" -#include "nsCOMPtr.h" -#include "nsString.h" -#include "nsWrapperCache.h" -#include "nsTArray.h" -#include "js/TypeDecls.h" - -#include "nsIDOMNavigatorUserMedia.h" -#include "nsITimer.h" -#include "MediaEngine.h" -#include "MediaStreamGraph.h" -#include "AudioSegment.h" -#include "mozilla/WeakPtr.h" - -#include "SpeechGrammarList.h" -#include "SpeechRecognitionResultList.h" -#include "SpeechStreamListener.h" -#include "nsISpeechRecognitionService.h" -#include "endpointer.h" - -#include "mozilla/dom/SpeechRecognitionError.h" - -namespace mozilla { - -namespace dom { - -#define SPEECH_RECOGNITION_TEST_EVENT_REQUEST_TOPIC "SpeechRecognitionTest:RequestEvent" -#define SPEECH_RECOGNITION_TEST_END_TOPIC "SpeechRecognitionTest:End" - -class GlobalObject; -class SpeechEvent; - -LogModule* GetSpeechRecognitionLog(); -#define SR_LOG(...) MOZ_LOG(GetSpeechRecognitionLog(), mozilla::LogLevel::Debug, (__VA_ARGS__)) - -class SpeechRecognition final : public DOMEventTargetHelper, - public nsIObserver, - public SupportsWeakPtr<SpeechRecognition> -{ -public: - MOZ_DECLARE_WEAKREFERENCE_TYPENAME(SpeechRecognition) - explicit SpeechRecognition(nsPIDOMWindowInner* aOwnerWindow); - - NS_DECL_ISUPPORTS_INHERITED - NS_DECL_CYCLE_COLLECTION_CLASS_INHERITED(SpeechRecognition, DOMEventTargetHelper) - - NS_DECL_NSIOBSERVER - - nsISupports* GetParentObject() const; - - JSObject* WrapObject(JSContext* aCx, JS::Handle<JSObject*> aGivenProto) override; - - static bool IsAuthorized(JSContext* aCx, JSObject* aGlobal); - - static already_AddRefed<SpeechRecognition> - Constructor(const GlobalObject& aGlobal, ErrorResult& aRv); - - already_AddRefed<SpeechGrammarList> Grammars() const; - - void SetGrammars(mozilla::dom::SpeechGrammarList& aArg); - - void GetLang(nsString& aRetVal) const; - - void SetLang(const nsAString& aArg); - - bool GetContinuous(ErrorResult& aRv) const; - - void SetContinuous(bool aArg, ErrorResult& aRv); - - bool InterimResults() const; - - void SetInterimResults(bool aArg); - - uint32_t MaxAlternatives() const; - - void SetMaxAlternatives(uint32_t aArg); - - void GetServiceURI(nsString& aRetVal, ErrorResult& aRv) const; - - void SetServiceURI(const nsAString& aArg, ErrorResult& aRv); - - void Start(const Optional<NonNull<DOMMediaStream>>& aStream, ErrorResult& aRv); - - void Stop(); - - void Abort(); - - IMPL_EVENT_HANDLER(audiostart) - IMPL_EVENT_HANDLER(soundstart) - IMPL_EVENT_HANDLER(speechstart) - IMPL_EVENT_HANDLER(speechend) - IMPL_EVENT_HANDLER(soundend) - IMPL_EVENT_HANDLER(audioend) - IMPL_EVENT_HANDLER(result) - IMPL_EVENT_HANDLER(nomatch) - IMPL_EVENT_HANDLER(error) - IMPL_EVENT_HANDLER(start) - IMPL_EVENT_HANDLER(end) - - enum EventType { - EVENT_START, - EVENT_STOP, - EVENT_ABORT, - EVENT_AUDIO_DATA, - EVENT_AUDIO_ERROR, - EVENT_RECOGNITIONSERVICE_INTERMEDIATE_RESULT, - EVENT_RECOGNITIONSERVICE_FINAL_RESULT, - EVENT_RECOGNITIONSERVICE_ERROR, - EVENT_COUNT - }; - - void DispatchError(EventType aErrorType, SpeechRecognitionErrorCode aErrorCode, const nsAString& aMessage); - uint32_t FillSamplesBuffer(const int16_t* aSamples, uint32_t aSampleCount); - uint32_t SplitSamplesBuffer(const int16_t* aSamplesBuffer, uint32_t aSampleCount, nsTArray<RefPtr<SharedBuffer>>& aResult); - AudioSegment* CreateAudioSegment(nsTArray<RefPtr<SharedBuffer>>& aChunks); - void FeedAudioData(already_AddRefed<SharedBuffer> aSamples, uint32_t aDuration, MediaStreamListener* aProvider, TrackRate aTrackRate); - - friend class SpeechEvent; -private: - virtual ~SpeechRecognition() {}; - - enum FSMState { - STATE_IDLE, - STATE_STARTING, - STATE_ESTIMATING, - STATE_WAITING_FOR_SPEECH, - STATE_RECOGNIZING, - STATE_WAITING_FOR_RESULT, - STATE_COUNT - }; - - void SetState(FSMState state); - bool StateBetween(FSMState begin, FSMState end); - - bool SetRecognitionService(ErrorResult& aRv); - bool ValidateAndSetGrammarList(ErrorResult& aRv); - - class GetUserMediaSuccessCallback : public nsIDOMGetUserMediaSuccessCallback - { - public: - NS_DECL_ISUPPORTS - NS_DECL_NSIDOMGETUSERMEDIASUCCESSCALLBACK - - explicit GetUserMediaSuccessCallback(SpeechRecognition* aRecognition) - : mRecognition(aRecognition) - {} - - private: - virtual ~GetUserMediaSuccessCallback() {} - - RefPtr<SpeechRecognition> mRecognition; - }; - - class GetUserMediaErrorCallback : public nsIDOMGetUserMediaErrorCallback - { - public: - NS_DECL_ISUPPORTS - NS_DECL_NSIDOMGETUSERMEDIAERRORCALLBACK - - explicit GetUserMediaErrorCallback(SpeechRecognition* aRecognition) - : mRecognition(aRecognition) - {} - - private: - virtual ~GetUserMediaErrorCallback() {} - - RefPtr<SpeechRecognition> mRecognition; - }; - - NS_IMETHOD StartRecording(DOMMediaStream* aDOMStream); - NS_IMETHOD StopRecording(); - - uint32_t ProcessAudioSegment(AudioSegment* aSegment, TrackRate aTrackRate); - void NotifyError(SpeechEvent* aEvent); - - void ProcessEvent(SpeechEvent* aEvent); - void Transition(SpeechEvent* aEvent); - - void Reset(); - void ResetAndEnd(); - void WaitForAudioData(SpeechEvent* aEvent); - void StartedAudioCapture(SpeechEvent* aEvent); - void StopRecordingAndRecognize(SpeechEvent* aEvent); - void WaitForEstimation(SpeechEvent* aEvent); - void DetectSpeech(SpeechEvent* aEvent); - void WaitForSpeechEnd(SpeechEvent* aEvent); - void NotifyFinalResult(SpeechEvent* aEvent); - void DoNothing(SpeechEvent* aEvent); - void AbortSilently(SpeechEvent* aEvent); - void AbortError(SpeechEvent* aEvent); - - RefPtr<DOMMediaStream> mDOMStream; - RefPtr<SpeechStreamListener> mSpeechListener; - nsCOMPtr<nsISpeechRecognitionService> mRecognitionService; - - FSMState mCurrentState; - - Endpointer mEndpointer; - uint32_t mEstimationSamples; - - uint32_t mAudioSamplesPerChunk; - - // buffer holds one chunk of mAudioSamplesPerChunk - // samples before feeding it to mEndpointer - RefPtr<SharedBuffer> mAudioSamplesBuffer; - uint32_t mBufferedSamples; - - nsCOMPtr<nsITimer> mSpeechDetectionTimer; - bool mAborted; - - nsString mLang; - - RefPtr<SpeechGrammarList> mSpeechGrammarList; - - // WebSpeechAPI (http://bit.ly/1gIl7DC) states: - // - // 1. Default value MUST be false - // 2. If true, interim results SHOULD be returned - // 3. If false, interim results MUST NOT be returned - // - // Pocketsphinx does not return interm results; so, defaulting - // mInterimResults to false, then ignoring its subsequent value - // is a conforming implementation. - bool mInterimResults; - - // WebSpeechAPI (http://bit.ly/1JAiqeo) states: - // - // 1. Default value is 1 - // 2. Subsequent value is the "maximum number of SpeechRecognitionAlternatives per result" - // - // Pocketsphinx can only return at maximum a single SpeechRecognitionAlternative - // per SpeechRecognitionResult. So defaulting mMaxAlternatives to 1, for all non - // zero values ignoring mMaxAlternatives while for a 0 value returning no - // SpeechRecognitionAlternative per result is a conforming implementation. - uint32_t mMaxAlternatives; - - void ProcessTestEventRequest(nsISupports* aSubject, const nsAString& aEventName); - - const char* GetName(FSMState aId); - const char* GetName(SpeechEvent* aId); -}; - -class SpeechEvent : public Runnable -{ -public: - SpeechEvent(SpeechRecognition* aRecognition, SpeechRecognition::EventType aType) - : mAudioSegment(0) - , mRecognitionResultList(nullptr) - , mError(nullptr) - , mRecognition(aRecognition) - , mType(aType) - , mTrackRate(0) - { - } - - ~SpeechEvent(); - - NS_IMETHOD Run() override; - AudioSegment* mAudioSegment; - RefPtr<SpeechRecognitionResultList> mRecognitionResultList; // TODO: make this a session being passed which also has index and stuff - RefPtr<SpeechRecognitionError> mError; - - friend class SpeechRecognition; -private: - SpeechRecognition* mRecognition; - - // for AUDIO_DATA events, keep a reference to the provider - // of the data (i.e., the SpeechStreamListener) to ensure it - // is kept alive (and keeps SpeechRecognition alive) until this - // event gets processed. - RefPtr<MediaStreamListener> mProvider; - SpeechRecognition::EventType mType; - TrackRate mTrackRate; -}; - -} // namespace dom - -inline nsISupports* -ToSupports(dom::SpeechRecognition* aRec) -{ - return ToSupports(static_cast<DOMEventTargetHelper*>(aRec)); -} - -} // namespace mozilla - -#endif |