summary refs log tree commit diff stats
path: root/dom/media/webspeech/recognition/SpeechRecognition.h
diff options
context:
space:
mode:
Diffstat (limited to 'dom/media/webspeech/recognition/SpeechRecognition.h')
-rw-r--r-- dom/media/webspeech/recognition/SpeechRecognition.h 296
1 files changed, 0 insertions, 296 deletions
diff --git a/dom/media/webspeech/recognition/SpeechRecognition.h b/dom/media/webspeech/recognition/SpeechRecognition.h
deleted file mode 100644
index 3f1ab7977..000000000
--- a/dom/media/webspeech/recognition/SpeechRecognition.h
+++ /dev/null
@@ -1,296 +0,0 @@
-/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
-/* vim:set ts=2 sw=2 sts=2 et cindent: */
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-#ifndef mozilla_dom_SpeechRecognition_h
-#define mozilla_dom_SpeechRecognition_h
-
-#include "mozilla/Attributes.h"
-#include "mozilla/DOMEventTargetHelper.h"
-#include "nsCOMPtr.h"
-#include "nsString.h"
-#include "nsWrapperCache.h"
-#include "nsTArray.h"
-#include "js/TypeDecls.h"
-
-#include "nsIDOMNavigatorUserMedia.h"
-#include "nsITimer.h"
-#include "MediaEngine.h"
-#include "MediaStreamGraph.h"
-#include "AudioSegment.h"
-#include "mozilla/WeakPtr.h"
-
-#include "SpeechGrammarList.h"
-#include "SpeechRecognitionResultList.h"
-#include "SpeechStreamListener.h"
-#include "nsISpeechRecognitionService.h"
-#include "endpointer.h"
-
-#include "mozilla/dom/SpeechRecognitionError.h"
-
-namespace mozilla {
-
-namespace dom {
-
-#define SPEECH_RECOGNITION_TEST_EVENT_REQUEST_TOPIC "SpeechRecognitionTest:RequestEvent" // topic string; presumably an observer topic tests use to request an event -- confirm in the .cpp
-#define SPEECH_RECOGNITION_TEST_END_TOPIC "SpeechRecognitionTest:End" // topic string; presumably marks the end of a test run -- confirm in the .cpp
-
-class GlobalObject; // forward declaration; only used by reference in SpeechRecognition::Constructor
-class SpeechEvent; // forward declaration; the class is defined after SpeechRecognition in this header
-
-LogModule* GetSpeechRecognitionLog(); // declared only; supplies the log module that SR_LOG writes to
-#define SR_LOG(...) MOZ_LOG(GetSpeechRecognitionLog(), mozilla::LogLevel::Debug, (__VA_ARGS__)) // Debug-level MOZ_LOG routed through GetSpeechRecognitionLog()
-
-class SpeechRecognition final : public DOMEventTargetHelper, // DOM object for speech recognition: an event target, an nsIObserver and a WeakPtr target
- public nsIObserver,
- public SupportsWeakPtr<SpeechRecognition>
-{
-public:
- MOZ_DECLARE_WEAKREFERENCE_TYPENAME(SpeechRecognition)
- explicit SpeechRecognition(nsPIDOMWindowInner* aOwnerWindow); // explicit: no implicit conversion from a window pointer
-
- NS_DECL_ISUPPORTS_INHERITED
- NS_DECL_CYCLE_COLLECTION_CLASS_INHERITED(SpeechRecognition, DOMEventTargetHelper)
-
- NS_DECL_NSIOBSERVER
-
- nsISupports* GetParentObject() const;
-
- JSObject* WrapObject(JSContext* aCx, JS::Handle<JSObject*> aGivenProto) override;
-
- static bool IsAuthorized(JSContext* aCx, JSObject* aGlobal); // static predicate; the policy it applies is not visible in this header
-
- static already_AddRefed<SpeechRecognition>
- Constructor(const GlobalObject& aGlobal, ErrorResult& aRv); // static factory returning an already-AddRef'd instance; failures are reported through aRv
-
- already_AddRefed<SpeechGrammarList> Grammars() const; // getter/setter pairs for the recognition attributes follow
-
- void SetGrammars(mozilla::dom::SpeechGrammarList& aArg);
-
- void GetLang(nsString& aRetVal) const; // result is written to the out-parameter aRetVal
-
- void SetLang(const nsAString& aArg);
-
- bool GetContinuous(ErrorResult& aRv) const; // takes an ErrorResult, so this accessor can report an error to the caller
-
- void SetContinuous(bool aArg, ErrorResult& aRv);
-
- bool InterimResults() const; // see the note on mInterimResults below
-
- void SetInterimResults(bool aArg);
-
- uint32_t MaxAlternatives() const; // see the note on mMaxAlternatives below
-
- void SetMaxAlternatives(uint32_t aArg);
-
- void GetServiceURI(nsString& aRetVal, ErrorResult& aRv) const;
-
- void SetServiceURI(const nsAString& aArg, ErrorResult& aRv);
-
- void Start(const Optional<NonNull<DOMMediaStream>>& aStream, ErrorResult& aRv); // aStream is optional; behaviour when it is absent is defined in the .cpp
-
- void Stop();
-
- void Abort();
-
- IMPL_EVENT_HANDLER(audiostart) // macro defined elsewhere; presumably declares the on<event> handler accessors -- confirm
- IMPL_EVENT_HANDLER(soundstart)
- IMPL_EVENT_HANDLER(speechstart)
- IMPL_EVENT_HANDLER(speechend)
- IMPL_EVENT_HANDLER(soundend)
- IMPL_EVENT_HANDLER(audioend)
- IMPL_EVENT_HANDLER(result)
- IMPL_EVENT_HANDLER(nomatch)
- IMPL_EVENT_HANDLER(error)
- IMPL_EVENT_HANDLER(start)
- IMPL_EVENT_HANDLER(end)
-
- enum EventType { // type tag carried by SpeechEvent (its mType member)
- EVENT_START,
- EVENT_STOP,
- EVENT_ABORT,
- EVENT_AUDIO_DATA,
- EVENT_AUDIO_ERROR,
- EVENT_RECOGNITIONSERVICE_INTERMEDIATE_RESULT,
- EVENT_RECOGNITIONSERVICE_FINAL_RESULT,
- EVENT_RECOGNITIONSERVICE_ERROR,
- EVENT_COUNT // not an event: being last, its value equals the number of event types above
- };
-
- void DispatchError(EventType aErrorType, SpeechRecognitionErrorCode aErrorCode, const nsAString& aMessage);
- uint32_t FillSamplesBuffer(const int16_t* aSamples, uint32_t aSampleCount); // meaning of the returned count is not visible here -- check the .cpp
- uint32_t SplitSamplesBuffer(const int16_t* aSamplesBuffer, uint32_t aSampleCount, nsTArray<RefPtr<SharedBuffer>>& aResult); // aResult is an out-parameter array of buffers
- AudioSegment* CreateAudioSegment(nsTArray<RefPtr<SharedBuffer>>& aChunks); // returns a raw pointer; NOTE(review): ownership of the result is not expressed in the type -- confirm
- void FeedAudioData(already_AddRefed<SharedBuffer> aSamples, uint32_t aDuration, MediaStreamListener* aProvider, TrackRate aTrackRate); // takes over the caller's reference to aSamples
-
- friend class SpeechEvent; // SpeechEvent may reach the private members declared below
-private:
- virtual ~SpeechRecognition() {}; // private destructor: instances cannot be destroyed directly by outside code
-
- enum FSMState { // values held by mCurrentState
- STATE_IDLE,
- STATE_STARTING,
- STATE_ESTIMATING,
- STATE_WAITING_FOR_SPEECH,
- STATE_RECOGNIZING,
- STATE_WAITING_FOR_RESULT,
- STATE_COUNT // not a state: being last, its value equals the number of states above
- };
-
- void SetState(FSMState state);
- bool StateBetween(FSMState begin, FSMState end); // NOTE(review): whether the bounds are inclusive is not visible here -- confirm
-
- bool SetRecognitionService(ErrorResult& aRv);
- bool ValidateAndSetGrammarList(ErrorResult& aRv);
-
- class GetUserMediaSuccessCallback : public nsIDOMGetUserMediaSuccessCallback // success-callback object that holds a strong reference to its SpeechRecognition
- {
- public:
- NS_DECL_ISUPPORTS
- NS_DECL_NSIDOMGETUSERMEDIASUCCESSCALLBACK
-
- explicit GetUserMediaSuccessCallback(SpeechRecognition* aRecognition)
- : mRecognition(aRecognition)
- {}
-
- private:
- virtual ~GetUserMediaSuccessCallback() {}
-
- RefPtr<SpeechRecognition> mRecognition; // strong (ref-counted) reference
- };
-
- class GetUserMediaErrorCallback : public nsIDOMGetUserMediaErrorCallback // error-callback counterpart; same shape as the success callback
- {
- public:
- NS_DECL_ISUPPORTS
- NS_DECL_NSIDOMGETUSERMEDIAERRORCALLBACK
-
- explicit GetUserMediaErrorCallback(SpeechRecognition* aRecognition)
- : mRecognition(aRecognition)
- {}
-
- private:
- virtual ~GetUserMediaErrorCallback() {}
-
- RefPtr<SpeechRecognition> mRecognition; // strong (ref-counted) reference
- };
-
- NS_IMETHOD StartRecording(DOMMediaStream* aDOMStream);
- NS_IMETHOD StopRecording();
-
- uint32_t ProcessAudioSegment(AudioSegment* aSegment, TrackRate aTrackRate);
- void NotifyError(SpeechEvent* aEvent);
-
- void ProcessEvent(SpeechEvent* aEvent);
- void Transition(SpeechEvent* aEvent);
-
- void Reset();
- void ResetAndEnd();
- void WaitForAudioData(SpeechEvent* aEvent); // this and the following void(SpeechEvent*) members are presumably state-machine actions chosen by Transition() -- confirm in the .cpp
- void StartedAudioCapture(SpeechEvent* aEvent);
- void StopRecordingAndRecognize(SpeechEvent* aEvent);
- void WaitForEstimation(SpeechEvent* aEvent);
- void DetectSpeech(SpeechEvent* aEvent);
- void WaitForSpeechEnd(SpeechEvent* aEvent);
- void NotifyFinalResult(SpeechEvent* aEvent);
- void DoNothing(SpeechEvent* aEvent);
- void AbortSilently(SpeechEvent* aEvent);
- void AbortError(SpeechEvent* aEvent);
-
- RefPtr<DOMMediaStream> mDOMStream; // strong references to the stream, its listener and the recognition service
- RefPtr<SpeechStreamListener> mSpeechListener;
- nsCOMPtr<nsISpeechRecognitionService> mRecognitionService;
-
- FSMState mCurrentState;
-
- Endpointer mEndpointer; // held by value; receives the buffered sample chunks (see the mAudioSamplesBuffer note)
- uint32_t mEstimationSamples;
-
- uint32_t mAudioSamplesPerChunk;
-
- // Staging buffer: accumulates exactly one chunk of
- // mAudioSamplesPerChunk samples before it is fed to mEndpointer.
- RefPtr<SharedBuffer> mAudioSamplesBuffer;
- uint32_t mBufferedSamples; // presumably the number of samples currently held in mAudioSamplesBuffer -- confirm
-
- nsCOMPtr<nsITimer> mSpeechDetectionTimer;
- bool mAborted;
-
- nsString mLang;
-
- RefPtr<SpeechGrammarList> mSpeechGrammarList;
-
- // WebSpeechAPI (http://bit.ly/1gIl7DC) states:
- //
- // 1. Default value MUST be false
- // 2. If true, interim results SHOULD be returned
- // 3. If false, interim results MUST NOT be returned
- //
- // Pocketsphinx does not return interim results; so, defaulting
- // mInterimResults to false, then ignoring its subsequent value
- // is a conforming implementation.
- bool mInterimResults;
-
- // WebSpeechAPI (http://bit.ly/1JAiqeo) states:
- //
- // 1. Default value is 1
- // 2. Subsequent value is the "maximum number of SpeechRecognitionAlternatives per result"
- //
- // Pocketsphinx can only return at maximum a single SpeechRecognitionAlternative
- // per SpeechRecognitionResult. So defaulting mMaxAlternatives to 1, for all non
- // zero values ignoring mMaxAlternatives while for a 0 value returning no
- // SpeechRecognitionAlternative per result is a conforming implementation.
- uint32_t mMaxAlternatives;
-
- void ProcessTestEventRequest(nsISupports* aSubject, const nsAString& aEventName); // presumably reached from Observe() for the test topics defined above -- confirm
-
- const char* GetName(FSMState aId); // overload pair: name of a state / name for an event, as C strings
- const char* GetName(SpeechEvent* aId);
-};
-
-class SpeechEvent : public Runnable // Runnable carrying one typed event, plus optional payload, for a SpeechRecognition
-{
-public:
- SpeechEvent(SpeechRecognition* aRecognition, SpeechRecognition::EventType aType) // stores the target and type; every payload field starts out null/zero
- : mAudioSegment(0) // null pointer
- , mRecognitionResultList(nullptr)
- , mError(nullptr)
- , mRecognition(aRecognition)
- , mType(aType)
- , mTrackRate(0)
- {
- }
-
- ~SpeechEvent(); // defined out of line; not visible in this header
-
- NS_IMETHOD Run() override; // Runnable entry point; body is in the .cpp
- AudioSegment* mAudioSegment; // raw pointer; NOTE(review): ownership is not expressed in the type -- check ~SpeechEvent()
- RefPtr<SpeechRecognitionResultList> mRecognitionResultList; // TODO: make this a session being passed which also has index and stuff
- RefPtr<SpeechRecognitionError> mError;
-
- friend class SpeechRecognition; // lets SpeechRecognition read and write the private fields below
-private:
- SpeechRecognition* mRecognition; // raw pointer, not a RefPtr; see the lifetime note on mProvider below
-
- // For AUDIO_DATA events, hold a reference to the provider
- // of the data (i.e., the SpeechStreamListener) so that it
- // stays alive (and in turn keeps SpeechRecognition alive) until
- // this event has been processed.
- RefPtr<MediaStreamListener> mProvider;
- SpeechRecognition::EventType mType;
- TrackRate mTrackRate;
-};
-
-} // namespace dom
-
-inline nsISupports* // overload so a SpeechRecognition* can be handed to code that expects an nsISupports*
-ToSupports(dom::SpeechRecognition* aRec)
-{
- return ToSupports(static_cast<DOMEventTargetHelper*>(aRec)); // goes through the DOMEventTargetHelper base; presumably avoids an ambiguous nsISupports conversion, since nsIObserver is also a base -- confirm
-}
-
-} // namespace mozilla
-
-#endif