summary refs log tree commit diff stats
path: root/dom/media/webspeech/recognition/endpointer.h
diff options
context:
space:
mode:
Diffstat (limited to 'dom/media/webspeech/recognition/endpointer.h')
-rw-r--r-- dom/media/webspeech/recognition/endpointer.h 180
1 files changed, 0 insertions, 180 deletions
diff --git a/dom/media/webspeech/recognition/endpointer.h b/dom/media/webspeech/recognition/endpointer.h
deleted file mode 100644
index 7879d6b9f..000000000
--- a/dom/media/webspeech/recognition/endpointer.h
+++ /dev/null
@@ -1,180 +0,0 @@
-// Copyright (c) 2013 The Chromium Authors. All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#ifndef CONTENT_BROWSER_SPEECH_ENDPOINTER_ENDPOINTER_H_
-#define CONTENT_BROWSER_SPEECH_ENDPOINTER_ENDPOINTER_H_
-
-#include "energy_endpointer.h"
-
-namespace mozilla {
-
-struct AudioChunk;
-
-// A simple interface to the underlying energy-endpointer implementation. This
-// class lets callers provide audio as it is recorded and lets them poll to find
-// when the user has stopped speaking.
-//
-// There are two events that may trigger the end of speech:
-//
-// speechInputPossiblyComplete event:
-//
-// Signals that silence/noise has been detected for a *short* amount of
-// time after some speech has been detected. It can be used for low latency
-// UI feedback. To disable it, set it to a large amount.
-//
-// speechInputComplete event:
-//
-// This event is intended to signal end of input and to stop recording.
-// The amount of time to wait after speech is set by
-// speech_input_complete_silence_length_ and optionally two other
-// parameters (see below).
-// This time can be held constant, or can change as more speech is detected.
-// In the latter case, the time changes after a set amount of time from the
-// *beginning* of speech. This is motivated by the expectation that there
-// will be two distinct types of inputs: short search queries and longer
-// dictation style input.
-//
-// Three parameters are used to define the piecewise constant timeout function.
-// The timeout length is speech_input_complete_silence_length until
-// long_speech_length, when it changes to
-// long_speech_input_complete_silence_length.
-class Endpointer {
- public:
- explicit Endpointer(int sample_rate);
-
- // Start the endpointer. This should be called at the beginning of a session.
- void StartSession();
-
- // Stop the endpointer.
- void EndSession();
-
- // Start environment estimation. Audio will be used for environment estimation
- // i.e. noise level estimation.
- void SetEnvironmentEstimationMode();
-
- // Start user input. This should be called when the user indicates start of
- // input, e.g. by pressing a button.
- void SetUserInputMode();
-
- // Process a segment of audio, which may be more than one frame.
- // The status of the last frame will be returned.
- EpStatus ProcessAudio(const AudioChunk& raw_audio, float* rms_out);
-
- // Get the status of the endpointer.
- EpStatus Status(int64_t *time_us);
-
- // Get the expected frame size for audio chunks. Audio chunks are expected
- // to contain a number of samples that is a multiple of this number, and extra
- // samples will be dropped.
- int32_t FrameSize() const {
- return frame_size_;
- }
-
- // Returns true if the endpointer detected reasonable audio levels above
- // background noise which could be user speech, false if not.
- bool DidStartReceivingSpeech() const {
- return speech_previously_detected_;
- }
-
- bool IsEstimatingEnvironment() const {
- return energy_endpointer_.estimating_environment();
- }
-
- void set_speech_input_complete_silence_length(int64_t time_us) {
- speech_input_complete_silence_length_us_ = time_us;
- }
-
- void set_long_speech_input_complete_silence_length(int64_t time_us) {
- long_speech_input_complete_silence_length_us_ = time_us;
- }
-
- void set_speech_input_possibly_complete_silence_length(int64_t time_us) {
- speech_input_possibly_complete_silence_length_us_ = time_us;
- }
-
- void set_long_speech_length(int64_t time_us) {
- long_speech_length_us_ = time_us;
- }
-
- bool speech_input_complete() const {
- return speech_input_complete_;
- }
-
- // RMS background noise level in dB.
- float NoiseLevelDb() const { return energy_endpointer_.GetNoiseLevelDb(); }
-
- private:
- // Reset internal states. Helper method common to initial input utterance
- // and following input utterances.
- void Reset();
-
- // Minimum allowable length of speech input.
- int64_t speech_input_minimum_length_us_;
-
- // The speechInputPossiblyComplete event signals that silence/noise has been
- // detected for a *short* amount of time after some speech has been detected.
- // This property specifies the time period.
- int64_t speech_input_possibly_complete_silence_length_us_;
-
- // The speechInputComplete event signals that silence/noise has been
- // detected for a *long* amount of time after some speech has been detected.
- // This property specifies the time period.
- int64_t speech_input_complete_silence_length_us_;
-
- // Same as above, this specifies the required silence period after speech
- // detection. This period is used instead of
- // speech_input_complete_silence_length_us_ when the utterance is longer than
- // long_speech_length_us_. This parameter is optional.
- int64_t long_speech_input_complete_silence_length_us_;
-
- // The period of time after which the endpointer should consider
- // long_speech_input_complete_silence_length_us_ as a valid silence period
- // instead of speech_input_complete_silence_length_us_. This parameter is
- // optional.
- int64_t long_speech_length_us_;
-
- // First speech onset time, used in determination of speech complete timeout.
- int64_t speech_start_time_us_;
-
- // Most recent end time, used in determination of speech complete timeout.
- int64_t speech_end_time_us_;
-
- int64_t audio_frame_time_us_;
- EpStatus old_ep_status_;
- bool waiting_for_speech_possibly_complete_timeout_;
- bool waiting_for_speech_complete_timeout_;
- bool speech_previously_detected_;
- bool speech_input_complete_;
- EnergyEndpointer energy_endpointer_;
- int sample_rate_;
- int32_t frame_size_;
-};
-
-} // namespace mozilla
-
-#endif // CONTENT_BROWSER_SPEECH_ENDPOINTER_ENDPOINTER_H_