1 files changed, 357 insertions, 0 deletions
diff --git a/dom/media/webspeech/recognition/PocketSphinxSpeechRecognitionService.cpp b/dom/media/webspeech/recognition/PocketSphinxSpeechRecognitionService.cpp
new file mode 100644
index 000000000..b7aa5f2d7
--- /dev/null
+++ b/dom/media/webspeech/recognition/PocketSphinxSpeechRecognitionService.cpp
@@ -0,0 +1,357 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsThreadUtils.h"
+#include "nsXPCOMCIDInternal.h"
+#include "PocketSphinxSpeechRecognitionService.h"
+#include "nsIFile.h"
+#include "SpeechGrammar.h"
+#include "SpeechRecognition.h"
+#include "SpeechRecognitionAlternative.h"
+#include "SpeechRecognitionResult.h"
+#include "SpeechRecognitionResultList.h"
+#include "nsIObserverService.h"
+#include "MediaPrefs.h"
+#include "mozilla/Services.h"
+#include "nsDirectoryServiceDefs.h"
+#include "nsDirectoryServiceUtils.h"
+#include "nsMemory.h"
+
+extern "C" {
+#include "pocketsphinx/pocketsphinx.h"
+#include "sphinxbase/logmath.h"
+#include "sphinxbase/sphinx_config.h"
+#include "sphinxbase/jsgf.h"
+}
+
+namespace mozilla {
+
+using namespace dom;
+
+class DecodeResultTask : public Runnable
+{
+public:
+  DecodeResultTask(const nsString& hypstring,
+                   float64 confidence,
+                   WeakPtr<dom::SpeechRecognition> recognition)
+      : mResult(hypstring),
+        mConfidence(confidence),
+        mRecognition(recognition),
+        mWorkerThread(do_GetCurrentThread())
+  {
+    MOZ_ASSERT(
+      !NS_IsMainThread()); // This should be running on the worker thread
+  }
+
+  NS_IMETHOD
+  Run() override
+  {
+    MOZ_ASSERT(NS_IsMainThread()); // This method is supposed to run on the main
+                                   // thread!
+
+    // Declare javascript result events
+    RefPtr<SpeechEvent> event = new SpeechEvent(
+      mRecognition, SpeechRecognition::EVENT_RECOGNITIONSERVICE_FINAL_RESULT);
+    SpeechRecognitionResultList* resultList =
+      new SpeechRecognitionResultList(mRecognition);
+    SpeechRecognitionResult* result = new SpeechRecognitionResult(mRecognition);
+    if (0 < mRecognition->MaxAlternatives()) {
+      SpeechRecognitionAlternative* alternative =
+        new SpeechRecognitionAlternative(mRecognition);
+
+      alternative->mTranscript = mResult;
+      alternative->mConfidence = mConfidence;
+
+      result->mItems.AppendElement(alternative);
+    }
+    resultList->mItems.AppendElement(result);
+
+    event->mRecognitionResultList = resultList;
+    NS_DispatchToMainThread(event);
+
+    // If we don't destroy the thread when we're done with it, it will hang
+    // around forever... bad!
+    // But thread->Shutdown must be called from the main thread, not from the
+    // thread itself.
+    return mWorkerThread->Shutdown();
+  }
+
+private:
+  nsString mResult;
+  float64 mConfidence;
+  WeakPtr<dom::SpeechRecognition> mRecognition;
+  nsCOMPtr<nsIThread> mWorkerThread;
+};
+
+class DecodeTask : public Runnable
+{
+public:
+  DecodeTask(WeakPtr<dom::SpeechRecognition> recogntion,
+             const nsTArray<int16_t>& audiovector, ps_decoder_t* ps)
+      : mRecognition(recogntion), mAudiovector(audiovector), mPs(ps)
+  {
+  }
+
+  NS_IMETHOD
+  Run() override
+  {
+    char const* hyp;
+    int rv;
+    int32 final;
+    int32 logprob;
+    float64 confidence;
+    nsAutoCString hypoValue;
+
+    rv = ps_start_utt(mPs);
+    rv = ps_process_raw(mPs, &mAudiovector[0], mAudiovector.Length(), FALSE,
+                        FALSE);
+
+    rv = ps_end_utt(mPs);
+    confidence = 0;
+    if (rv >= 0) {
+      hyp = ps_get_hyp_final(mPs, &final);
+      if (hyp && final) {
+        logprob = ps_get_prob(mPs);
+        confidence = logmath_exp(ps_get_logmath(mPs), logprob);
+        hypoValue.Assign(hyp);
+      }
+    }
+
+    nsCOMPtr<nsIRunnable> resultrunnable =
+      new DecodeResultTask(NS_ConvertUTF8toUTF16(hypoValue), confidence, mRecognition);
+    return NS_DispatchToMainThread(resultrunnable);
+  }
+
+private:
+  WeakPtr<dom::SpeechRecognition> mRecognition;
+  nsTArray<int16_t> mAudiovector;
+  ps_decoder_t* mPs;
+};
+
+NS_IMPL_ISUPPORTS(PocketSphinxSpeechRecognitionService,
+                  nsISpeechRecognitionService, nsIObserver)
+
+PocketSphinxSpeechRecognitionService::PocketSphinxSpeechRecognitionService()
+{
+  mSpeexState = nullptr;
+
+  // get root folder
+  nsCOMPtr<nsIFile> tmpFile;
+  nsAutoString aStringAMPath;   // am folder
+  nsAutoString aStringDictPath; // dict folder
+
+  NS_GetSpecialDirectory(NS_GRE_DIR, getter_AddRefs(tmpFile));
+#if defined(XP_WIN) // for some reason, on windows NS_GRE_DIR is not bin root,
+                    // but bin/browser
+  tmpFile->AppendRelativePath(NS_LITERAL_STRING(".."));
+#endif
+  tmpFile->AppendRelativePath(NS_LITERAL_STRING("models"));
+  tmpFile->AppendRelativePath(NS_LITERAL_STRING("en-US"));
+  tmpFile->GetPath(aStringAMPath);
+
+  NS_GetSpecialDirectory(NS_GRE_DIR, getter_AddRefs(tmpFile));
+#if defined(XP_WIN) // for some reason, on windows NS_GRE_DIR is not bin root,
+                    // but bin/browser
+  tmpFile->AppendRelativePath(NS_LITERAL_STRING(".."));
+#endif
+  tmpFile->AppendRelativePath(NS_LITERAL_STRING("models"));     //
+  tmpFile->AppendRelativePath(NS_LITERAL_STRING("dict"));       //
+  tmpFile->AppendRelativePath(NS_LITERAL_STRING("en-US.dic")); //
+  tmpFile->GetPath(aStringDictPath);
+
+  // FOR B2G PATHS HARDCODED (APPEND /DATA ON THE BEGINING, FOR DESKTOP, ONLY
+  // MODELS/ RELATIVE TO ROOT
+  mPSConfig = cmd_ln_init(nullptr, ps_args(), TRUE, "-bestpath", "yes", "-hmm",
+                          ToNewUTF8String(aStringAMPath), // acoustic model
+                          "-dict", ToNewUTF8String(aStringDictPath), nullptr);
+  if (mPSConfig == nullptr) {
+    ISDecoderCreated = false;
+  } else {
+    mPSHandle = ps_init(mPSConfig);
+    if (mPSHandle == nullptr) {
+      ISDecoderCreated = false;
+    } else {
+      ISDecoderCreated = true;
+    }
+  }
+
+  ISGrammarCompiled = false;
+}
+
+PocketSphinxSpeechRecognitionService::~PocketSphinxSpeechRecognitionService()
+{
+  if (mPSConfig) {
+    free(mPSConfig);
+  }
+  if (mPSHandle) {
+    free(mPSHandle);
+  }
+
+  mSpeexState = nullptr;
+}
+
+// CALL START IN JS FALLS HERE
+NS_IMETHODIMP
+PocketSphinxSpeechRecognitionService::Initialize(
+    WeakPtr<SpeechRecognition> aSpeechRecognition)
+{
+  if (!ISDecoderCreated || !ISGrammarCompiled) {
+    return NS_ERROR_NOT_INITIALIZED;
+  } else {
+    mAudioVector.Clear();
+
+    if (mSpeexState) {
+      mSpeexState = nullptr;
+    }
+
+    mRecognition = aSpeechRecognition;
+    nsCOMPtr<nsIObserverService> obs = services::GetObserverService();
+    obs->AddObserver(this, SPEECH_RECOGNITION_TEST_EVENT_REQUEST_TOPIC, false);
+    obs->AddObserver(this, SPEECH_RECOGNITION_TEST_END_TOPIC, false);
+    return NS_OK;
+  }
+}
+
+NS_IMETHODIMP
+PocketSphinxSpeechRecognitionService::ProcessAudioSegment(
+  AudioSegment* aAudioSegment, int32_t aSampleRate)
+{
+  if (!mSpeexState) {
+    mSpeexState = speex_resampler_init(1, aSampleRate, 16000,
+                                       SPEEX_RESAMPLER_QUALITY_MAX, nullptr);
+  }
+  aAudioSegment->ResampleChunks(mSpeexState, aSampleRate, 16000);
+
+  AudioSegment::ChunkIterator iterator(*aAudioSegment);
+
+  while (!iterator.IsEnded()) {
+    mozilla::AudioChunk& chunk = *(iterator);
+    MOZ_ASSERT(chunk.mBuffer);
+    const int16_t* buf = static_cast<const int16_t*>(chunk.mChannelData[0]);
+
+    for (int i = 0; i < iterator->mDuration; i++) {
+      mAudioVector.AppendElement((int16_t)buf[i]);
+    }
+    iterator.Next();
+  }
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+PocketSphinxSpeechRecognitionService::SoundEnd()
+{
+  speex_resampler_destroy(mSpeexState);
+  mSpeexState = nullptr;
+
+  // To create a new thread, get the thread manager
+  nsCOMPtr<nsIThreadManager> tm = do_GetService(NS_THREADMANAGER_CONTRACTID);
+  nsCOMPtr<nsIThread> decodethread;
+  nsresult rv = tm->NewThread(0, 0, getter_AddRefs(decodethread));
+  if (NS_FAILED(rv)) {
+    // In case of failure, call back immediately with an empty string which
+    // indicates failure
+    return NS_OK;
+  }
+
+  nsCOMPtr<nsIRunnable> r =
+    new DecodeTask(mRecognition, mAudioVector, mPSHandle);
+  decodethread->Dispatch(r, nsIEventTarget::DISPATCH_NORMAL);
+
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+PocketSphinxSpeechRecognitionService::ValidateAndSetGrammarList(
+  SpeechGrammar* aSpeechGrammar,
+  nsISpeechGrammarCompilationCallback* aCallback)
+{
+  if (!ISDecoderCreated) {
+    ISGrammarCompiled = false;
+  } else if (aSpeechGrammar) {
+    nsAutoString grammar;
+    ErrorResult rv;
+    aSpeechGrammar->GetSrc(grammar, rv);
+
+    int result = ps_set_jsgf_string(mPSHandle, "name",
+                                    NS_ConvertUTF16toUTF8(grammar).get());
+
+    if (result != 0) {
+      ISGrammarCompiled = false;
+    } else {
+      ps_set_search(mPSHandle, "name");
+
+      ISGrammarCompiled = true;
+    }
+  } else {
+    ISGrammarCompiled = false;
+  }
+
+  return ISGrammarCompiled ? NS_OK : NS_ERROR_NOT_INITIALIZED;
+}
+
+NS_IMETHODIMP
+PocketSphinxSpeechRecognitionService::Abort()
+{
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+PocketSphinxSpeechRecognitionService::Observe(nsISupports* aSubject,
+                                              const char* aTopic,
+                                              const char16_t* aData)
+{
+  MOZ_ASSERT(MediaPrefs::WebSpeechFakeRecognitionService(),
+             "Got request to fake recognition service event, "
+             "but " TEST_PREFERENCE_FAKE_RECOGNITION_SERVICE " is not set");
+
+  if (!strcmp(aTopic, SPEECH_RECOGNITION_TEST_END_TOPIC)) {
+    nsCOMPtr<nsIObserverService> obs = services::GetObserverService();
+    obs->RemoveObserver(this, SPEECH_RECOGNITION_TEST_EVENT_REQUEST_TOPIC);
+    obs->RemoveObserver(this, SPEECH_RECOGNITION_TEST_END_TOPIC);
+
+    return NS_OK;
+  }
+
+  const nsDependentString eventName = nsDependentString(aData);
+
+  if (eventName.EqualsLiteral("EVENT_RECOGNITIONSERVICE_ERROR")) {
+    mRecognition->DispatchError(
+      SpeechRecognition::EVENT_RECOGNITIONSERVICE_ERROR,
+      SpeechRecognitionErrorCode::Network, // TODO different codes?
+      NS_LITERAL_STRING("RECOGNITIONSERVICE_ERROR test event"));
+
+  } else if (eventName.EqualsLiteral("EVENT_RECOGNITIONSERVICE_FINAL_RESULT")) {
+    RefPtr<SpeechEvent> event = new SpeechEvent(
+      mRecognition, SpeechRecognition::EVENT_RECOGNITIONSERVICE_FINAL_RESULT);
+
+    event->mRecognitionResultList = BuildMockResultList();
+    NS_DispatchToMainThread(event);
+  }
+
+  return NS_OK;
+}
+
+SpeechRecognitionResultList*
+PocketSphinxSpeechRecognitionService::BuildMockResultList()
+{
+  SpeechRecognitionResultList* resultList =
+    new SpeechRecognitionResultList(mRecognition);
+  SpeechRecognitionResult* result = new SpeechRecognitionResult(mRecognition);
+  if (0 < mRecognition->MaxAlternatives()) {
+    SpeechRecognitionAlternative* alternative =
+      new SpeechRecognitionAlternative(mRecognition);
+
+    alternative->mTranscript = NS_LITERAL_STRING("Mock final result");
+    alternative->mConfidence = 0.0f;
+
+    result->mItems.AppendElement(alternative);
+  }
+  resultList->mItems.AppendElement(result);
+
+  return resultList;
+}
+
+} // namespace mozilla