summaryrefslogtreecommitdiffstats
path: root/media/pocketsphinx/src/allphone_search.h
diff options
context:
space:
mode:
authorMatt A. Tobin <mattatobin@localhost.localdomain>2018-02-02 04:16:08 -0500
committerMatt A. Tobin <mattatobin@localhost.localdomain>2018-02-02 04:16:08 -0500
commit5f8de423f190bbb79a62f804151bc24824fa32d8 (patch)
tree10027f336435511475e392454359edea8e25895d /media/pocketsphinx/src/allphone_search.h
parent49ee0794b5d912db1f95dce6eb52d781dc210db5 (diff)
downloadUXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.gz
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.lz
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.xz
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.zip
Add m-esr52 at 52.6.0
Diffstat (limited to 'media/pocketsphinx/src/allphone_search.h')
-rw-r--r--media/pocketsphinx/src/allphone_search.h179
1 files changed, 179 insertions, 0 deletions
diff --git a/media/pocketsphinx/src/allphone_search.h b/media/pocketsphinx/src/allphone_search.h
new file mode 100644
index 000000000..2658e87c2
--- /dev/null
+++ b/media/pocketsphinx/src/allphone_search.h
@@ -0,0 +1,179 @@
+/* -*- c-basic-offset:4; indent-tabs-mode: nil -*- */
+/* ====================================================================
+ * Copyright (c) 2014 Carnegie Mellon University. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ *
+ * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
+ * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
+ * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ====================================================================
+ *
+ */
+
+/*
+ * allphone_search.h -- Search structures for phoneme decoding.
+ */
+
+
+#ifndef __ALLPHONE_SEARCH_H__
+#define __ALLPHONE_SEARCH_H__
+
+
+/* SphinxBase headers. */
+#include <sphinxbase/glist.h>
+#include <sphinxbase/cmd_ln.h>
+#include <sphinxbase/ngram_model.h>
+#include <sphinxbase/bitvec.h>
+
+/* Local headers. */
+#include "pocketsphinx_internal.h"
+#include "blkarray_list.h"
+#include "hmm.h"
+
+/**
+ * Models a single unique <senone-sequence, tmat> pair.
+ * Can represent several different triphones, but all with the same parent basephone.
+ * (NOTE: Word-position attribute of triphone is ignored.)
+ */
+typedef struct phmm_s {
+ hmm_t hmm; /**< Base HMM structure */
+ s3pid_t pid; /**< Phone id (temp. during init.) */
+ s3cipid_t ci; /**< Parent basephone for this PHMM */
+ bitvec_t *lc; /**< Set (bit-vector) of left context phones seen for this PHMM */
+ bitvec_t *rc; /**< Set (bit-vector) of right context phones seen for this PHMM */
+ struct phmm_s *next; /**< Next unique PHMM for same parent basephone */
+ struct plink_s *succlist; /**< List of predecessor PHMM nodes */
+} phmm_t;
+
+/**
+ * List of links from a PHMM node to its successors; one link per successor.
+ */
+typedef struct plink_s {
+ phmm_t *phmm; /**< Successor PHMM node */
+ struct plink_s *next; /**< Next link for parent PHMM node */
+} plink_t;
+
+/**
+ * History (paths) information at any point in allphone Viterbi search.
+ */
+typedef struct history_s {
+ phmm_t *phmm; /**< PHMM ending this path */
+ int32 score; /**< Path score for this path */
+ int32 tscore; /**< Transition score for this path */
+ frame_idx_t ef; /**< End frame */
+ int32 hist; /**< Previous history entry */
+} history_t;
+
+/**
+ * Phone level segmentation information
+ */
+typedef struct phseg_s {
+ s3cipid_t ci; /* CI-phone id */
+ frame_idx_t sf, ef; /* Start and end frame for this phone occurrence */
+ int32 score; /* Acoustic score for this segment of alignment */
+ int32 tscore; /* Transition ("LM") score for this segment */
+} phseg_t;
+
+/**
+ * Segment iterator over list of phseg
+ */
+typedef struct phseg_iter_s {
+ ps_seg_t base;
+ glist_t seg;
+} phseg_iter_t;
+
+/**
+ * Implementation of allphone search structure.
+ */
+typedef struct allphone_search_s {
+ ps_search_t base;
+
+ hmm_context_t *hmmctx; /**< HMM context. */
+ ngram_model_t *lm; /**< Ngram model set */
+ int32 ci_only; /**< Use context-independent phones for decoding */
+ phmm_t **ci_phmm; /**< PHMM lists (for each CI phone) */
+ int32 *ci2lmwid; /**< Mapping of CI phones to LM word IDs */
+
+ int32 beam, pbeam; /**< Effective beams after applying beam_factor */
+ int32 lw, inspen; /**< Language weights */
+
+ frame_idx_t frame; /**< Current frame. */
+ float32 ascale; /**< Acoustic score scale for posterior probabilities. */
+
+ int32 n_tot_frame; /**< Total number of frames processed */
+ int32 n_hmm_eval; /**< Total HMMs evaluated this utt */
+ int32 n_sen_eval; /**< Total senones evaluated this utt */
+
+ /* Backtrace information */
+ blkarray_list_t *history; /**< List of history nodes allocated in each frame */
+ /* Hypothesis DAG */
+ glist_t segments;
+
+ ptmr_t perf; /**< Performance counter */
+
+} allphone_search_t;
+
+/**
+ * Create, initialize and return a search module.
+ */
+ps_search_t *allphone_search_init(ngram_model_t * lm,
+ cmd_ln_t * config,
+ acmod_t * acmod,
+ dict_t * dict, dict2pid_t * d2p);
+
+/**
+ * Deallocate search structure.
+ */
+void allphone_search_free(ps_search_t * search);
+
+/**
+ * Update allphone search module.
+ */
+int allphone_search_reinit(ps_search_t * search, dict_t * dict,
+ dict2pid_t * d2p);
+
+/**
+ * Prepare the allphone search structure for beginning decoding of the next
+ * utterance.
+ */
+int allphone_search_start(ps_search_t * search);
+
+/**
+ * Step one frame forward through the Viterbi search.
+ */
+int allphone_search_step(ps_search_t * search, int frame_idx);
+
+/**
+ * Windup and clean the allphone search structure after utterance.
+ */
+int allphone_search_finish(ps_search_t * search);
+
+/**
+ * Get hypothesis string from the allphone search.
+ */
+char const *allphone_search_hyp(ps_search_t * search, int32 * out_score,
+ int32 * out_is_final);
+
+#endif /* __ALLPHONE_SEARCH_H__ */