summaryrefslogtreecommitdiffstats
path: root/media/pocketsphinx/src/ngram_search.c
diff options
context:
space:
mode:
authorMatt A. Tobin <mattatobin@localhost.localdomain>2018-02-02 04:16:08 -0500
committerMatt A. Tobin <mattatobin@localhost.localdomain>2018-02-02 04:16:08 -0500
commit5f8de423f190bbb79a62f804151bc24824fa32d8 (patch)
tree10027f336435511475e392454359edea8e25895d /media/pocketsphinx/src/ngram_search.c
parent49ee0794b5d912db1f95dce6eb52d781dc210db5 (diff)
downloadUXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.gz
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.lz
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.xz
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.zip
Add m-esr52 at 52.6.0
Diffstat (limited to 'media/pocketsphinx/src/ngram_search.c')
-rw-r--r--media/pocketsphinx/src/ngram_search.c1409
1 files changed, 1409 insertions, 0 deletions
diff --git a/media/pocketsphinx/src/ngram_search.c b/media/pocketsphinx/src/ngram_search.c
new file mode 100644
index 000000000..47e488c3a
--- /dev/null
+++ b/media/pocketsphinx/src/ngram_search.c
@@ -0,0 +1,1409 @@
+/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
+/* ====================================================================
+ * Copyright (c) 2008 Carnegie Mellon University. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * This work was supported in part by funding from the Defense Advanced
+ * Research Projects Agency and the National Science Foundation of the
+ * United States of America, and the CMU Sphinx Speech Consortium.
+ *
+ * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
+ * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
+ * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ====================================================================
+ *
+ */
+
+/**
+ * @file ngram_search.c N-Gram based multi-pass search ("FBS")
+ */
+
+/* System headers. */
+#include <string.h>
+#include <assert.h>
+
+/* SphinxBase headers. */
+#include <sphinxbase/ckd_alloc.h>
+#include <sphinxbase/listelem_alloc.h>
+#include <sphinxbase/err.h>
+
+/* Local headers. */
+#include "pocketsphinx_internal.h"
+#include "ps_lattice_internal.h"
+#include "ngram_search.h"
+#include "ngram_search_fwdtree.h"
+#include "ngram_search_fwdflat.h"
+
+static int ngram_search_start(ps_search_t *search);
+static int ngram_search_step(ps_search_t *search, int frame_idx);
+static int ngram_search_finish(ps_search_t *search);
+static int ngram_search_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p);
+static char const *ngram_search_hyp(ps_search_t *search, int32 *out_score, int32 *out_is_final);
+static int32 ngram_search_prob(ps_search_t *search);
+static ps_seg_t *ngram_search_seg_iter(ps_search_t *search, int32 *out_score);
+
+static ps_searchfuncs_t ngram_funcs = {
+ /* name: */ "ngram",
+ /* start: */ ngram_search_start,
+ /* step: */ ngram_search_step,
+ /* finish: */ ngram_search_finish,
+ /* reinit: */ ngram_search_reinit,
+ /* free: */ ngram_search_free,
+ /* lattice: */ ngram_search_lattice,
+ /* hyp: */ ngram_search_hyp,
+ /* prob: */ ngram_search_prob,
+ /* seg_iter: */ ngram_search_seg_iter,
+};
+
+static ngram_model_t *default_lm;
+
+static void
+ngram_search_update_widmap(ngram_search_t *ngs)
+{
+ char const **words;
+ int32 i, n_words;
+
+ /* It's okay to include fillers since they won't be in the LM */
+ n_words = ps_search_n_words(ngs);
+ words = (char const**)ckd_calloc(n_words, sizeof(*words));
+ /* This will include alternates, again, that's okay since they aren't in the LM */
+ for (i = 0; i < n_words; ++i)
+ words[i] = dict_wordstr(ps_search_dict(ngs), i);
+ ngram_model_set_map_words(ngs->lmset, words, n_words);
+ ckd_free(words);
+}
+
+static void
+ngram_search_calc_beams(ngram_search_t *ngs)
+{
+ cmd_ln_t *config;
+ acmod_t *acmod;
+
+ config = ps_search_config(ngs);
+ acmod = ps_search_acmod(ngs);
+
+ /* Log beam widths. */
+ ngs->beam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-beam"))>>SENSCR_SHIFT;
+ ngs->wbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-wbeam"))>>SENSCR_SHIFT;
+ ngs->pbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-pbeam"))>>SENSCR_SHIFT;
+ ngs->lpbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-lpbeam"))>>SENSCR_SHIFT;
+ ngs->lponlybeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-lponlybeam"))>>SENSCR_SHIFT;
+ ngs->fwdflatbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-fwdflatbeam"))>>SENSCR_SHIFT;
+ ngs->fwdflatwbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-fwdflatwbeam"))>>SENSCR_SHIFT;
+
+ /* Absolute pruning parameters. */
+ ngs->maxwpf = cmd_ln_int32_r(config, "-maxwpf");
+ ngs->maxhmmpf = cmd_ln_int32_r(config, "-maxhmmpf");
+
+ /* Various penalties which may or may not be useful. */
+ ngs->wip = logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-wip")) >>SENSCR_SHIFT;
+ ngs->nwpen = logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-nwpen")) >>SENSCR_SHIFT;
+ ngs->pip = logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-pip")) >>SENSCR_SHIFT;
+ ngs->silpen = ngs->pip
+ + (logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-silprob"))>>SENSCR_SHIFT);
+ ngs->fillpen = ngs->pip
+ + (logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-fillprob"))>>SENSCR_SHIFT);
+
+ /* Language weight ratios for fwdflat and bestpath search. */
+ ngs->fwdflat_fwdtree_lw_ratio =
+ cmd_ln_float32_r(config, "-fwdflatlw")
+ / cmd_ln_float32_r(config, "-lw");
+ ngs->bestpath_fwdtree_lw_ratio =
+ cmd_ln_float32_r(config, "-bestpathlw")
+ / cmd_ln_float32_r(config, "-lw");
+
+ /* Acoustic score scale for posterior probabilities. */
+ ngs->ascale = 1.0 / cmd_ln_float32_r(config, "-ascale");
+}
+
+ps_search_t *
+ngram_search_init(ngram_model_t *lm,
+ cmd_ln_t *config,
+ acmod_t *acmod,
+ dict_t *dict,
+ dict2pid_t *d2p)
+{
+ ngram_search_t *ngs;
+ static char *lmname = "default";
+
+ /* Make the acmod's feature buffer growable if we are doing two-pass
+ * search. */
+ acmod_set_grow(acmod, cmd_ln_boolean_r(config, "-fwdflat") &&
+ cmd_ln_boolean_r(config, "-fwdtree"));
+
+ ngs = ckd_calloc(1, sizeof(*ngs));
+ ps_search_init(&ngs->base, &ngram_funcs, config, acmod, dict, d2p);
+ ngs->hmmctx = hmm_context_init(bin_mdef_n_emit_state(acmod->mdef),
+ acmod->tmat->tp, NULL, acmod->mdef->sseq);
+ if (ngs->hmmctx == NULL) {
+ ps_search_free(ps_search_base(ngs));
+ return NULL;
+ }
+ ngs->chan_alloc = listelem_alloc_init(sizeof(chan_t));
+ ngs->root_chan_alloc = listelem_alloc_init(sizeof(root_chan_t));
+ ngs->latnode_alloc = listelem_alloc_init(sizeof(ps_latnode_t));
+
+ /* Calculate various beam widths and such. */
+ ngram_search_calc_beams(ngs);
+
+ /* Allocate a billion different tables for stuff. */
+ ngs->word_chan = ckd_calloc(dict_size(dict),
+ sizeof(*ngs->word_chan));
+ ngs->word_lat_idx = ckd_calloc(dict_size(dict),
+ sizeof(*ngs->word_lat_idx));
+ ngs->word_active = bitvec_alloc(dict_size(dict));
+ ngs->last_ltrans = ckd_calloc(dict_size(dict),
+ sizeof(*ngs->last_ltrans));
+
+ /* FIXME: All these structures need to be made dynamic with
+ * garbage collection. */
+ ngs->bp_table_size = cmd_ln_int32_r(config, "-latsize");
+ ngs->bp_table = ckd_calloc(ngs->bp_table_size,
+ sizeof(*ngs->bp_table));
+ /* FIXME: This thing is frickin' huge. */
+ ngs->bscore_stack_size = ngs->bp_table_size * 20;
+ ngs->bscore_stack = ckd_calloc(ngs->bscore_stack_size,
+ sizeof(*ngs->bscore_stack));
+ ngs->n_frame_alloc = 256;
+ ngs->bp_table_idx = ckd_calloc(ngs->n_frame_alloc + 1,
+ sizeof(*ngs->bp_table_idx));
+ ++ngs->bp_table_idx; /* Make bptableidx[-1] valid */
+
+ /* Allocate active word list array */
+ ngs->active_word_list = ckd_calloc_2d(2, dict_size(dict),
+ sizeof(**ngs->active_word_list));
+
+ ngs->lmset = ngram_model_set_init(config, &lm, &lmname, NULL, 1);
+ if (!ngs->lmset)
+ goto error_out;
+
+ if (ngram_wid(ngs->lmset, S3_FINISH_WORD) ==
+ ngram_unknown_wid(ngs->lmset))
+ {
+ E_ERROR("Language model/set does not contain </s>, "
+ "recognition will fail\n");
+ goto error_out;
+ }
+
+ /* Create word mappings. */
+ ngram_search_update_widmap(ngs);
+
+ /* Initialize fwdtree, fwdflat, bestpath modules if necessary. */
+ if (cmd_ln_boolean_r(config, "-fwdtree")) {
+ ngram_fwdtree_init(ngs);
+ ngs->fwdtree = TRUE;
+ ngs->fwdtree_perf.name = "fwdtree";
+ ptmr_init(&ngs->fwdtree_perf);
+ }
+ if (cmd_ln_boolean_r(config, "-fwdflat")) {
+ ngram_fwdflat_init(ngs);
+ ngs->fwdflat = TRUE;
+ ngs->fwdflat_perf.name = "fwdflat";
+ ptmr_init(&ngs->fwdflat_perf);
+ }
+ if (cmd_ln_boolean_r(config, "-bestpath")) {
+ ngs->bestpath = TRUE;
+ ngs->bestpath_perf.name = "bestpath";
+ ptmr_init(&ngs->bestpath_perf);
+ }
+
+ return (ps_search_t *)ngs;
+
+error_out:
+ ngram_search_free((ps_search_t *)ngs);
+ return NULL;
+}
+
+static int
+ngram_search_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p)
+{
+ ngram_search_t *ngs = (ngram_search_t *)search;
+ int old_n_words;
+ int rv = 0;
+
+ /* Update the number of words. */
+ old_n_words = search->n_words;
+ if (old_n_words != dict_size(dict)) {
+ search->n_words = dict_size(dict);
+ /* Reallocate these temporary arrays. */
+ ckd_free(ngs->word_lat_idx);
+ ckd_free(ngs->word_active);
+ ckd_free(ngs->last_ltrans);
+ ckd_free_2d(ngs->active_word_list);
+ ngs->word_lat_idx = ckd_calloc(search->n_words, sizeof(*ngs->word_lat_idx));
+ ngs->word_active = bitvec_alloc(search->n_words);
+ ngs->last_ltrans = ckd_calloc(search->n_words, sizeof(*ngs->last_ltrans));
+ ngs->active_word_list
+ = ckd_calloc_2d(2, search->n_words,
+ sizeof(**ngs->active_word_list));
+ }
+
+ /* Free old dict2pid, dict */
+ ps_search_base_reinit(search, dict, d2p);
+
+ if (ngs->lmset == NULL)
+ return 0;
+
+ /* Update beam widths. */
+ ngram_search_calc_beams(ngs);
+
+ /* Update word mappings. */
+ ngram_search_update_widmap(ngs);
+
+ /* Now rebuild lextrees. */
+ if (ngs->fwdtree) {
+ if ((rv = ngram_fwdtree_reinit(ngs)) < 0)
+ return rv;
+ }
+ if (ngs->fwdflat) {
+ if ((rv = ngram_fwdflat_reinit(ngs)) < 0)
+ return rv;
+ }
+
+ return rv;
+}
+
+void
+ngram_search_free(ps_search_t *search)
+{
+ ngram_search_t *ngs = (ngram_search_t *)search;
+
+ ps_search_deinit(search);
+ if (ngs->fwdtree)
+ ngram_fwdtree_deinit(ngs);
+ if (ngs->fwdflat)
+ ngram_fwdflat_deinit(ngs);
+ if (ngs->bestpath) {
+ double n_speech = (double)ngs->n_tot_frame
+ / cmd_ln_int32_r(ps_search_config(ngs), "-frate");
+
+ E_INFO("TOTAL bestpath %.2f CPU %.3f xRT\n",
+ ngs->bestpath_perf.t_tot_cpu,
+ ngs->bestpath_perf.t_tot_cpu / n_speech);
+ E_INFO("TOTAL bestpath %.2f wall %.3f xRT\n",
+ ngs->bestpath_perf.t_tot_elapsed,
+ ngs->bestpath_perf.t_tot_elapsed / n_speech);
+ }
+
+ hmm_context_free(ngs->hmmctx);
+ listelem_alloc_free(ngs->chan_alloc);
+ listelem_alloc_free(ngs->root_chan_alloc);
+ listelem_alloc_free(ngs->latnode_alloc);
+ ngram_model_free(ngs->lmset);
+
+ ckd_free(ngs->word_chan);
+ ckd_free(ngs->word_lat_idx);
+ bitvec_free(ngs->word_active);
+ ckd_free(ngs->bp_table);
+ ckd_free(ngs->bscore_stack);
+ if (ngs->bp_table_idx != NULL)
+ ckd_free(ngs->bp_table_idx - 1);
+ ckd_free_2d(ngs->active_word_list);
+ ckd_free(ngs->last_ltrans);
+ ckd_free(ngs);
+}
+
+int
+ngram_search_mark_bptable(ngram_search_t *ngs, int frame_idx)
+{
+ if (frame_idx >= ngs->n_frame_alloc) {
+ ngs->n_frame_alloc *= 2;
+ ngs->bp_table_idx = ckd_realloc(ngs->bp_table_idx - 1,
+ (ngs->n_frame_alloc + 1)
+ * sizeof(*ngs->bp_table_idx));
+ if (ngs->frm_wordlist) {
+ ngs->frm_wordlist = ckd_realloc(ngs->frm_wordlist,
+ ngs->n_frame_alloc
+ * sizeof(*ngs->frm_wordlist));
+ }
+ ++ngs->bp_table_idx; /* Make bptableidx[-1] valid */
+ }
+ ngs->bp_table_idx[frame_idx] = ngs->bpidx;
+ return ngs->bpidx;
+}
+
+static void
+set_real_wid(ngram_search_t *ngs, int32 bp)
+{
+ bptbl_t *ent, *prev;
+
+ assert(bp != NO_BP);
+ ent = ngs->bp_table + bp;
+ if (ent->bp == NO_BP)
+ prev = NULL;
+ else
+ prev = ngs->bp_table + ent->bp;
+
+ /* Propagate lm state for fillers, rotate it for words. */
+ if (dict_filler_word(ps_search_dict(ngs), ent->wid)) {
+ if (prev != NULL) {
+ ent->real_wid = prev->real_wid;
+ ent->prev_real_wid = prev->prev_real_wid;
+ }
+ else {
+ ent->real_wid = dict_basewid(ps_search_dict(ngs),
+ ent->wid);
+ ent->prev_real_wid = BAD_S3WID;
+ }
+ }
+ else {
+ ent->real_wid = dict_basewid(ps_search_dict(ngs), ent->wid);
+ if (prev != NULL)
+ ent->prev_real_wid = prev->real_wid;
+ else
+ ent->prev_real_wid = BAD_S3WID;
+ }
+}
+
+#define NGRAM_HISTORY_LONG_WORD 2000 /* 20s */
+
+void
+ngram_search_save_bp(ngram_search_t *ngs, int frame_idx,
+ int32 w, int32 score, int32 path, int32 rc)
+{
+ int32 bp;
+
+ /* Look for an existing exit for this word in this frame. The
+ * only reason one would exist is from a different right context
+ * triphone, but of course that happens quite frequently. */
+ bp = ngs->word_lat_idx[w];
+ if (bp != NO_BP) {
+
+ if (frame_idx - ngs->bp_table[path].frame > NGRAM_HISTORY_LONG_WORD) {
+ E_WARN("Word '%s' survived for %d frames, potential overpruning\n", dict_wordstr(ps_search_dict(ngs), w),
+ frame_idx - ngs->bp_table[path].frame);
+ }
+
+ /* Keep only the best scoring one, we will reconstruct the
+ * others from the right context scores - usually the history
+ * is not lost. */
+ if (ngs->bp_table[bp].score WORSE_THAN score) {
+ assert(path != bp); /* Pathological. */
+ if (ngs->bp_table[bp].bp != path) {
+ int32 bplh[2], newlh[2];
+ /* But, sometimes, the history *is* lost. If we wanted to
+ * do exact language model scoring we'd have to preserve
+ * these alternate histories. */
+ E_DEBUG(2,("Updating path history %d => %d frame %d\n",
+ ngs->bp_table[bp].bp, path, frame_idx));
+ bplh[0] = ngs->bp_table[bp].bp == -1
+ ? -1 : ngs->bp_table[ngs->bp_table[bp].bp].prev_real_wid;
+ bplh[1] = ngs->bp_table[bp].bp == -1
+ ? -1 : ngs->bp_table[ngs->bp_table[bp].bp].real_wid;
+ newlh[0] = path == -1
+ ? -1 : ngs->bp_table[path].prev_real_wid;
+ newlh[1] = path == -1
+ ? -1 : ngs->bp_table[path].real_wid;
+ /* Actually it's worth checking how often the actual
+ * language model state changes. */
+ if (bplh[0] != newlh[0] || bplh[1] != newlh[1]) {
+ /* It's fairly rare that the actual language model
+ * state changes, but it does happen some
+ * times. */
+ E_DEBUG(1, ("Updating language model state %s,%s => %s,%s frame %d\n",
+ dict_wordstr(ps_search_dict(ngs), bplh[0]),
+ dict_wordstr(ps_search_dict(ngs), bplh[1]),
+ dict_wordstr(ps_search_dict(ngs), newlh[0]),
+ dict_wordstr(ps_search_dict(ngs), newlh[1]),
+ frame_idx));
+ set_real_wid(ngs, bp);
+ }
+ ngs->bp_table[bp].bp = path;
+ }
+ ngs->bp_table[bp].score = score;
+ }
+ /* But do keep track of scores for all right contexts, since
+ * we need them to determine the starting path scores for any
+ * successors of this word exit. */
+ if (ngs->bp_table[bp].s_idx != -1)
+ ngs->bscore_stack[ngs->bp_table[bp].s_idx + rc] = score;
+ }
+ else {
+ int32 i, rcsize;
+ bptbl_t *be;
+
+ /* This might happen if recognition fails. */
+ if (ngs->bpidx == NO_BP) {
+ E_ERROR("No entries in backpointer table!");
+ return;
+ }
+
+ /* Expand the backpointer tables if necessary. */
+ if (ngs->bpidx >= ngs->bp_table_size) {
+ ngs->bp_table_size *= 2;
+ ngs->bp_table = ckd_realloc(ngs->bp_table,
+ ngs->bp_table_size
+ * sizeof(*ngs->bp_table));
+ E_INFO("Resized backpointer table to %d entries\n", ngs->bp_table_size);
+ }
+ if (ngs->bss_head >= ngs->bscore_stack_size
+ - bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef)) {
+ ngs->bscore_stack_size *= 2;
+ ngs->bscore_stack = ckd_realloc(ngs->bscore_stack,
+ ngs->bscore_stack_size
+ * sizeof(*ngs->bscore_stack));
+ E_INFO("Resized score stack to %d entries\n", ngs->bscore_stack_size);
+ }
+
+ ngs->word_lat_idx[w] = ngs->bpidx;
+ be = &(ngs->bp_table[ngs->bpidx]);
+ be->wid = w;
+ be->frame = frame_idx;
+ be->bp = path;
+ be->score = score;
+ be->s_idx = ngs->bss_head;
+ be->valid = TRUE;
+ assert(path != ngs->bpidx);
+
+ /* DICT2PID */
+ /* Get diphone ID for final phone and number of ssids corresponding to it. */
+ be->last_phone = dict_last_phone(ps_search_dict(ngs),w);
+ if (dict_is_single_phone(ps_search_dict(ngs), w)) {
+ be->last2_phone = -1;
+ be->s_idx = -1;
+ rcsize = 0;
+ }
+ else {
+ be->last2_phone = dict_second_last_phone(ps_search_dict(ngs),w);
+ rcsize = dict2pid_rssid(ps_search_dict2pid(ngs),
+ be->last_phone, be->last2_phone)->n_ssid;
+ }
+ /* Allocate some space on the bscore_stack for all of these triphones. */
+ for (i = 0; i < rcsize; ++i)
+ ngs->bscore_stack[ngs->bss_head + i] = WORST_SCORE;
+ if (rcsize)
+ ngs->bscore_stack[ngs->bss_head + rc] = score;
+ set_real_wid(ngs, ngs->bpidx);
+
+ ngs->bpidx++;
+ ngs->bss_head += rcsize;
+ }
+}
+
+int
+ngram_search_find_exit(ngram_search_t *ngs, int frame_idx, int32 *out_best_score, int32 *out_is_final)
+{
+ /* End of backpointers for this frame. */
+ int end_bpidx;
+ int best_exit, bp;
+ int32 best_score;
+
+ /* No hypothesis means no exit node! */
+ if (ngs->n_frame == 0)
+ return NO_BP;
+
+ if (frame_idx == -1 || frame_idx >= ngs->n_frame)
+ frame_idx = ngs->n_frame - 1;
+ end_bpidx = ngs->bp_table_idx[frame_idx];
+
+ best_score = WORST_SCORE;
+ best_exit = NO_BP;
+
+ /* Scan back to find a frame with some backpointers in it. */
+ while (frame_idx >= 0 && ngs->bp_table_idx[frame_idx] == end_bpidx)
+ --frame_idx;
+ /* This is NOT an error, it just means there is no hypothesis yet. */
+ if (frame_idx < 0)
+ return NO_BP;
+
+ /* Now find the entry for </s> OR the best scoring entry. */
+ assert(end_bpidx < ngs->bp_table_size);
+ for (bp = ngs->bp_table_idx[frame_idx]; bp < end_bpidx; ++bp) {
+ if (ngs->bp_table[bp].wid == ps_search_finish_wid(ngs)
+ || ngs->bp_table[bp].score BETTER_THAN best_score) {
+ best_score = ngs->bp_table[bp].score;
+ best_exit = bp;
+ }
+ if (ngs->bp_table[bp].wid == ps_search_finish_wid(ngs))
+ break;
+ }
+
+ if (out_best_score) {
+ *out_best_score = best_score;
+ }
+ if (out_is_final) {
+ *out_is_final = (ngs->bp_table[bp].wid == ps_search_finish_wid(ngs));
+ }
+ return best_exit;
+}
+
+char const *
+ngram_search_bp_hyp(ngram_search_t *ngs, int bpidx)
+{
+ ps_search_t *base = ps_search_base(ngs);
+ char *c;
+ size_t len;
+ int bp;
+
+ if (bpidx == NO_BP)
+ return NULL;
+
+ bp = bpidx;
+ len = 0;
+ while (bp != NO_BP) {
+ bptbl_t *be = &ngs->bp_table[bp];
+ bp = be->bp;
+ if (dict_real_word(ps_search_dict(ngs), be->wid))
+ len += strlen(dict_basestr(ps_search_dict(ngs), be->wid)) + 1;
+ }
+
+ ckd_free(base->hyp_str);
+ if (len == 0) {
+ base->hyp_str = NULL;
+ return base->hyp_str;
+ }
+ base->hyp_str = ckd_calloc(1, len);
+
+ bp = bpidx;
+ c = base->hyp_str + len - 1;
+ while (bp != NO_BP) {
+ bptbl_t *be = &ngs->bp_table[bp];
+ size_t len;
+
+ bp = be->bp;
+ if (dict_real_word(ps_search_dict(ngs), be->wid)) {
+ len = strlen(dict_basestr(ps_search_dict(ngs), be->wid));
+ c -= len;
+ memcpy(c, dict_basestr(ps_search_dict(ngs), be->wid), len);
+ if (c > base->hyp_str) {
+ --c;
+ *c = ' ';
+ }
+ }
+ }
+
+ return base->hyp_str;
+}
+
+void
+ngram_search_alloc_all_rc(ngram_search_t *ngs, int32 w)
+{
+ chan_t *hmm, *thmm;
+ xwdssid_t *rssid;
+ int32 i, tmatid, ciphone;
+
+ /* DICT2PID */
+ /* Get pointer to array of triphones for final diphone. */
+ assert(!dict_is_single_phone(ps_search_dict(ngs), w));
+ ciphone = dict_last_phone(ps_search_dict(ngs),w);
+ rssid = dict2pid_rssid(ps_search_dict2pid(ngs),
+ ciphone,
+ dict_second_last_phone(ps_search_dict(ngs),w));
+ tmatid = bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef, ciphone);
+ hmm = ngs->word_chan[w];
+ if ((hmm == NULL) || (hmm_nonmpx_ssid(&hmm->hmm) != rssid->ssid[0])) {
+ hmm = listelem_malloc(ngs->chan_alloc);
+ hmm->next = ngs->word_chan[w];
+ ngs->word_chan[w] = hmm;
+
+ hmm->info.rc_id = 0;
+ hmm->ciphone = ciphone;
+ hmm_init(ngs->hmmctx, &hmm->hmm, FALSE, rssid->ssid[0], tmatid);
+ E_DEBUG(3,("allocated rc_id 0 ssid %d ciphone %d lc %d word %s\n",
+ rssid->ssid[0], hmm->ciphone,
+ dict_second_last_phone(ps_search_dict(ngs),w),
+ dict_wordstr(ps_search_dict(ngs),w)));
+ }
+ for (i = 1; i < rssid->n_ssid; ++i) {
+ if ((hmm->next == NULL) || (hmm_nonmpx_ssid(&hmm->next->hmm) != rssid->ssid[i])) {
+ thmm = listelem_malloc(ngs->chan_alloc);
+ thmm->next = hmm->next;
+ hmm->next = thmm;
+ hmm = thmm;
+
+ hmm->info.rc_id = i;
+ hmm->ciphone = ciphone;
+ hmm_init(ngs->hmmctx, &hmm->hmm, FALSE, rssid->ssid[i], tmatid);
+ E_DEBUG(3,("allocated rc_id %d ssid %d ciphone %d lc %d word %s\n",
+ i, rssid->ssid[i], hmm->ciphone,
+ dict_second_last_phone(ps_search_dict(ngs),w),
+ dict_wordstr(ps_search_dict(ngs),w)));
+ }
+ else
+ hmm = hmm->next;
+ }
+}
+
+void
+ngram_search_free_all_rc(ngram_search_t *ngs, int32 w)
+{
+ chan_t *hmm, *thmm;
+
+ for (hmm = ngs->word_chan[w]; hmm; hmm = thmm) {
+ thmm = hmm->next;
+ hmm_deinit(&hmm->hmm);
+ listelem_free(ngs->chan_alloc, hmm);
+ }
+ ngs->word_chan[w] = NULL;
+}
+
+int32
+ngram_search_exit_score(ngram_search_t *ngs, bptbl_t *pbe, int rcphone)
+{
+ /* DICT2PID */
+ /* Get the mapping from right context phone ID to index in the
+ * right context table and the bscore_stack. */
+ if (pbe->last2_phone == -1) {
+ /* No right context for single phone predecessor words. */
+ return pbe->score;
+ }
+ else {
+ xwdssid_t *rssid;
+ /* Find the index for the last diphone of the previous word +
+ * the first phone of the current word. */
+ rssid = dict2pid_rssid(ps_search_dict2pid(ngs),
+ pbe->last_phone, pbe->last2_phone);
+ /* This may be WORST_SCORE, which means that there was no exit
+ * with rcphone as right context. */
+ return ngs->bscore_stack[pbe->s_idx + rssid->cimap[rcphone]];
+ }
+}
+
+/*
+ * Compute acoustic and LM scores for a BPTable entry (segment).
+ */
+void
+ngram_compute_seg_score(ngram_search_t *ngs, bptbl_t *be, float32 lwf,
+ int32 *out_ascr, int32 *out_lscr)
+{
+ bptbl_t *pbe;
+ int32 start_score;
+
+ /* Start of utterance. */
+ if (be->bp == NO_BP) {
+ *out_ascr = be->score;
+ *out_lscr = 0;
+ return;
+ }
+
+ /* Otherwise, calculate lscr and ascr. */
+ pbe = ngs->bp_table + be->bp;
+ start_score = ngram_search_exit_score(ngs, pbe,
+ dict_first_phone(ps_search_dict(ngs),be->wid));
+ assert(start_score BETTER_THAN WORST_SCORE);
+
+ /* FIXME: These result in positive acoustic scores when filler
+ words have non-filler pronunciations. That whole business
+ is still pretty much broken but at least it doesn't
+ segfault. */
+ if (be->wid == ps_search_silence_wid(ngs)) {
+ *out_lscr = ngs->silpen;
+ }
+ else if (dict_filler_word(ps_search_dict(ngs), be->wid)) {
+ *out_lscr = ngs->fillpen;
+ }
+ else {
+ int32 n_used;
+ *out_lscr = ngram_tg_score(ngs->lmset,
+ be->real_wid,
+ pbe->real_wid,
+ pbe->prev_real_wid,
+ &n_used)>>SENSCR_SHIFT;
+ *out_lscr = *out_lscr * lwf;
+ }
+ *out_ascr = be->score - start_score - *out_lscr;
+}
+
+static int
+ngram_search_start(ps_search_t *search)
+{
+ ngram_search_t *ngs = (ngram_search_t *)search;
+
+ ngs->done = FALSE;
+ ngram_model_flush(ngs->lmset);
+ if (ngs->fwdtree)
+ ngram_fwdtree_start(ngs);
+ else if (ngs->fwdflat)
+ ngram_fwdflat_start(ngs);
+ else
+ return -1;
+ return 0;
+}
+
+static int
+ngram_search_step(ps_search_t *search, int frame_idx)
+{
+ ngram_search_t *ngs = (ngram_search_t *)search;
+
+ if (ngs->fwdtree)
+ return ngram_fwdtree_search(ngs, frame_idx);
+ else if (ngs->fwdflat)
+ return ngram_fwdflat_search(ngs, frame_idx);
+ else
+ return -1;
+}
+
+void
+dump_bptable(ngram_search_t *ngs)
+{
+ int i;
+ E_INFO("Backpointer table (%d entries):\n", ngs->bpidx);
+ for (i = 0; i < ngs->bpidx; ++i) {
+ bptbl_t *bpe = ngs->bp_table + i;
+ int j, rcsize;
+
+ E_INFO_NOFN("%-5d %-10s start %-3d end %-3d score %-8d bp %-3d real_wid %-5d prev_real_wid %-5d",
+ i, dict_wordstr(ps_search_dict(ngs), bpe->wid),
+ (bpe->bp == -1
+ ? 0 : ngs->bp_table[bpe->bp].frame + 1),
+ bpe->frame, bpe->score, bpe->bp,
+ bpe->real_wid, bpe->prev_real_wid);
+
+ if (bpe->last2_phone == -1)
+ rcsize = 0;
+ else
+ rcsize = dict2pid_rssid(ps_search_dict2pid(ngs),
+ bpe->last_phone, bpe->last2_phone)->n_ssid;
+ if (rcsize) {
+ E_INFOCONT("\tbss");
+ for (j = 0; j < rcsize; ++j)
+ if (ngs->bscore_stack[bpe->s_idx + j] != WORST_SCORE)
+ E_INFOCONT(" %d", bpe->score - ngs->bscore_stack[bpe->s_idx + j]);
+ }
+ E_INFOCONT("\n");
+ }
+}
+
+static int
+ngram_search_finish(ps_search_t *search)
+{
+ ngram_search_t *ngs = (ngram_search_t *)search;
+
+ ngs->n_tot_frame += ngs->n_frame;
+ if (ngs->fwdtree) {
+ ngram_fwdtree_finish(ngs);
+ /* dump_bptable(ngs); */
+
+ /* Now do fwdflat search in its entirety, if requested. */
+ if (ngs->fwdflat) {
+ int i;
+ /* Rewind the acoustic model. */
+ if (acmod_rewind(ps_search_acmod(ngs)) < 0)
+ return -1;
+ /* Now redo search. */
+ ngram_fwdflat_start(ngs);
+ i = 0;
+ while (ps_search_acmod(ngs)->n_feat_frame > 0) {
+ int nfr;
+ if ((nfr = ngram_fwdflat_search(ngs, i)) < 0)
+ return nfr;
+ acmod_advance(ps_search_acmod(ngs));
+ ++i;
+ }
+ ngram_fwdflat_finish(ngs);
+ /* And now, we should have a result... */
+ /* dump_bptable(ngs); */
+ }
+ }
+ else if (ngs->fwdflat) {
+ ngram_fwdflat_finish(ngs);
+ }
+
+ /* Mark the current utterance as done. */
+ ngs->done = TRUE;
+ return 0;
+}
+
+static ps_latlink_t *
+ngram_search_bestpath(ps_search_t *search, int32 *out_score, int backward)
+{
+ ngram_search_t *ngs = (ngram_search_t *)search;
+
+ if (search->last_link == NULL) {
+ search->last_link = ps_lattice_bestpath(search->dag, ngs->lmset,
+ ngs->bestpath_fwdtree_lw_ratio,
+ ngs->ascale);
+ if (search->last_link == NULL)
+ return NULL;
+ /* Also calculate betas so we can fill in the posterior
+ * probability field in the segmentation. */
+ if (search->post == 0)
+ search->post = ps_lattice_posterior(search->dag, ngs->lmset,
+ ngs->ascale);
+ }
+ if (out_score)
+ *out_score = search->last_link->path_scr + search->dag->final_node_ascr;
+ return search->last_link;
+}
+
+static char const *
+ngram_search_hyp(ps_search_t *search, int32 *out_score, int32 *out_is_final)
+{
+ ngram_search_t *ngs = (ngram_search_t *)search;
+
+ /* Only do bestpath search if the utterance is complete. */
+ if (ngs->bestpath && ngs->done) {
+ ps_lattice_t *dag;
+ ps_latlink_t *link;
+ char const *hyp;
+ double n_speech;
+
+ ptmr_reset(&ngs->bestpath_perf);
+ ptmr_start(&ngs->bestpath_perf);
+ if ((dag = ngram_search_lattice(search)) == NULL)
+ return NULL;
+ if ((link = ngram_search_bestpath(search, out_score, FALSE)) == NULL)
+ return NULL;
+ hyp = ps_lattice_hyp(dag, link);
+ ptmr_stop(&ngs->bestpath_perf);
+ n_speech = (double)dag->n_frames
+ / cmd_ln_int32_r(ps_search_config(ngs), "-frate");
+ E_INFO("bestpath %.2f CPU %.3f xRT\n",
+ ngs->bestpath_perf.t_cpu,
+ ngs->bestpath_perf.t_cpu / n_speech);
+ E_INFO("bestpath %.2f wall %.3f xRT\n",
+ ngs->bestpath_perf.t_elapsed,
+ ngs->bestpath_perf.t_elapsed / n_speech);
+ return hyp;
+ }
+ else {
+ int32 bpidx;
+
+ /* fwdtree and fwdflat use same backpointer table. */
+ bpidx = ngram_search_find_exit(ngs, -1, out_score, out_is_final);
+ if (bpidx != NO_BP)
+ return ngram_search_bp_hyp(ngs, bpidx);
+ }
+
+ return NULL;
+}
+
+static void
+ngram_search_bp2itor(ps_seg_t *seg, int bp)
+{
+ ngram_search_t *ngs = (ngram_search_t *)seg->search;
+ bptbl_t *be, *pbe;
+
+ be = &ngs->bp_table[bp];
+ pbe = be->bp == -1 ? NULL : &ngs->bp_table[be->bp];
+ seg->word = dict_wordstr(ps_search_dict(ngs), be->wid);
+ seg->ef = be->frame;
+ seg->sf = pbe ? pbe->frame + 1 : 0;
+ seg->prob = 0; /* Bogus value... */
+ /* Compute acoustic and LM scores for this segment. */
+ if (pbe == NULL) {
+ seg->ascr = be->score;
+ seg->lscr = 0;
+ seg->lback = 0;
+ }
+ else {
+ int32 start_score;
+
+ /* Find ending path score of previous word. */
+ start_score = ngram_search_exit_score(ngs, pbe,
+ dict_first_phone(ps_search_dict(ngs), be->wid));
+ assert(start_score BETTER_THAN WORST_SCORE);
+ if (be->wid == ps_search_silence_wid(ngs)) {
+ seg->lscr = ngs->silpen;
+ }
+ else if (dict_filler_word(ps_search_dict(ngs), be->wid)) {
+ seg->lscr = ngs->fillpen;
+ }
+ else {
+ seg->lscr = ngram_tg_score(ngs->lmset,
+ be->real_wid,
+ pbe->real_wid,
+ pbe->prev_real_wid,
+ &seg->lback)>>SENSCR_SHIFT;
+ seg->lscr = (int32)(seg->lscr * seg->lwf);
+ }
+ seg->ascr = be->score - start_score - seg->lscr;
+ }
+}
+
+static void
+ngram_bp_seg_free(ps_seg_t *seg)
+{
+ bptbl_seg_t *itor = (bptbl_seg_t *)seg;
+
+ ckd_free(itor->bpidx);
+ ckd_free(itor);
+}
+
+static ps_seg_t *
+ngram_bp_seg_next(ps_seg_t *seg)
+{
+ bptbl_seg_t *itor = (bptbl_seg_t *)seg;
+
+ if (++itor->cur == itor->n_bpidx) {
+ ngram_bp_seg_free(seg);
+ return NULL;
+ }
+
+ ngram_search_bp2itor(seg, itor->bpidx[itor->cur]);
+ return seg;
+}
+
+static ps_segfuncs_t ngram_bp_segfuncs = {
+ /* seg_next */ ngram_bp_seg_next,
+ /* seg_free */ ngram_bp_seg_free
+};
+
+static ps_seg_t *
+ngram_search_bp_iter(ngram_search_t *ngs, int bpidx, float32 lwf)
+{
+ bptbl_seg_t *itor;
+ int bp, cur;
+
+ /* Calling this an "iterator" is a bit of a misnomer since we have
+ * to get the entire backtrace in order to produce it. On the
+ * other hand, all we actually need is the bptbl IDs, and we can
+ * allocate a fixed-size array of them. */
+ itor = ckd_calloc(1, sizeof(*itor));
+ itor->base.vt = &ngram_bp_segfuncs;
+ itor->base.search = ps_search_base(ngs);
+ itor->base.lwf = lwf;
+ itor->n_bpidx = 0;
+ bp = bpidx;
+ while (bp != NO_BP) {
+ bptbl_t *be = &ngs->bp_table[bp];
+ bp = be->bp;
+ ++itor->n_bpidx;
+ }
+ if (itor->n_bpidx == 0) {
+ ckd_free(itor);
+ return NULL;
+ }
+ itor->bpidx = ckd_calloc(itor->n_bpidx, sizeof(*itor->bpidx));
+ cur = itor->n_bpidx - 1;
+ bp = bpidx;
+ while (bp != NO_BP) {
+ bptbl_t *be = &ngs->bp_table[bp];
+ itor->bpidx[cur] = bp;
+ bp = be->bp;
+ --cur;
+ }
+
+ /* Fill in relevant fields for first element. */
+ ngram_search_bp2itor((ps_seg_t *)itor, itor->bpidx[0]);
+
+ return (ps_seg_t *)itor;
+}
+
+static ps_seg_t *
+ngram_search_seg_iter(ps_search_t *search, int32 *out_score)
+{
+ ngram_search_t *ngs = (ngram_search_t *)search;
+
+ /* Only do bestpath search if the utterance is done. */
+ if (ngs->bestpath && ngs->done) {
+ ps_lattice_t *dag;
+ ps_latlink_t *link;
+ double n_speech;
+ ps_seg_t *itor;
+
+ ptmr_reset(&ngs->bestpath_perf);
+ ptmr_start(&ngs->bestpath_perf);
+ if ((dag = ngram_search_lattice(search)) == NULL)
+ return NULL;
+ if ((link = ngram_search_bestpath(search, out_score, TRUE)) == NULL)
+ return NULL;
+ itor = ps_lattice_seg_iter(dag, link,
+ ngs->bestpath_fwdtree_lw_ratio);
+ ptmr_stop(&ngs->bestpath_perf);
+ n_speech = (double)dag->n_frames
+ / cmd_ln_int32_r(ps_search_config(ngs), "-frate");
+ E_INFO("bestpath %.2f CPU %.3f xRT\n",
+ ngs->bestpath_perf.t_cpu,
+ ngs->bestpath_perf.t_cpu / n_speech);
+ E_INFO("bestpath %.2f wall %.3f xRT\n",
+ ngs->bestpath_perf.t_elapsed,
+ ngs->bestpath_perf.t_elapsed / n_speech);
+ return itor;
+ }
+ else {
+ int32 bpidx;
+
+ /* fwdtree and fwdflat use same backpointer table. */
+ bpidx = ngram_search_find_exit(ngs, -1, out_score, NULL);
+ return ngram_search_bp_iter(ngs, bpidx,
+ /* but different language weights... */
+ (ngs->done && ngs->fwdflat)
+ ? ngs->fwdflat_fwdtree_lw_ratio : 1.0);
+ }
+
+ return NULL;
+}
+
+static int32
+ngram_search_prob(ps_search_t *search)
+{
+ ngram_search_t *ngs = (ngram_search_t *)search;
+
+ /* Only do bestpath search if the utterance is done. */
+ if (ngs->bestpath && ngs->done) {
+ ps_lattice_t *dag;
+ ps_latlink_t *link;
+
+ if ((dag = ngram_search_lattice(search)) == NULL)
+ return 0;
+ if ((link = ngram_search_bestpath(search, NULL, TRUE)) == NULL)
+ return 0;
+ return search->post;
+ }
+ else {
+ /* FIXME: Give some kind of good estimate here, eventually. */
+ return 0;
+ }
+}
+
+static void
+create_dag_nodes(ngram_search_t *ngs, ps_lattice_t *dag)
+{
+ bptbl_t *bp_ptr;
+ int32 i;
+
+ for (i = 0, bp_ptr = ngs->bp_table; i < ngs->bpidx; ++i, ++bp_ptr) {
+ int32 sf, ef, wid;
+ ps_latnode_t *node;
+
+ /* Skip invalid backpointers (these result from -maxwpf pruning) */
+ if (!bp_ptr->valid)
+ continue;
+
+ sf = (bp_ptr->bp < 0) ? 0 : ngs->bp_table[bp_ptr->bp].frame + 1;
+ ef = bp_ptr->frame;
+ wid = bp_ptr->wid;
+
+ assert(ef < dag->n_frames);
+ /* Skip non-final </s> entries. */
+ if ((wid == ps_search_finish_wid(ngs)) && (ef < dag->n_frames - 1))
+ continue;
+
+ /* Skip if word not in LM */
+ if ((!dict_filler_word(ps_search_dict(ngs), wid))
+ && (!ngram_model_set_known_wid(ngs->lmset,
+ dict_basewid(ps_search_dict(ngs), wid))))
+ continue;
+
+ /* See if bptbl entry <wid,sf> already in lattice */
+ for (node = dag->nodes; node; node = node->next) {
+ if ((node->wid == wid) && (node->sf == sf))
+ break;
+ }
+
+ /* For the moment, store bptbl indices in node.{fef,lef} */
+ if (node)
+ node->lef = i;
+ else {
+ /* New node; link to head of list */
+ node = listelem_malloc(dag->latnode_alloc);
+ node->wid = wid;
+ node->sf = sf; /* This is a frame index. */
+ node->fef = node->lef = i; /* These are backpointer indices (argh) */
+ node->reachable = FALSE;
+ node->entries = NULL;
+ node->exits = NULL;
+
+ /* NOTE: This creates the list of nodes in reverse
+ * topological order, i.e. a node always precedes its
+ * antecedents in this list. */
+ node->next = dag->nodes;
+ dag->nodes = node;
+ ++dag->n_nodes;
+ }
+ }
+}
+
+static ps_latnode_t *
+find_start_node(ngram_search_t *ngs, ps_lattice_t *dag)
+{
+ ps_latnode_t *node;
+
+ /* Find start node <s>.0 */
+ for (node = dag->nodes; node; node = node->next) {
+ if ((node->wid == ps_search_start_wid(ngs)) && (node->sf == 0))
+ break;
+ }
+ if (!node) {
+ /* This is probably impossible. */
+ E_ERROR("Couldn't find <s> in first frame\n");
+ return NULL;
+ }
+ return node;
+}
+
+static ps_latnode_t *
+find_end_node(ngram_search_t *ngs, ps_lattice_t *dag, float32 lwf)
+{
+ ps_latnode_t *node;
+ int32 ef, bestbp, bp, bestscore;
+
+ /* Find final node </s>.last_frame; nothing can follow this node */
+ for (node = dag->nodes; node; node = node->next) {
+ int32 lef = ngs->bp_table[node->lef].frame;
+ if ((node->wid == ps_search_finish_wid(ngs))
+ && (lef == dag->n_frames - 1))
+ break;
+ }
+ if (node != NULL)
+ return node;
+
+ /* It is quite likely that no </s> exited in the last frame. So,
+ * find the node corresponding to the best exit. */
+ /* Find the last frame containing a word exit. */
+ for (ef = dag->n_frames - 1;
+ ef >= 0 && ngs->bp_table_idx[ef] == ngs->bpidx;
+ --ef);
+ if (ef < 0) {
+ E_ERROR("Empty backpointer table: can not build DAG.\n");
+ return NULL;
+ }
+
+ /* Find best word exit in that frame. */
+ bestscore = WORST_SCORE;
+ bestbp = NO_BP;
+ for (bp = ngs->bp_table_idx[ef]; bp < ngs->bp_table_idx[ef + 1]; ++bp) {
+ int32 n_used, l_scr, wid, prev_wid;
+ wid = ngs->bp_table[bp].real_wid;
+ prev_wid = ngs->bp_table[bp].prev_real_wid;
+ /* Always prefer </s>, of which there will only be one per frame. */
+ if (wid == ps_search_finish_wid(ngs)) {
+ bestbp = bp;
+ break;
+ }
+ l_scr = ngram_tg_score(ngs->lmset, ps_search_finish_wid(ngs),
+ wid, prev_wid, &n_used) >>SENSCR_SHIFT;
+ l_scr = l_scr * lwf;
+ if (ngs->bp_table[bp].score + l_scr BETTER_THAN bestscore) {
+ bestscore = ngs->bp_table[bp].score + l_scr;
+ bestbp = bp;
+ }
+ }
+ if (bestbp == NO_BP) {
+ E_ERROR("No word exits found in last frame (%d), assuming no recognition\n", ef);
+ return NULL;
+ }
+ E_INFO("</s> not found in last frame, using %s.%d instead\n",
+ dict_basestr(ps_search_dict(ngs), ngs->bp_table[bestbp].wid), ef);
+
+ /* Now find the node that corresponds to it. */
+ for (node = dag->nodes; node; node = node->next) {
+ if (node->lef == bestbp)
+ return node;
+ }
+
+ /* FIXME: This seems to happen a lot! */
+ E_ERROR("Failed to find DAG node corresponding to %s\n",
+ dict_basestr(ps_search_dict(ngs), ngs->bp_table[bestbp].wid));
+ return NULL;
+}
+
+/*
+ * Build lattice from bptable.
+ */
+ps_lattice_t *
+ngram_search_lattice(ps_search_t *search)
+{
+ int32 i, score, ascr, lscr;
+ ps_latnode_t *node, *from, *to;
+ ngram_search_t *ngs;
+ ps_lattice_t *dag;
+ int min_endfr, nlink;
+ float lwf;
+
+ ngs = (ngram_search_t *)search;
+ min_endfr = cmd_ln_int32_r(ps_search_config(search), "-min_endfr");
+
+ /* If the best score is WORST_SCORE or worse, there is no way to
+ * make a lattice. */
+ if (ngs->best_score == WORST_SCORE || ngs->best_score WORSE_THAN WORST_SCORE)
+ return NULL;
+
+ /* Check to see if a lattice has previously been created over the
+ * same number of frames, and reuse it if so. */
+ if (search->dag && search->dag->n_frames == ngs->n_frame)
+ return search->dag;
+
+ /* Nope, create a new one. */
+ ps_lattice_free(search->dag);
+ search->dag = NULL;
+ dag = ps_lattice_init_search(search, ngs->n_frame);
+ /* Compute these such that they agree with the fwdtree language weight. */
+ lwf = ngs->fwdflat ? ngs->fwdflat_fwdtree_lw_ratio : 1.0;
+ create_dag_nodes(ngs, dag);
+ if ((dag->start = find_start_node(ngs, dag)) == NULL)
+ goto error_out;
+ if ((dag->end = find_end_node(ngs, dag, ngs->bestpath_fwdtree_lw_ratio)) == NULL)
+ goto error_out;
+ E_INFO("lattice start node %s.%d end node %s.%d\n",
+ dict_wordstr(search->dict, dag->start->wid), dag->start->sf,
+ dict_wordstr(search->dict, dag->end->wid), dag->end->sf);
+
+ ngram_compute_seg_score(ngs, ngs->bp_table + dag->end->lef, lwf,
+ &dag->final_node_ascr, &lscr);
+
+ /*
+ * At this point, dag->nodes is ordered such that nodes earlier in
+ * the list can follow (in time) those later in the list, but not
+ * vice versa (see above - also note that adjacency is purely
+ * determined by time which is why we can make this claim). Now
+ * create precedence links and simultanesously mark all nodes that
+ * can reach dag->end. (All nodes are reached from dag->start
+ * simply by definition - they were created that way).
+ *
+ * Note that this also means that any nodes before dag->end in the
+ * list can be discarded, meaning that dag->end will always be
+ * equal to dag->nodes (FIXME: except when loading from a file but
+ * we can fix that...)
+ */
+ i = 0;
+ while (dag->nodes && dag->nodes != dag->end) {
+ ps_latnode_t *next = dag->nodes->next;
+ listelem_free(dag->latnode_alloc, dag->nodes);
+ dag->nodes = next;
+ ++i;
+ }
+ E_INFO("Eliminated %d nodes before end node\n", i);
+ dag->end->reachable = TRUE;
+ nlink = 0;
+ for (to = dag->end; to; to = to->next) {
+ int fef, lef;
+
+ /* Skip if not reachable; it will never be reachable from dag->end */
+ if (!to->reachable)
+ continue;
+
+ /* Prune nodes with too few endpoints - heuristic
+ borrowed from Sphinx3 */
+ fef = ngs->bp_table[to->fef].frame;
+ lef = ngs->bp_table[to->lef].frame;
+ if (to != dag->end && lef - fef < min_endfr) {
+ to->reachable = FALSE;
+ continue;
+ }
+
+ /* Find predecessors of to : from->fef+1 <= to->sf <= from->lef+1 */
+ for (from = to->next; from; from = from->next) {
+ bptbl_t *from_bpe;
+
+ fef = ngs->bp_table[from->fef].frame;
+ lef = ngs->bp_table[from->lef].frame;
+
+ if ((to->sf <= fef) || (to->sf > lef + 1))
+ continue;
+ if (lef - fef < min_endfr) {
+ assert(!from->reachable);
+ continue;
+ }
+
+ /* Find bptable entry for "from" that exactly precedes "to" */
+ i = from->fef;
+ from_bpe = ngs->bp_table + i;
+ for (; i <= from->lef; i++, from_bpe++) {
+ if (from_bpe->wid != from->wid)
+ continue;
+ if (from_bpe->frame >= to->sf - 1)
+ break;
+ }
+
+ if ((i > from->lef) || (from_bpe->frame != to->sf - 1))
+ continue;
+
+ /* Find acoustic score from.sf->to.sf-1 with right context = to */
+ /* This gives us from_bpe's best acoustic score. */
+ ngram_compute_seg_score(ngs, from_bpe, lwf,
+ &ascr, &lscr);
+ /* Now find the exact path score for from->to, including
+ * the appropriate final triphone. In fact this might not
+ * exist. */
+ score = ngram_search_exit_score(ngs, from_bpe,
+ dict_first_phone(ps_search_dict(ngs), to->wid));
+ /* Does not exist. Can't create a link here. */
+ if (score == WORST_SCORE)
+ continue;
+ /* Adjust the arc score to match the correct triphone. */
+ else
+ score = ascr + (score - from_bpe->score);
+ if (score BETTER_THAN 0) {
+ /* Scores must be negative, or Bad Things will happen.
+ In general, they are, except in corner cases
+ involving filler words. We don't want to throw any
+ links away so we'll keep these, but with some
+ arbitrarily improbable but recognizable score. */
+ ps_lattice_link(dag, from, to, -424242, from_bpe->frame);
+ ++nlink;
+ from->reachable = TRUE;
+ }
+ else if (score BETTER_THAN WORST_SCORE) {
+ ps_lattice_link(dag, from, to, score, from_bpe->frame);
+ ++nlink;
+ from->reachable = TRUE;
+ }
+ }
+ }
+
+ /* There must be at least one path between dag->start and dag->end */
+ if (!dag->start->reachable) {
+ E_ERROR("End node of lattice isolated; unreachable\n");
+ goto error_out;
+ }
+
+ for (node = dag->nodes; node; node = node->next) {
+ /* Change node->{fef,lef} from bptbl indices to frames. */
+ node->fef = ngs->bp_table[node->fef].frame;
+ node->lef = ngs->bp_table[node->lef].frame;
+ /* Find base wid for nodes. */
+ node->basewid = dict_basewid(search->dict, node->wid);
+ }
+
+ /* Link nodes with alternate pronunciations at the same timepoint. */
+ for (node = dag->nodes; node; node = node->next) {
+ ps_latnode_t *alt;
+ /* Scan forward to find the next alternate, then stop. */
+ for (alt = node->next; alt && alt->sf == node->sf; alt = alt->next) {
+ if (alt->basewid == node->basewid) {
+ alt->alt = node->alt;
+ node->alt = alt;
+ break;
+ }
+ }
+ }
+ E_INFO("Lattice has %d nodes, %d links\n", dag->n_nodes, nlink);
+
+ /* Minor hack: If the final node is a filler word and not </s>,
+ * then set its base word ID to </s>, so that the language model
+ * scores won't be screwed up. */
+ if (dict_filler_word(ps_search_dict(ngs), dag->end->wid))
+ dag->end->basewid = ps_search_finish_wid(ngs);
+
+ /* Free nodes unreachable from dag->end and their links */
+ ps_lattice_delete_unreachable(dag);
+
+ /* Add silprob and fillprob to corresponding links */
+ ps_lattice_penalize_fillers(dag, ngs->silpen, ngs->fillpen);
+
+ search->dag = dag;
+ return dag;
+
+error_out:
+ ps_lattice_free(dag);
+ return NULL;
+}
+
+void ngram_search_set_lm(ngram_model_t *lm)
+{
+ default_lm = ngram_model_retain(lm);
+}
+