diff options
Diffstat (limited to 'media/pocketsphinx/src/ngram_search.c')
-rw-r--r-- | media/pocketsphinx/src/ngram_search.c | 1409 |
1 files changed, 1409 insertions, 0 deletions
diff --git a/media/pocketsphinx/src/ngram_search.c b/media/pocketsphinx/src/ngram_search.c new file mode 100644 index 000000000..47e488c3a --- /dev/null +++ b/media/pocketsphinx/src/ngram_search.c @@ -0,0 +1,1409 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2008 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +/** + * @file ngram_search.c N-Gram based multi-pass search ("FBS") + */ + +/* System headers. */ +#include <string.h> +#include <assert.h> + +/* SphinxBase headers. */ +#include <sphinxbase/ckd_alloc.h> +#include <sphinxbase/listelem_alloc.h> +#include <sphinxbase/err.h> + +/* Local headers. */ +#include "pocketsphinx_internal.h" +#include "ps_lattice_internal.h" +#include "ngram_search.h" +#include "ngram_search_fwdtree.h" +#include "ngram_search_fwdflat.h" + +static int ngram_search_start(ps_search_t *search); +static int ngram_search_step(ps_search_t *search, int frame_idx); +static int ngram_search_finish(ps_search_t *search); +static int ngram_search_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p); +static char const *ngram_search_hyp(ps_search_t *search, int32 *out_score, int32 *out_is_final); +static int32 ngram_search_prob(ps_search_t *search); +static ps_seg_t *ngram_search_seg_iter(ps_search_t *search, int32 *out_score); + +static ps_searchfuncs_t ngram_funcs = { + /* name: */ "ngram", + /* start: */ ngram_search_start, + /* step: */ ngram_search_step, + /* finish: */ ngram_search_finish, + /* reinit: */ ngram_search_reinit, + /* free: */ ngram_search_free, + /* lattice: */ ngram_search_lattice, + /* hyp: */ ngram_search_hyp, + /* prob: */ ngram_search_prob, + /* seg_iter: */ ngram_search_seg_iter, +}; + +static ngram_model_t *default_lm; + +static void +ngram_search_update_widmap(ngram_search_t *ngs) +{ + char const **words; + int32 i, n_words; + + /* It's okay to include fillers since they won't be in the LM */ + n_words = ps_search_n_words(ngs); + words = (char const**)ckd_calloc(n_words, sizeof(*words)); + /* This will include alternates, again, that's okay since they aren't in the LM */ + for (i = 0; i < n_words; ++i) + words[i] = dict_wordstr(ps_search_dict(ngs), i); + ngram_model_set_map_words(ngs->lmset, words, n_words); + ckd_free(words); +} + +static void +ngram_search_calc_beams(ngram_search_t *ngs) +{ + cmd_ln_t *config; + acmod_t *acmod; + + config = ps_search_config(ngs); + acmod = ps_search_acmod(ngs); + + /* Log beam widths. */ + ngs->beam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-beam"))>>SENSCR_SHIFT; + ngs->wbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-wbeam"))>>SENSCR_SHIFT; + ngs->pbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-pbeam"))>>SENSCR_SHIFT; + ngs->lpbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-lpbeam"))>>SENSCR_SHIFT; + ngs->lponlybeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-lponlybeam"))>>SENSCR_SHIFT; + ngs->fwdflatbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-fwdflatbeam"))>>SENSCR_SHIFT; + ngs->fwdflatwbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-fwdflatwbeam"))>>SENSCR_SHIFT; + + /* Absolute pruning parameters. */ + ngs->maxwpf = cmd_ln_int32_r(config, "-maxwpf"); + ngs->maxhmmpf = cmd_ln_int32_r(config, "-maxhmmpf"); + + /* Various penalties which may or may not be useful. */ + ngs->wip = logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-wip")) >>SENSCR_SHIFT; + ngs->nwpen = logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-nwpen")) >>SENSCR_SHIFT; + ngs->pip = logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-pip")) >>SENSCR_SHIFT; + ngs->silpen = ngs->pip + + (logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-silprob"))>>SENSCR_SHIFT); + ngs->fillpen = ngs->pip + + (logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-fillprob"))>>SENSCR_SHIFT); + + /* Language weight ratios for fwdflat and bestpath search. */ + ngs->fwdflat_fwdtree_lw_ratio = + cmd_ln_float32_r(config, "-fwdflatlw") + / cmd_ln_float32_r(config, "-lw"); + ngs->bestpath_fwdtree_lw_ratio = + cmd_ln_float32_r(config, "-bestpathlw") + / cmd_ln_float32_r(config, "-lw"); + + /* Acoustic score scale for posterior probabilities. */ + ngs->ascale = 1.0 / cmd_ln_float32_r(config, "-ascale"); +} + +ps_search_t * +ngram_search_init(ngram_model_t *lm, + cmd_ln_t *config, + acmod_t *acmod, + dict_t *dict, + dict2pid_t *d2p) +{ + ngram_search_t *ngs; + static char *lmname = "default"; + + /* Make the acmod's feature buffer growable if we are doing two-pass + * search. */ + acmod_set_grow(acmod, cmd_ln_boolean_r(config, "-fwdflat") && + cmd_ln_boolean_r(config, "-fwdtree")); + + ngs = ckd_calloc(1, sizeof(*ngs)); + ps_search_init(&ngs->base, &ngram_funcs, config, acmod, dict, d2p); + ngs->hmmctx = hmm_context_init(bin_mdef_n_emit_state(acmod->mdef), + acmod->tmat->tp, NULL, acmod->mdef->sseq); + if (ngs->hmmctx == NULL) { + ps_search_free(ps_search_base(ngs)); + return NULL; + } + ngs->chan_alloc = listelem_alloc_init(sizeof(chan_t)); + ngs->root_chan_alloc = listelem_alloc_init(sizeof(root_chan_t)); + ngs->latnode_alloc = listelem_alloc_init(sizeof(ps_latnode_t)); + + /* Calculate various beam widths and such. */ + ngram_search_calc_beams(ngs); + + /* Allocate a billion different tables for stuff. */ + ngs->word_chan = ckd_calloc(dict_size(dict), + sizeof(*ngs->word_chan)); + ngs->word_lat_idx = ckd_calloc(dict_size(dict), + sizeof(*ngs->word_lat_idx)); + ngs->word_active = bitvec_alloc(dict_size(dict)); + ngs->last_ltrans = ckd_calloc(dict_size(dict), + sizeof(*ngs->last_ltrans)); + + /* FIXME: All these structures need to be made dynamic with + * garbage collection. */ + ngs->bp_table_size = cmd_ln_int32_r(config, "-latsize"); + ngs->bp_table = ckd_calloc(ngs->bp_table_size, + sizeof(*ngs->bp_table)); + /* FIXME: This thing is frickin' huge. */ + ngs->bscore_stack_size = ngs->bp_table_size * 20; + ngs->bscore_stack = ckd_calloc(ngs->bscore_stack_size, + sizeof(*ngs->bscore_stack)); + ngs->n_frame_alloc = 256; + ngs->bp_table_idx = ckd_calloc(ngs->n_frame_alloc + 1, + sizeof(*ngs->bp_table_idx)); + ++ngs->bp_table_idx; /* Make bptableidx[-1] valid */ + + /* Allocate active word list array */ + ngs->active_word_list = ckd_calloc_2d(2, dict_size(dict), + sizeof(**ngs->active_word_list)); + + ngs->lmset = ngram_model_set_init(config, &lm, &lmname, NULL, 1); + if (!ngs->lmset) + goto error_out; + + if (ngram_wid(ngs->lmset, S3_FINISH_WORD) == + ngram_unknown_wid(ngs->lmset)) + { + E_ERROR("Language model/set does not contain </s>, " + "recognition will fail\n"); + goto error_out; + } + + /* Create word mappings. */ + ngram_search_update_widmap(ngs); + + /* Initialize fwdtree, fwdflat, bestpath modules if necessary. */ + if (cmd_ln_boolean_r(config, "-fwdtree")) { + ngram_fwdtree_init(ngs); + ngs->fwdtree = TRUE; + ngs->fwdtree_perf.name = "fwdtree"; + ptmr_init(&ngs->fwdtree_perf); + } + if (cmd_ln_boolean_r(config, "-fwdflat")) { + ngram_fwdflat_init(ngs); + ngs->fwdflat = TRUE; + ngs->fwdflat_perf.name = "fwdflat"; + ptmr_init(&ngs->fwdflat_perf); + } + if (cmd_ln_boolean_r(config, "-bestpath")) { + ngs->bestpath = TRUE; + ngs->bestpath_perf.name = "bestpath"; + ptmr_init(&ngs->bestpath_perf); + } + + return (ps_search_t *)ngs; + +error_out: + ngram_search_free((ps_search_t *)ngs); + return NULL; +} + +static int +ngram_search_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p) +{ + ngram_search_t *ngs = (ngram_search_t *)search; + int old_n_words; + int rv = 0; + + /* Update the number of words. */ + old_n_words = search->n_words; + if (old_n_words != dict_size(dict)) { + search->n_words = dict_size(dict); + /* Reallocate these temporary arrays. */ + ckd_free(ngs->word_lat_idx); + ckd_free(ngs->word_active); + ckd_free(ngs->last_ltrans); + ckd_free_2d(ngs->active_word_list); + ngs->word_lat_idx = ckd_calloc(search->n_words, sizeof(*ngs->word_lat_idx)); + ngs->word_active = bitvec_alloc(search->n_words); + ngs->last_ltrans = ckd_calloc(search->n_words, sizeof(*ngs->last_ltrans)); + ngs->active_word_list + = ckd_calloc_2d(2, search->n_words, + sizeof(**ngs->active_word_list)); + } + + /* Free old dict2pid, dict */ + ps_search_base_reinit(search, dict, d2p); + + if (ngs->lmset == NULL) + return 0; + + /* Update beam widths. */ + ngram_search_calc_beams(ngs); + + /* Update word mappings. */ + ngram_search_update_widmap(ngs); + + /* Now rebuild lextrees. */ + if (ngs->fwdtree) { + if ((rv = ngram_fwdtree_reinit(ngs)) < 0) + return rv; + } + if (ngs->fwdflat) { + if ((rv = ngram_fwdflat_reinit(ngs)) < 0) + return rv; + } + + return rv; +} + +void +ngram_search_free(ps_search_t *search) +{ + ngram_search_t *ngs = (ngram_search_t *)search; + + ps_search_deinit(search); + if (ngs->fwdtree) + ngram_fwdtree_deinit(ngs); + if (ngs->fwdflat) + ngram_fwdflat_deinit(ngs); + if (ngs->bestpath) { + double n_speech = (double)ngs->n_tot_frame + / cmd_ln_int32_r(ps_search_config(ngs), "-frate"); + + E_INFO("TOTAL bestpath %.2f CPU %.3f xRT\n", + ngs->bestpath_perf.t_tot_cpu, + ngs->bestpath_perf.t_tot_cpu / n_speech); + E_INFO("TOTAL bestpath %.2f wall %.3f xRT\n", + ngs->bestpath_perf.t_tot_elapsed, + ngs->bestpath_perf.t_tot_elapsed / n_speech); + } + + hmm_context_free(ngs->hmmctx); + listelem_alloc_free(ngs->chan_alloc); + listelem_alloc_free(ngs->root_chan_alloc); + listelem_alloc_free(ngs->latnode_alloc); + ngram_model_free(ngs->lmset); + + ckd_free(ngs->word_chan); + ckd_free(ngs->word_lat_idx); + bitvec_free(ngs->word_active); + ckd_free(ngs->bp_table); + ckd_free(ngs->bscore_stack); + if (ngs->bp_table_idx != NULL) + ckd_free(ngs->bp_table_idx - 1); + ckd_free_2d(ngs->active_word_list); + ckd_free(ngs->last_ltrans); + ckd_free(ngs); +} + +int +ngram_search_mark_bptable(ngram_search_t *ngs, int frame_idx) +{ + if (frame_idx >= ngs->n_frame_alloc) { + ngs->n_frame_alloc *= 2; + ngs->bp_table_idx = ckd_realloc(ngs->bp_table_idx - 1, + (ngs->n_frame_alloc + 1) + * sizeof(*ngs->bp_table_idx)); + if (ngs->frm_wordlist) { + ngs->frm_wordlist = ckd_realloc(ngs->frm_wordlist, + ngs->n_frame_alloc + * sizeof(*ngs->frm_wordlist)); + } + ++ngs->bp_table_idx; /* Make bptableidx[-1] valid */ + } + ngs->bp_table_idx[frame_idx] = ngs->bpidx; + return ngs->bpidx; +} + +static void +set_real_wid(ngram_search_t *ngs, int32 bp) +{ + bptbl_t *ent, *prev; + + assert(bp != NO_BP); + ent = ngs->bp_table + bp; + if (ent->bp == NO_BP) + prev = NULL; + else + prev = ngs->bp_table + ent->bp; + + /* Propagate lm state for fillers, rotate it for words. */ + if (dict_filler_word(ps_search_dict(ngs), ent->wid)) { + if (prev != NULL) { + ent->real_wid = prev->real_wid; + ent->prev_real_wid = prev->prev_real_wid; + } + else { + ent->real_wid = dict_basewid(ps_search_dict(ngs), + ent->wid); + ent->prev_real_wid = BAD_S3WID; + } + } + else { + ent->real_wid = dict_basewid(ps_search_dict(ngs), ent->wid); + if (prev != NULL) + ent->prev_real_wid = prev->real_wid; + else + ent->prev_real_wid = BAD_S3WID; + } +} + +#define NGRAM_HISTORY_LONG_WORD 2000 /* 20s */ + +void +ngram_search_save_bp(ngram_search_t *ngs, int frame_idx, + int32 w, int32 score, int32 path, int32 rc) +{ + int32 bp; + + /* Look for an existing exit for this word in this frame. The + * only reason one would exist is from a different right context + * triphone, but of course that happens quite frequently. */ + bp = ngs->word_lat_idx[w]; + if (bp != NO_BP) { + + if (frame_idx - ngs->bp_table[path].frame > NGRAM_HISTORY_LONG_WORD) { + E_WARN("Word '%s' survived for %d frames, potential overpruning\n", dict_wordstr(ps_search_dict(ngs), w), + frame_idx - ngs->bp_table[path].frame); + } + + /* Keep only the best scoring one, we will reconstruct the + * others from the right context scores - usually the history + * is not lost. */ + if (ngs->bp_table[bp].score WORSE_THAN score) { + assert(path != bp); /* Pathological. */ + if (ngs->bp_table[bp].bp != path) { + int32 bplh[2], newlh[2]; + /* But, sometimes, the history *is* lost. If we wanted to + * do exact language model scoring we'd have to preserve + * these alternate histories. */ + E_DEBUG(2,("Updating path history %d => %d frame %d\n", + ngs->bp_table[bp].bp, path, frame_idx)); + bplh[0] = ngs->bp_table[bp].bp == -1 + ? -1 : ngs->bp_table[ngs->bp_table[bp].bp].prev_real_wid; + bplh[1] = ngs->bp_table[bp].bp == -1 + ? -1 : ngs->bp_table[ngs->bp_table[bp].bp].real_wid; + newlh[0] = path == -1 + ? -1 : ngs->bp_table[path].prev_real_wid; + newlh[1] = path == -1 + ? -1 : ngs->bp_table[path].real_wid; + /* Actually it's worth checking how often the actual + * language model state changes. */ + if (bplh[0] != newlh[0] || bplh[1] != newlh[1]) { + /* It's fairly rare that the actual language model + * state changes, but it does happen some + * times. */ + E_DEBUG(1, ("Updating language model state %s,%s => %s,%s frame %d\n", + dict_wordstr(ps_search_dict(ngs), bplh[0]), + dict_wordstr(ps_search_dict(ngs), bplh[1]), + dict_wordstr(ps_search_dict(ngs), newlh[0]), + dict_wordstr(ps_search_dict(ngs), newlh[1]), + frame_idx)); + set_real_wid(ngs, bp); + } + ngs->bp_table[bp].bp = path; + } + ngs->bp_table[bp].score = score; + } + /* But do keep track of scores for all right contexts, since + * we need them to determine the starting path scores for any + * successors of this word exit. */ + if (ngs->bp_table[bp].s_idx != -1) + ngs->bscore_stack[ngs->bp_table[bp].s_idx + rc] = score; + } + else { + int32 i, rcsize; + bptbl_t *be; + + /* This might happen if recognition fails. */ + if (ngs->bpidx == NO_BP) { + E_ERROR("No entries in backpointer table!"); + return; + } + + /* Expand the backpointer tables if necessary. */ + if (ngs->bpidx >= ngs->bp_table_size) { + ngs->bp_table_size *= 2; + ngs->bp_table = ckd_realloc(ngs->bp_table, + ngs->bp_table_size + * sizeof(*ngs->bp_table)); + E_INFO("Resized backpointer table to %d entries\n", ngs->bp_table_size); + } + if (ngs->bss_head >= ngs->bscore_stack_size + - bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef)) { + ngs->bscore_stack_size *= 2; + ngs->bscore_stack = ckd_realloc(ngs->bscore_stack, + ngs->bscore_stack_size + * sizeof(*ngs->bscore_stack)); + E_INFO("Resized score stack to %d entries\n", ngs->bscore_stack_size); + } + + ngs->word_lat_idx[w] = ngs->bpidx; + be = &(ngs->bp_table[ngs->bpidx]); + be->wid = w; + be->frame = frame_idx; + be->bp = path; + be->score = score; + be->s_idx = ngs->bss_head; + be->valid = TRUE; + assert(path != ngs->bpidx); + + /* DICT2PID */ + /* Get diphone ID for final phone and number of ssids corresponding to it. */ + be->last_phone = dict_last_phone(ps_search_dict(ngs),w); + if (dict_is_single_phone(ps_search_dict(ngs), w)) { + be->last2_phone = -1; + be->s_idx = -1; + rcsize = 0; + } + else { + be->last2_phone = dict_second_last_phone(ps_search_dict(ngs),w); + rcsize = dict2pid_rssid(ps_search_dict2pid(ngs), + be->last_phone, be->last2_phone)->n_ssid; + } + /* Allocate some space on the bscore_stack for all of these triphones. */ + for (i = 0; i < rcsize; ++i) + ngs->bscore_stack[ngs->bss_head + i] = WORST_SCORE; + if (rcsize) + ngs->bscore_stack[ngs->bss_head + rc] = score; + set_real_wid(ngs, ngs->bpidx); + + ngs->bpidx++; + ngs->bss_head += rcsize; + } +} + +int +ngram_search_find_exit(ngram_search_t *ngs, int frame_idx, int32 *out_best_score, int32 *out_is_final) +{ + /* End of backpointers for this frame. */ + int end_bpidx; + int best_exit, bp; + int32 best_score; + + /* No hypothesis means no exit node! */ + if (ngs->n_frame == 0) + return NO_BP; + + if (frame_idx == -1 || frame_idx >= ngs->n_frame) + frame_idx = ngs->n_frame - 1; + end_bpidx = ngs->bp_table_idx[frame_idx]; + + best_score = WORST_SCORE; + best_exit = NO_BP; + + /* Scan back to find a frame with some backpointers in it. */ + while (frame_idx >= 0 && ngs->bp_table_idx[frame_idx] == end_bpidx) + --frame_idx; + /* This is NOT an error, it just means there is no hypothesis yet. */ + if (frame_idx < 0) + return NO_BP; + + /* Now find the entry for </s> OR the best scoring entry. */ + assert(end_bpidx < ngs->bp_table_size); + for (bp = ngs->bp_table_idx[frame_idx]; bp < end_bpidx; ++bp) { + if (ngs->bp_table[bp].wid == ps_search_finish_wid(ngs) + || ngs->bp_table[bp].score BETTER_THAN best_score) { + best_score = ngs->bp_table[bp].score; + best_exit = bp; + } + if (ngs->bp_table[bp].wid == ps_search_finish_wid(ngs)) + break; + } + + if (out_best_score) { + *out_best_score = best_score; + } + if (out_is_final) { + *out_is_final = (ngs->bp_table[bp].wid == ps_search_finish_wid(ngs)); + } + return best_exit; +} + +char const * +ngram_search_bp_hyp(ngram_search_t *ngs, int bpidx) +{ + ps_search_t *base = ps_search_base(ngs); + char *c; + size_t len; + int bp; + + if (bpidx == NO_BP) + return NULL; + + bp = bpidx; + len = 0; + while (bp != NO_BP) { + bptbl_t *be = &ngs->bp_table[bp]; + bp = be->bp; + if (dict_real_word(ps_search_dict(ngs), be->wid)) + len += strlen(dict_basestr(ps_search_dict(ngs), be->wid)) + 1; + } + + ckd_free(base->hyp_str); + if (len == 0) { + base->hyp_str = NULL; + return base->hyp_str; + } + base->hyp_str = ckd_calloc(1, len); + + bp = bpidx; + c = base->hyp_str + len - 1; + while (bp != NO_BP) { + bptbl_t *be = &ngs->bp_table[bp]; + size_t len; + + bp = be->bp; + if (dict_real_word(ps_search_dict(ngs), be->wid)) { + len = strlen(dict_basestr(ps_search_dict(ngs), be->wid)); + c -= len; + memcpy(c, dict_basestr(ps_search_dict(ngs), be->wid), len); + if (c > base->hyp_str) { + --c; + *c = ' '; + } + } + } + + return base->hyp_str; +} + +void +ngram_search_alloc_all_rc(ngram_search_t *ngs, int32 w) +{ + chan_t *hmm, *thmm; + xwdssid_t *rssid; + int32 i, tmatid, ciphone; + + /* DICT2PID */ + /* Get pointer to array of triphones for final diphone. */ + assert(!dict_is_single_phone(ps_search_dict(ngs), w)); + ciphone = dict_last_phone(ps_search_dict(ngs),w); + rssid = dict2pid_rssid(ps_search_dict2pid(ngs), + ciphone, + dict_second_last_phone(ps_search_dict(ngs),w)); + tmatid = bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef, ciphone); + hmm = ngs->word_chan[w]; + if ((hmm == NULL) || (hmm_nonmpx_ssid(&hmm->hmm) != rssid->ssid[0])) { + hmm = listelem_malloc(ngs->chan_alloc); + hmm->next = ngs->word_chan[w]; + ngs->word_chan[w] = hmm; + + hmm->info.rc_id = 0; + hmm->ciphone = ciphone; + hmm_init(ngs->hmmctx, &hmm->hmm, FALSE, rssid->ssid[0], tmatid); + E_DEBUG(3,("allocated rc_id 0 ssid %d ciphone %d lc %d word %s\n", + rssid->ssid[0], hmm->ciphone, + dict_second_last_phone(ps_search_dict(ngs),w), + dict_wordstr(ps_search_dict(ngs),w))); + } + for (i = 1; i < rssid->n_ssid; ++i) { + if ((hmm->next == NULL) || (hmm_nonmpx_ssid(&hmm->next->hmm) != rssid->ssid[i])) { + thmm = listelem_malloc(ngs->chan_alloc); + thmm->next = hmm->next; + hmm->next = thmm; + hmm = thmm; + + hmm->info.rc_id = i; + hmm->ciphone = ciphone; + hmm_init(ngs->hmmctx, &hmm->hmm, FALSE, rssid->ssid[i], tmatid); + E_DEBUG(3,("allocated rc_id %d ssid %d ciphone %d lc %d word %s\n", + i, rssid->ssid[i], hmm->ciphone, + dict_second_last_phone(ps_search_dict(ngs),w), + dict_wordstr(ps_search_dict(ngs),w))); + } + else + hmm = hmm->next; + } +} + +void +ngram_search_free_all_rc(ngram_search_t *ngs, int32 w) +{ + chan_t *hmm, *thmm; + + for (hmm = ngs->word_chan[w]; hmm; hmm = thmm) { + thmm = hmm->next; + hmm_deinit(&hmm->hmm); + listelem_free(ngs->chan_alloc, hmm); + } + ngs->word_chan[w] = NULL; +} + +int32 +ngram_search_exit_score(ngram_search_t *ngs, bptbl_t *pbe, int rcphone) +{ + /* DICT2PID */ + /* Get the mapping from right context phone ID to index in the + * right context table and the bscore_stack. */ + if (pbe->last2_phone == -1) { + /* No right context for single phone predecessor words. */ + return pbe->score; + } + else { + xwdssid_t *rssid; + /* Find the index for the last diphone of the previous word + + * the first phone of the current word. */ + rssid = dict2pid_rssid(ps_search_dict2pid(ngs), + pbe->last_phone, pbe->last2_phone); + /* This may be WORST_SCORE, which means that there was no exit + * with rcphone as right context. */ + return ngs->bscore_stack[pbe->s_idx + rssid->cimap[rcphone]]; + } +} + +/* + * Compute acoustic and LM scores for a BPTable entry (segment). + */ +void +ngram_compute_seg_score(ngram_search_t *ngs, bptbl_t *be, float32 lwf, + int32 *out_ascr, int32 *out_lscr) +{ + bptbl_t *pbe; + int32 start_score; + + /* Start of utterance. */ + if (be->bp == NO_BP) { + *out_ascr = be->score; + *out_lscr = 0; + return; + } + + /* Otherwise, calculate lscr and ascr. */ + pbe = ngs->bp_table + be->bp; + start_score = ngram_search_exit_score(ngs, pbe, + dict_first_phone(ps_search_dict(ngs),be->wid)); + assert(start_score BETTER_THAN WORST_SCORE); + + /* FIXME: These result in positive acoustic scores when filler + words have non-filler pronunciations. That whole business + is still pretty much broken but at least it doesn't + segfault. */ + if (be->wid == ps_search_silence_wid(ngs)) { + *out_lscr = ngs->silpen; + } + else if (dict_filler_word(ps_search_dict(ngs), be->wid)) { + *out_lscr = ngs->fillpen; + } + else { + int32 n_used; + *out_lscr = ngram_tg_score(ngs->lmset, + be->real_wid, + pbe->real_wid, + pbe->prev_real_wid, + &n_used)>>SENSCR_SHIFT; + *out_lscr = *out_lscr * lwf; + } + *out_ascr = be->score - start_score - *out_lscr; +} + +static int +ngram_search_start(ps_search_t *search) +{ + ngram_search_t *ngs = (ngram_search_t *)search; + + ngs->done = FALSE; + ngram_model_flush(ngs->lmset); + if (ngs->fwdtree) + ngram_fwdtree_start(ngs); + else if (ngs->fwdflat) + ngram_fwdflat_start(ngs); + else + return -1; + return 0; +} + +static int +ngram_search_step(ps_search_t *search, int frame_idx) +{ + ngram_search_t *ngs = (ngram_search_t *)search; + + if (ngs->fwdtree) + return ngram_fwdtree_search(ngs, frame_idx); + else if (ngs->fwdflat) + return ngram_fwdflat_search(ngs, frame_idx); + else + return -1; +} + +void +dump_bptable(ngram_search_t *ngs) +{ + int i; + E_INFO("Backpointer table (%d entries):\n", ngs->bpidx); + for (i = 0; i < ngs->bpidx; ++i) { + bptbl_t *bpe = ngs->bp_table + i; + int j, rcsize; + + E_INFO_NOFN("%-5d %-10s start %-3d end %-3d score %-8d bp %-3d real_wid %-5d prev_real_wid %-5d", + i, dict_wordstr(ps_search_dict(ngs), bpe->wid), + (bpe->bp == -1 + ? 0 : ngs->bp_table[bpe->bp].frame + 1), + bpe->frame, bpe->score, bpe->bp, + bpe->real_wid, bpe->prev_real_wid); + + if (bpe->last2_phone == -1) + rcsize = 0; + else + rcsize = dict2pid_rssid(ps_search_dict2pid(ngs), + bpe->last_phone, bpe->last2_phone)->n_ssid; + if (rcsize) { + E_INFOCONT("\tbss"); + for (j = 0; j < rcsize; ++j) + if (ngs->bscore_stack[bpe->s_idx + j] != WORST_SCORE) + E_INFOCONT(" %d", bpe->score - ngs->bscore_stack[bpe->s_idx + j]); + } + E_INFOCONT("\n"); + } +} + +static int +ngram_search_finish(ps_search_t *search) +{ + ngram_search_t *ngs = (ngram_search_t *)search; + + ngs->n_tot_frame += ngs->n_frame; + if (ngs->fwdtree) { + ngram_fwdtree_finish(ngs); + /* dump_bptable(ngs); */ + + /* Now do fwdflat search in its entirety, if requested. */ + if (ngs->fwdflat) { + int i; + /* Rewind the acoustic model. */ + if (acmod_rewind(ps_search_acmod(ngs)) < 0) + return -1; + /* Now redo search. */ + ngram_fwdflat_start(ngs); + i = 0; + while (ps_search_acmod(ngs)->n_feat_frame > 0) { + int nfr; + if ((nfr = ngram_fwdflat_search(ngs, i)) < 0) + return nfr; + acmod_advance(ps_search_acmod(ngs)); + ++i; + } + ngram_fwdflat_finish(ngs); + /* And now, we should have a result... */ + /* dump_bptable(ngs); */ + } + } + else if (ngs->fwdflat) { + ngram_fwdflat_finish(ngs); + } + + /* Mark the current utterance as done. */ + ngs->done = TRUE; + return 0; +} + +static ps_latlink_t * +ngram_search_bestpath(ps_search_t *search, int32 *out_score, int backward) +{ + ngram_search_t *ngs = (ngram_search_t *)search; + + if (search->last_link == NULL) { + search->last_link = ps_lattice_bestpath(search->dag, ngs->lmset, + ngs->bestpath_fwdtree_lw_ratio, + ngs->ascale); + if (search->last_link == NULL) + return NULL; + /* Also calculate betas so we can fill in the posterior + * probability field in the segmentation. */ + if (search->post == 0) + search->post = ps_lattice_posterior(search->dag, ngs->lmset, + ngs->ascale); + } + if (out_score) + *out_score = search->last_link->path_scr + search->dag->final_node_ascr; + return search->last_link; +} + +static char const * +ngram_search_hyp(ps_search_t *search, int32 *out_score, int32 *out_is_final) +{ + ngram_search_t *ngs = (ngram_search_t *)search; + + /* Only do bestpath search if the utterance is complete. */ + if (ngs->bestpath && ngs->done) { + ps_lattice_t *dag; + ps_latlink_t *link; + char const *hyp; + double n_speech; + + ptmr_reset(&ngs->bestpath_perf); + ptmr_start(&ngs->bestpath_perf); + if ((dag = ngram_search_lattice(search)) == NULL) + return NULL; + if ((link = ngram_search_bestpath(search, out_score, FALSE)) == NULL) + return NULL; + hyp = ps_lattice_hyp(dag, link); + ptmr_stop(&ngs->bestpath_perf); + n_speech = (double)dag->n_frames + / cmd_ln_int32_r(ps_search_config(ngs), "-frate"); + E_INFO("bestpath %.2f CPU %.3f xRT\n", + ngs->bestpath_perf.t_cpu, + ngs->bestpath_perf.t_cpu / n_speech); + E_INFO("bestpath %.2f wall %.3f xRT\n", + ngs->bestpath_perf.t_elapsed, + ngs->bestpath_perf.t_elapsed / n_speech); + return hyp; + } + else { + int32 bpidx; + + /* fwdtree and fwdflat use same backpointer table. */ + bpidx = ngram_search_find_exit(ngs, -1, out_score, out_is_final); + if (bpidx != NO_BP) + return ngram_search_bp_hyp(ngs, bpidx); + } + + return NULL; +} + +static void +ngram_search_bp2itor(ps_seg_t *seg, int bp) +{ + ngram_search_t *ngs = (ngram_search_t *)seg->search; + bptbl_t *be, *pbe; + + be = &ngs->bp_table[bp]; + pbe = be->bp == -1 ? NULL : &ngs->bp_table[be->bp]; + seg->word = dict_wordstr(ps_search_dict(ngs), be->wid); + seg->ef = be->frame; + seg->sf = pbe ? pbe->frame + 1 : 0; + seg->prob = 0; /* Bogus value... */ + /* Compute acoustic and LM scores for this segment. */ + if (pbe == NULL) { + seg->ascr = be->score; + seg->lscr = 0; + seg->lback = 0; + } + else { + int32 start_score; + + /* Find ending path score of previous word. */ + start_score = ngram_search_exit_score(ngs, pbe, + dict_first_phone(ps_search_dict(ngs), be->wid)); + assert(start_score BETTER_THAN WORST_SCORE); + if (be->wid == ps_search_silence_wid(ngs)) { + seg->lscr = ngs->silpen; + } + else if (dict_filler_word(ps_search_dict(ngs), be->wid)) { + seg->lscr = ngs->fillpen; + } + else { + seg->lscr = ngram_tg_score(ngs->lmset, + be->real_wid, + pbe->real_wid, + pbe->prev_real_wid, + &seg->lback)>>SENSCR_SHIFT; + seg->lscr = (int32)(seg->lscr * seg->lwf); + } + seg->ascr = be->score - start_score - seg->lscr; + } +} + +static void +ngram_bp_seg_free(ps_seg_t *seg) +{ + bptbl_seg_t *itor = (bptbl_seg_t *)seg; + + ckd_free(itor->bpidx); + ckd_free(itor); +} + +static ps_seg_t * +ngram_bp_seg_next(ps_seg_t *seg) +{ + bptbl_seg_t *itor = (bptbl_seg_t *)seg; + + if (++itor->cur == itor->n_bpidx) { + ngram_bp_seg_free(seg); + return NULL; + } + + ngram_search_bp2itor(seg, itor->bpidx[itor->cur]); + return seg; +} + +static ps_segfuncs_t ngram_bp_segfuncs = { + /* seg_next */ ngram_bp_seg_next, + /* seg_free */ ngram_bp_seg_free +}; + +static ps_seg_t * +ngram_search_bp_iter(ngram_search_t *ngs, int bpidx, float32 lwf) +{ + bptbl_seg_t *itor; + int bp, cur; + + /* Calling this an "iterator" is a bit of a misnomer since we have + * to get the entire backtrace in order to produce it. On the + * other hand, all we actually need is the bptbl IDs, and we can + * allocate a fixed-size array of them. */ + itor = ckd_calloc(1, sizeof(*itor)); + itor->base.vt = &ngram_bp_segfuncs; + itor->base.search = ps_search_base(ngs); + itor->base.lwf = lwf; + itor->n_bpidx = 0; + bp = bpidx; + while (bp != NO_BP) { + bptbl_t *be = &ngs->bp_table[bp]; + bp = be->bp; + ++itor->n_bpidx; + } + if (itor->n_bpidx == 0) { + ckd_free(itor); + return NULL; + } + itor->bpidx = ckd_calloc(itor->n_bpidx, sizeof(*itor->bpidx)); + cur = itor->n_bpidx - 1; + bp = bpidx; + while (bp != NO_BP) { + bptbl_t *be = &ngs->bp_table[bp]; + itor->bpidx[cur] = bp; + bp = be->bp; + --cur; + } + + /* Fill in relevant fields for first element. */ + ngram_search_bp2itor((ps_seg_t *)itor, itor->bpidx[0]); + + return (ps_seg_t *)itor; +} + +static ps_seg_t * +ngram_search_seg_iter(ps_search_t *search, int32 *out_score) +{ + ngram_search_t *ngs = (ngram_search_t *)search; + + /* Only do bestpath search if the utterance is done. */ + if (ngs->bestpath && ngs->done) { + ps_lattice_t *dag; + ps_latlink_t *link; + double n_speech; + ps_seg_t *itor; + + ptmr_reset(&ngs->bestpath_perf); + ptmr_start(&ngs->bestpath_perf); + if ((dag = ngram_search_lattice(search)) == NULL) + return NULL; + if ((link = ngram_search_bestpath(search, out_score, TRUE)) == NULL) + return NULL; + itor = ps_lattice_seg_iter(dag, link, + ngs->bestpath_fwdtree_lw_ratio); + ptmr_stop(&ngs->bestpath_perf); + n_speech = (double)dag->n_frames + / cmd_ln_int32_r(ps_search_config(ngs), "-frate"); + E_INFO("bestpath %.2f CPU %.3f xRT\n", + ngs->bestpath_perf.t_cpu, + ngs->bestpath_perf.t_cpu / n_speech); + E_INFO("bestpath %.2f wall %.3f xRT\n", + ngs->bestpath_perf.t_elapsed, + ngs->bestpath_perf.t_elapsed / n_speech); + return itor; + } + else { + int32 bpidx; + + /* fwdtree and fwdflat use same backpointer table. */ + bpidx = ngram_search_find_exit(ngs, -1, out_score, NULL); + return ngram_search_bp_iter(ngs, bpidx, + /* but different language weights... */ + (ngs->done && ngs->fwdflat) + ? ngs->fwdflat_fwdtree_lw_ratio : 1.0); + } + + return NULL; +} + +static int32 +ngram_search_prob(ps_search_t *search) +{ + ngram_search_t *ngs = (ngram_search_t *)search; + + /* Only do bestpath search if the utterance is done. */ + if (ngs->bestpath && ngs->done) { + ps_lattice_t *dag; + ps_latlink_t *link; + + if ((dag = ngram_search_lattice(search)) == NULL) + return 0; + if ((link = ngram_search_bestpath(search, NULL, TRUE)) == NULL) + return 0; + return search->post; + } + else { + /* FIXME: Give some kind of good estimate here, eventually. */ + return 0; + } +} + +static void +create_dag_nodes(ngram_search_t *ngs, ps_lattice_t *dag) +{ + bptbl_t *bp_ptr; + int32 i; + + for (i = 0, bp_ptr = ngs->bp_table; i < ngs->bpidx; ++i, ++bp_ptr) { + int32 sf, ef, wid; + ps_latnode_t *node; + + /* Skip invalid backpointers (these result from -maxwpf pruning) */ + if (!bp_ptr->valid) + continue; + + sf = (bp_ptr->bp < 0) ? 0 : ngs->bp_table[bp_ptr->bp].frame + 1; + ef = bp_ptr->frame; + wid = bp_ptr->wid; + + assert(ef < dag->n_frames); + /* Skip non-final </s> entries. */ + if ((wid == ps_search_finish_wid(ngs)) && (ef < dag->n_frames - 1)) + continue; + + /* Skip if word not in LM */ + if ((!dict_filler_word(ps_search_dict(ngs), wid)) + && (!ngram_model_set_known_wid(ngs->lmset, + dict_basewid(ps_search_dict(ngs), wid)))) + continue; + + /* See if bptbl entry <wid,sf> already in lattice */ + for (node = dag->nodes; node; node = node->next) { + if ((node->wid == wid) && (node->sf == sf)) + break; + } + + /* For the moment, store bptbl indices in node.{fef,lef} */ + if (node) + node->lef = i; + else { + /* New node; link to head of list */ + node = listelem_malloc(dag->latnode_alloc); + node->wid = wid; + node->sf = sf; /* This is a frame index. */ + node->fef = node->lef = i; /* These are backpointer indices (argh) */ + node->reachable = FALSE; + node->entries = NULL; + node->exits = NULL; + + /* NOTE: This creates the list of nodes in reverse + * topological order, i.e. a node always precedes its + * antecedents in this list. */ + node->next = dag->nodes; + dag->nodes = node; + ++dag->n_nodes; + } + } +} + +static ps_latnode_t * +find_start_node(ngram_search_t *ngs, ps_lattice_t *dag) +{ + ps_latnode_t *node; + + /* Find start node <s>.0 */ + for (node = dag->nodes; node; node = node->next) { + if ((node->wid == ps_search_start_wid(ngs)) && (node->sf == 0)) + break; + } + if (!node) { + /* This is probably impossible. */ + E_ERROR("Couldn't find <s> in first frame\n"); + return NULL; + } + return node; +} + +static ps_latnode_t * +find_end_node(ngram_search_t *ngs, ps_lattice_t *dag, float32 lwf) +{ + ps_latnode_t *node; + int32 ef, bestbp, bp, bestscore; + + /* Find final node </s>.last_frame; nothing can follow this node */ + for (node = dag->nodes; node; node = node->next) { + int32 lef = ngs->bp_table[node->lef].frame; + if ((node->wid == ps_search_finish_wid(ngs)) + && (lef == dag->n_frames - 1)) + break; + } + if (node != NULL) + return node; + + /* It is quite likely that no </s> exited in the last frame. So, + * find the node corresponding to the best exit. */ + /* Find the last frame containing a word exit. */ + for (ef = dag->n_frames - 1; + ef >= 0 && ngs->bp_table_idx[ef] == ngs->bpidx; + --ef); + if (ef < 0) { + E_ERROR("Empty backpointer table: can not build DAG.\n"); + return NULL; + } + + /* Find best word exit in that frame. */ + bestscore = WORST_SCORE; + bestbp = NO_BP; + for (bp = ngs->bp_table_idx[ef]; bp < ngs->bp_table_idx[ef + 1]; ++bp) { + int32 n_used, l_scr, wid, prev_wid; + wid = ngs->bp_table[bp].real_wid; + prev_wid = ngs->bp_table[bp].prev_real_wid; + /* Always prefer </s>, of which there will only be one per frame. */ + if (wid == ps_search_finish_wid(ngs)) { + bestbp = bp; + break; + } + l_scr = ngram_tg_score(ngs->lmset, ps_search_finish_wid(ngs), + wid, prev_wid, &n_used) >>SENSCR_SHIFT; + l_scr = l_scr * lwf; + if (ngs->bp_table[bp].score + l_scr BETTER_THAN bestscore) { + bestscore = ngs->bp_table[bp].score + l_scr; + bestbp = bp; + } + } + if (bestbp == NO_BP) { + E_ERROR("No word exits found in last frame (%d), assuming no recognition\n", ef); + return NULL; + } + E_INFO("</s> not found in last frame, using %s.%d instead\n", + dict_basestr(ps_search_dict(ngs), ngs->bp_table[bestbp].wid), ef); + + /* Now find the node that corresponds to it. */ + for (node = dag->nodes; node; node = node->next) { + if (node->lef == bestbp) + return node; + } + + /* FIXME: This seems to happen a lot! */ + E_ERROR("Failed to find DAG node corresponding to %s\n", + dict_basestr(ps_search_dict(ngs), ngs->bp_table[bestbp].wid)); + return NULL; +} + +/* + * Build lattice from bptable. + */ +ps_lattice_t * +ngram_search_lattice(ps_search_t *search) +{ + int32 i, score, ascr, lscr; + ps_latnode_t *node, *from, *to; + ngram_search_t *ngs; + ps_lattice_t *dag; + int min_endfr, nlink; + float lwf; + + ngs = (ngram_search_t *)search; + min_endfr = cmd_ln_int32_r(ps_search_config(search), "-min_endfr"); + + /* If the best score is WORST_SCORE or worse, there is no way to + * make a lattice. */ + if (ngs->best_score == WORST_SCORE || ngs->best_score WORSE_THAN WORST_SCORE) + return NULL; + + /* Check to see if a lattice has previously been created over the + * same number of frames, and reuse it if so. */ + if (search->dag && search->dag->n_frames == ngs->n_frame) + return search->dag; + + /* Nope, create a new one. */ + ps_lattice_free(search->dag); + search->dag = NULL; + dag = ps_lattice_init_search(search, ngs->n_frame); + /* Compute these such that they agree with the fwdtree language weight. */ + lwf = ngs->fwdflat ? ngs->fwdflat_fwdtree_lw_ratio : 1.0; + create_dag_nodes(ngs, dag); + if ((dag->start = find_start_node(ngs, dag)) == NULL) + goto error_out; + if ((dag->end = find_end_node(ngs, dag, ngs->bestpath_fwdtree_lw_ratio)) == NULL) + goto error_out; + E_INFO("lattice start node %s.%d end node %s.%d\n", + dict_wordstr(search->dict, dag->start->wid), dag->start->sf, + dict_wordstr(search->dict, dag->end->wid), dag->end->sf); + + ngram_compute_seg_score(ngs, ngs->bp_table + dag->end->lef, lwf, + &dag->final_node_ascr, &lscr); + + /* + * At this point, dag->nodes is ordered such that nodes earlier in + * the list can follow (in time) those later in the list, but not + * vice versa (see above - also note that adjacency is purely + * determined by time which is why we can make this claim). Now + * create precedence links and simultanesously mark all nodes that + * can reach dag->end. (All nodes are reached from dag->start + * simply by definition - they were created that way). + * + * Note that this also means that any nodes before dag->end in the + * list can be discarded, meaning that dag->end will always be + * equal to dag->nodes (FIXME: except when loading from a file but + * we can fix that...) + */ + i = 0; + while (dag->nodes && dag->nodes != dag->end) { + ps_latnode_t *next = dag->nodes->next; + listelem_free(dag->latnode_alloc, dag->nodes); + dag->nodes = next; + ++i; + } + E_INFO("Eliminated %d nodes before end node\n", i); + dag->end->reachable = TRUE; + nlink = 0; + for (to = dag->end; to; to = to->next) { + int fef, lef; + + /* Skip if not reachable; it will never be reachable from dag->end */ + if (!to->reachable) + continue; + + /* Prune nodes with too few endpoints - heuristic + borrowed from Sphinx3 */ + fef = ngs->bp_table[to->fef].frame; + lef = ngs->bp_table[to->lef].frame; + if (to != dag->end && lef - fef < min_endfr) { + to->reachable = FALSE; + continue; + } + + /* Find predecessors of to : from->fef+1 <= to->sf <= from->lef+1 */ + for (from = to->next; from; from = from->next) { + bptbl_t *from_bpe; + + fef = ngs->bp_table[from->fef].frame; + lef = ngs->bp_table[from->lef].frame; + + if ((to->sf <= fef) || (to->sf > lef + 1)) + continue; + if (lef - fef < min_endfr) { + assert(!from->reachable); + continue; + } + + /* Find bptable entry for "from" that exactly precedes "to" */ + i = from->fef; + from_bpe = ngs->bp_table + i; + for (; i <= from->lef; i++, from_bpe++) { + if (from_bpe->wid != from->wid) + continue; + if (from_bpe->frame >= to->sf - 1) + break; + } + + if ((i > from->lef) || (from_bpe->frame != to->sf - 1)) + continue; + + /* Find acoustic score from.sf->to.sf-1 with right context = to */ + /* This gives us from_bpe's best acoustic score. */ + ngram_compute_seg_score(ngs, from_bpe, lwf, + &ascr, &lscr); + /* Now find the exact path score for from->to, including + * the appropriate final triphone. In fact this might not + * exist. */ + score = ngram_search_exit_score(ngs, from_bpe, + dict_first_phone(ps_search_dict(ngs), to->wid)); + /* Does not exist. Can't create a link here. */ + if (score == WORST_SCORE) + continue; + /* Adjust the arc score to match the correct triphone. */ + else + score = ascr + (score - from_bpe->score); + if (score BETTER_THAN 0) { + /* Scores must be negative, or Bad Things will happen. + In general, they are, except in corner cases + involving filler words. We don't want to throw any + links away so we'll keep these, but with some + arbitrarily improbable but recognizable score. */ + ps_lattice_link(dag, from, to, -424242, from_bpe->frame); + ++nlink; + from->reachable = TRUE; + } + else if (score BETTER_THAN WORST_SCORE) { + ps_lattice_link(dag, from, to, score, from_bpe->frame); + ++nlink; + from->reachable = TRUE; + } + } + } + + /* There must be at least one path between dag->start and dag->end */ + if (!dag->start->reachable) { + E_ERROR("End node of lattice isolated; unreachable\n"); + goto error_out; + } + + for (node = dag->nodes; node; node = node->next) { + /* Change node->{fef,lef} from bptbl indices to frames. */ + node->fef = ngs->bp_table[node->fef].frame; + node->lef = ngs->bp_table[node->lef].frame; + /* Find base wid for nodes. */ + node->basewid = dict_basewid(search->dict, node->wid); + } + + /* Link nodes with alternate pronunciations at the same timepoint. */ + for (node = dag->nodes; node; node = node->next) { + ps_latnode_t *alt; + /* Scan forward to find the next alternate, then stop. */ + for (alt = node->next; alt && alt->sf == node->sf; alt = alt->next) { + if (alt->basewid == node->basewid) { + alt->alt = node->alt; + node->alt = alt; + break; + } + } + } + E_INFO("Lattice has %d nodes, %d links\n", dag->n_nodes, nlink); + + /* Minor hack: If the final node is a filler word and not </s>, + * then set its base word ID to </s>, so that the language model + * scores won't be screwed up. */ + if (dict_filler_word(ps_search_dict(ngs), dag->end->wid)) + dag->end->basewid = ps_search_finish_wid(ngs); + + /* Free nodes unreachable from dag->end and their links */ + ps_lattice_delete_unreachable(dag); + + /* Add silprob and fillprob to corresponding links */ + ps_lattice_penalize_fillers(dag, ngs->silpen, ngs->fillpen); + + search->dag = dag; + return dag; + +error_out: + ps_lattice_free(dag); + return NULL; +} + +void ngram_search_set_lm(ngram_model_t *lm) +{ + default_lm = ngram_model_retain(lm); +} + |