diff options
Diffstat (limited to 'media/pocketsphinx/src/phone_loop_search.c')
-rw-r--r-- | media/pocketsphinx/src/phone_loop_search.c | 367 |
1 files changed, 367 insertions, 0 deletions
diff --git a/media/pocketsphinx/src/phone_loop_search.c b/media/pocketsphinx/src/phone_loop_search.c new file mode 100644 index 000000000..97b13360f --- /dev/null +++ b/media/pocketsphinx/src/phone_loop_search.c @@ -0,0 +1,367 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2008 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +/** + * @file phone_loop_search.h Fast and rough context-independent phoneme loop search. + */ + +#include <sphinxbase/err.h> + +#include "phone_loop_search.h" + +static int phone_loop_search_start(ps_search_t *search); +static int phone_loop_search_step(ps_search_t *search, int frame_idx); +static int phone_loop_search_finish(ps_search_t *search); +static int phone_loop_search_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p); +static void phone_loop_search_free(ps_search_t *search); +static char const *phone_loop_search_hyp(ps_search_t *search, int32 *out_score, int32 *out_is_final); +static int32 phone_loop_search_prob(ps_search_t *search); +static ps_seg_t *phone_loop_search_seg_iter(ps_search_t *search, int32 *out_score); + +static ps_searchfuncs_t phone_loop_search_funcs = { + /* name: */ "phone_loop", + /* start: */ phone_loop_search_start, + /* step: */ phone_loop_search_step, + /* finish: */ phone_loop_search_finish, + /* reinit: */ phone_loop_search_reinit, + /* free: */ phone_loop_search_free, + /* lattice: */ NULL, + /* hyp: */ phone_loop_search_hyp, + /* prob: */ phone_loop_search_prob, + /* seg_iter: */ phone_loop_search_seg_iter, +}; + +static int +phone_loop_search_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p) +{ + phone_loop_search_t *pls = (phone_loop_search_t *)search; + cmd_ln_t *config = ps_search_config(search); + acmod_t *acmod = ps_search_acmod(search); + int i; + + /* Free old dict2pid, dict, if necessary. */ + ps_search_base_reinit(search, dict, d2p); + + /* Initialize HMM context. */ + if (pls->hmmctx) + hmm_context_free(pls->hmmctx); + pls->hmmctx = hmm_context_init(bin_mdef_n_emit_state(acmod->mdef), + acmod->tmat->tp, NULL, acmod->mdef->sseq); + if (pls->hmmctx == NULL) + return -1; + + /* Initialize penalty storage */ + pls->n_phones = bin_mdef_n_ciphone(acmod->mdef); + pls->window = cmd_ln_int32_r(config, "-pl_window"); + if (pls->penalties) + ckd_free(pls->penalties); + pls->penalties = (int32 *)ckd_calloc(pls->n_phones, sizeof(*pls->penalties)); + if (pls->pen_buf) + ckd_free_2d(pls->pen_buf); + pls->pen_buf = (int32 **)ckd_calloc_2d(pls->window, pls->n_phones, sizeof(**pls->pen_buf)); + + /* Initialize phone HMMs. */ + if (pls->hmms) { + for (i = 0; i < pls->n_phones; ++i) + hmm_deinit((hmm_t *)&pls->hmms[i]); + ckd_free(pls->hmms); + } + pls->hmms = (hmm_t *)ckd_calloc(pls->n_phones, sizeof(*pls->hmms)); + for (i = 0; i < pls->n_phones; ++i) { + hmm_init(pls->hmmctx, (hmm_t *)&pls->hmms[i], + FALSE, + bin_mdef_pid2ssid(acmod->mdef, i), + bin_mdef_pid2tmatid(acmod->mdef, i)); + } + pls->penalty_weight = cmd_ln_float64_r(config, "-pl_weight"); + pls->beam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-pl_beam")) >> SENSCR_SHIFT; + pls->pbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-pl_pbeam")) >> SENSCR_SHIFT; + pls->pip = logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-pl_pip")) >> SENSCR_SHIFT; + E_INFO("State beam %d Phone exit beam %d Insertion penalty %d\n", + pls->beam, pls->pbeam, pls->pip); + + return 0; +} + +ps_search_t * +phone_loop_search_init(cmd_ln_t *config, + acmod_t *acmod, + dict_t *dict) +{ + phone_loop_search_t *pls; + + /* Allocate and initialize. */ + pls = (phone_loop_search_t *)ckd_calloc(1, sizeof(*pls)); + ps_search_init(ps_search_base(pls), &phone_loop_search_funcs, + config, acmod, dict, NULL); + phone_loop_search_reinit(ps_search_base(pls), ps_search_dict(pls), + ps_search_dict2pid(pls)); + + return ps_search_base(pls); +} + +static void +phone_loop_search_free_renorm(phone_loop_search_t *pls) +{ + gnode_t *gn; + for (gn = pls->renorm; gn; gn = gnode_next(gn)) + ckd_free(gnode_ptr(gn)); + glist_free(pls->renorm); + pls->renorm = NULL; +} + +static void +phone_loop_search_free(ps_search_t *search) +{ + phone_loop_search_t *pls = (phone_loop_search_t *)search; + int i; + + ps_search_deinit(search); + for (i = 0; i < pls->n_phones; ++i) + hmm_deinit((hmm_t *)&pls->hmms[i]); + phone_loop_search_free_renorm(pls); + ckd_free_2d(pls->pen_buf); + ckd_free(pls->hmms); + ckd_free(pls->penalties); + hmm_context_free(pls->hmmctx); + ckd_free(pls); +} + +static int +phone_loop_search_start(ps_search_t *search) +{ + phone_loop_search_t *pls = (phone_loop_search_t *)search; + int i; + + /* Reset and enter all phone HMMs. */ + for (i = 0; i < pls->n_phones; ++i) { + hmm_t *hmm = (hmm_t *)&pls->hmms[i]; + hmm_clear(hmm); + hmm_enter(hmm, 0, -1, 0); + } + memset(pls->penalties, 0, pls->n_phones * sizeof(*pls->penalties)); + for (i = 0; i < pls->window; i++) + memset(pls->pen_buf[i], 0, pls->n_phones * sizeof(*pls->pen_buf[i])); + phone_loop_search_free_renorm(pls); + pls->best_score = 0; + pls->pen_buf_ptr = 0; + + return 0; +} + +static void +renormalize_hmms(phone_loop_search_t *pls, int frame_idx, int32 norm) +{ + phone_loop_renorm_t *rn = (phone_loop_renorm_t *)ckd_calloc(1, sizeof(*rn)); + int i; + + pls->renorm = glist_add_ptr(pls->renorm, rn); + rn->frame_idx = frame_idx; + rn->norm = norm; + + for (i = 0; i < pls->n_phones; ++i) { + hmm_normalize((hmm_t *)&pls->hmms[i], norm); + } +} + +static void +evaluate_hmms(phone_loop_search_t *pls, int16 const *senscr, int frame_idx) +{ + int32 bs = WORST_SCORE; + int i; + + hmm_context_set_senscore(pls->hmmctx, senscr); + + for (i = 0; i < pls->n_phones; ++i) { + hmm_t *hmm = (hmm_t *)&pls->hmms[i]; + int32 score; + + if (hmm_frame(hmm) < frame_idx) + continue; + score = hmm_vit_eval(hmm); + if (score BETTER_THAN bs) { + bs = score; + } + } + pls->best_score = bs; +} + +static void +store_scores(phone_loop_search_t *pls, int frame_idx) +{ + int i, j, itr; + + for (i = 0; i < pls->n_phones; ++i) { + hmm_t *hmm = (hmm_t *)&pls->hmms[i]; + pls->pen_buf[pls->pen_buf_ptr][i] = (hmm_bestscore(hmm) - pls->best_score) * pls->penalty_weight; + } + pls->pen_buf_ptr++; + pls->pen_buf_ptr = pls->pen_buf_ptr % pls->window; + + //update penalties + for (i = 0; i < pls->n_phones; ++i) { + pls->penalties[i] = WORST_SCORE; + for (j = 0, itr = pls->pen_buf_ptr + 1; j < pls->window; j++, itr++) { + itr = itr % pls->window; + if (pls->pen_buf[itr][i] > pls->penalties[i]) + pls->penalties[i] = pls->pen_buf[itr][i]; + } + } +} + +static void +prune_hmms(phone_loop_search_t *pls, int frame_idx) +{ + int32 thresh = pls->best_score + pls->beam; + int nf = frame_idx + 1; + int i; + + /* Check all phones to see if they remain active in the next frame. */ + for (i = 0; i < pls->n_phones; ++i) { + hmm_t *hmm = (hmm_t *)&pls->hmms[i]; + + if (hmm_frame(hmm) < frame_idx) + continue; + /* Retain if score better than threshold. */ + if (hmm_bestscore(hmm) BETTER_THAN thresh) { + hmm_frame(hmm) = nf; + } + else + hmm_clear_scores(hmm); + } +} + +static void +phone_transition(phone_loop_search_t *pls, int frame_idx) +{ + int32 thresh = pls->best_score + pls->pbeam; + int nf = frame_idx + 1; + int i; + + /* Now transition out of phones whose last states are inside the + * phone transition beam. */ + for (i = 0; i < pls->n_phones; ++i) { + hmm_t *hmm = (hmm_t *)&pls->hmms[i]; + int32 newphone_score; + int j; + + if (hmm_frame(hmm) != nf) + continue; + + newphone_score = hmm_out_score(hmm) + pls->pip; + if (newphone_score BETTER_THAN thresh) { + /* Transition into all phones using the usual Viterbi rule. */ + for (j = 0; j < pls->n_phones; ++j) { + hmm_t *nhmm = (hmm_t *)&pls->hmms[j]; + + if (hmm_frame(nhmm) < frame_idx + || newphone_score BETTER_THAN hmm_in_score(nhmm)) { + hmm_enter(nhmm, newphone_score, hmm_out_history(hmm), nf); + } + } + } + } +} + +static int +phone_loop_search_step(ps_search_t *search, int frame_idx) +{ + phone_loop_search_t *pls = (phone_loop_search_t *)search; + acmod_t *acmod = ps_search_acmod(search); + int16 const *senscr; + int i; + + /* All CI senones are active all the time. */ + if (!ps_search_acmod(pls)->compallsen) { + acmod_clear_active(ps_search_acmod(pls)); + for (i = 0; i < pls->n_phones; ++i) + acmod_activate_hmm(acmod, (hmm_t *)&pls->hmms[i]); + } + + /* Calculate senone scores for current frame. */ + senscr = acmod_score(acmod, &frame_idx); + + /* Renormalize, if necessary. */ + if (pls->best_score + (2 * pls->beam) WORSE_THAN WORST_SCORE) { + E_INFO("Renormalizing Scores at frame %d, best score %d\n", + frame_idx, pls->best_score); + renormalize_hmms(pls, frame_idx, pls->best_score); + } + + /* Evaluate phone HMMs for current frame. */ + evaluate_hmms(pls, senscr, frame_idx); + + /* Store hmm scores for senone penaly calculation */ + store_scores(pls, frame_idx); + + /* Prune phone HMMs. */ + prune_hmms(pls, frame_idx); + + /* Do phone transitions. */ + phone_transition(pls, frame_idx); + + return 0; +} + +static int +phone_loop_search_finish(ps_search_t *search) +{ + /* Actually nothing to do here really. */ + return 0; +} + +static char const * +phone_loop_search_hyp(ps_search_t *search, int32 *out_score, int32 *out_is_final) +{ + E_WARN("Hypotheses are not returned from phone loop search"); + return NULL; +} + +static int32 +phone_loop_search_prob(ps_search_t *search) +{ + /* FIXME: Actually... they ought to be. */ + E_WARN("Posterior probabilities are not returned from phone loop search"); + return 0; +} + +static ps_seg_t * +phone_loop_search_seg_iter(ps_search_t *search, int32 *out_score) +{ + E_WARN("Hypotheses are not returned from phone loop search"); + return NULL; +} |