diff options
author | Moonchild <moonchild@palemoon.org> | 2020-05-20 10:19:04 +0000 |
---|---|---|
committer | Moonchild <moonchild@palemoon.org> | 2020-05-20 14:04:17 +0000 |
commit | 99c2e698d2a3c56649e42d8d2133706cd8c9501e (patch) | |
tree | 85be449d772eb57860f0f386efb4bc1e790fd498 /media/pocketsphinx/src/state_align_search.c | |
parent | 15ac4021b06d549e47c9e2efc9364a9eb96bfe82 (diff) | |
download | UXP-99c2e698d2a3c56649e42d8d2133706cd8c9501e.tar UXP-99c2e698d2a3c56649e42d8d2133706cd8c9501e.tar.gz UXP-99c2e698d2a3c56649e42d8d2133706cd8c9501e.tar.lz UXP-99c2e698d2a3c56649e42d8d2133706cd8c9501e.tar.xz UXP-99c2e698d2a3c56649e42d8d2133706cd8c9501e.zip |
Issue #1538 - remove speech recognition engine
This removes speech recognition, pocketsphinx, training models
and the speech automated test interface.
This also re-establishes proper use of MOZ_WEBSPEECH to work
for the speech API (synthesis part only) that was a broken mess
before, with some synth parts being always built, some parts
being built only with it enabled and recognition parts being
dependent on it. I'm pretty sure it'd be totally busted if you'd
ever have tried building without MOZ_WEBPEECH before.
Tested that synthesis still works as-intended.
This resolves #1538
Diffstat (limited to 'media/pocketsphinx/src/state_align_search.c')
-rw-r--r-- | media/pocketsphinx/src/state_align_search.c | 310 |
1 files changed, 0 insertions, 310 deletions
diff --git a/media/pocketsphinx/src/state_align_search.c b/media/pocketsphinx/src/state_align_search.c deleted file mode 100644 index 33d8d70b5..000000000 --- a/media/pocketsphinx/src/state_align_search.c +++ /dev/null @@ -1,310 +0,0 @@ -/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ -/* ==================================================================== - * Copyright (c) 2010 Carnegie Mellon University. All rights - * reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * This work was supported in part by funding from the Defense Advanced - * Research Projects Agency and the National Science Foundation of the - * United States of America, and the CMU Sphinx Speech Consortium. - * - * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND - * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, - * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY - * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * ==================================================================== - * - */ - -/** - * @file state_align_search.c State (and phone and word) alignment search. - */ - -#include "state_align_search.h" - -static int -state_align_search_start(ps_search_t *search) -{ - state_align_search_t *sas = (state_align_search_t *)search; - - /* Activate the initial state. */ - hmm_enter(sas->hmms, 0, 0, 0); - - return 0; -} - -static void -renormalize_hmms(state_align_search_t *sas, int frame_idx, int32 norm) -{ - int i; - for (i = 0; i < sas->n_phones; ++i) - hmm_normalize(sas->hmms + i, norm); -} - -static int32 -evaluate_hmms(state_align_search_t *sas, int16 const *senscr, int frame_idx) -{ - int32 bs = WORST_SCORE; - int i; - - hmm_context_set_senscore(sas->hmmctx, senscr); - - for (i = 0; i < sas->n_phones; ++i) { - hmm_t *hmm = sas->hmms + i; - int32 score; - - if (hmm_frame(hmm) < frame_idx) - continue; - score = hmm_vit_eval(hmm); - if (score BETTER_THAN bs) { - bs = score; - } - } - return bs; -} - -static void -prune_hmms(state_align_search_t *sas, int frame_idx) -{ - int nf = frame_idx + 1; - int i; - - /* Check all phones to see if they remain active in the next frame. */ - for (i = 0; i < sas->n_phones; ++i) { - hmm_t *hmm = sas->hmms + i; - if (hmm_frame(hmm) < frame_idx) - continue; - hmm_frame(hmm) = nf; - } -} - -static void -phone_transition(state_align_search_t *sas, int frame_idx) -{ - int nf = frame_idx + 1; - int i; - - for (i = 0; i < sas->n_phones - 1; ++i) { - hmm_t *hmm, *nhmm; - int32 newphone_score; - - hmm = sas->hmms + i; - if (hmm_frame(hmm) != nf) - continue; - - newphone_score = hmm_out_score(hmm); - /* Transition into next phone using the usual Viterbi rule. */ - nhmm = hmm + 1; - if (hmm_frame(nhmm) < frame_idx - || newphone_score BETTER_THAN hmm_in_score(nhmm)) { - hmm_enter(nhmm, newphone_score, hmm_out_history(hmm), nf); - } - } -} - -#define TOKEN_STEP 20 -static void -extend_tokenstack(state_align_search_t *sas, int frame_idx) -{ - if (frame_idx >= sas->n_fr_alloc) { - sas->n_fr_alloc = frame_idx + TOKEN_STEP + 1; - sas->tokens = ckd_realloc(sas->tokens, - sas->n_emit_state * sas->n_fr_alloc - * sizeof(*sas->tokens)); - } - memset(sas->tokens + frame_idx * sas->n_emit_state, 0xff, - sas->n_emit_state * sizeof(*sas->tokens)); -} - -static void -record_transitions(state_align_search_t *sas, int frame_idx) -{ - uint16 *tokens; - int i; - - /* Push another frame of tokens on the stack. */ - extend_tokenstack(sas, frame_idx); - tokens = sas->tokens + frame_idx * sas->n_emit_state; - - /* Scan all active HMMs */ - for (i = 0; i < sas->n_phones; ++i) { - hmm_t *hmm = sas->hmms + i; - int j; - - if (hmm_frame(hmm) < frame_idx) - continue; - for (j = 0; j < sas->hmmctx->n_emit_state; ++j) { - int state_idx = i * sas->hmmctx->n_emit_state + j; - /* Record their backpointers on the token stack. */ - tokens[state_idx] = hmm_history(hmm, j); - /* Update backpointer fields with state index. */ - hmm_history(hmm, j) = state_idx; - } - } -} - -static int -state_align_search_step(ps_search_t *search, int frame_idx) -{ - state_align_search_t *sas = (state_align_search_t *)search; - acmod_t *acmod = ps_search_acmod(search); - int16 const *senscr; - int i; - - /* Calculate senone scores. */ - for (i = 0; i < sas->n_phones; ++i) - acmod_activate_hmm(acmod, sas->hmms + i); - senscr = acmod_score(acmod, &frame_idx); - - /* Renormalize here if needed. */ - /* FIXME: Make sure to (unit-)test this!!! */ - if ((sas->best_score - 0x300000) WORSE_THAN WORST_SCORE) { - E_INFO("Renormalizing Scores at frame %d, best score %d\n", - frame_idx, sas->best_score); - renormalize_hmms(sas, frame_idx, sas->best_score); - } - - /* Viterbi step. */ - sas->best_score = evaluate_hmms(sas, senscr, frame_idx); - prune_hmms(sas, frame_idx); - - /* Transition out of non-emitting states. */ - phone_transition(sas, frame_idx); - - /* Generate new tokens from best path results. */ - record_transitions(sas, frame_idx); - - /* Update frame counter */ - sas->frame = frame_idx; - - return 0; -} - -static int -state_align_search_finish(ps_search_t *search) -{ - state_align_search_t *sas = (state_align_search_t *)search; - hmm_t *final_phone = sas->hmms + sas->n_phones - 1; - ps_alignment_iter_t *itor; - ps_alignment_entry_t *ent; - int next_state, next_start, state, frame; - - /* Best state exiting the last frame. */ - next_state = state = hmm_out_history(final_phone); - if (state == 0xffff) { - E_ERROR("Failed to reach final state in alignment\n"); - return -1; - } - itor = ps_alignment_states(sas->al); - next_start = sas->frame + 1; - for (frame = sas->frame - 1; frame >= 0; --frame) { - state = sas->tokens[frame * sas->n_emit_state + state]; - /* State boundary, update alignment entry for next state. */ - if (state != next_state) { - itor = ps_alignment_iter_goto(itor, next_state); - assert(itor != NULL); - ent = ps_alignment_iter_get(itor); - ent->start = frame + 1; - ent->duration = next_start - ent->start; - E_DEBUG(1,("state %d start %d end %d\n", next_state, - ent->start, next_start)); - next_state = state; - next_start = frame + 1; - } - } - /* Update alignment entry for initial state. */ - itor = ps_alignment_iter_goto(itor, 0); - assert(itor != NULL); - ent = ps_alignment_iter_get(itor); - ent->start = 0; - ent->duration = next_start; - E_DEBUG(1,("state %d start %d end %d\n", 0, - ent->start, next_start)); - ps_alignment_iter_free(itor); - ps_alignment_propagate(sas->al); - - return 0; -} - -static int -state_align_search_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p) -{ - /* This does nothing. */ - return 0; -} - -static void -state_align_search_free(ps_search_t *search) -{ - state_align_search_t *sas = (state_align_search_t *)search; - ps_search_deinit(search); - ckd_free(sas->hmms); - ckd_free(sas->tokens); - hmm_context_free(sas->hmmctx); - ckd_free(sas); -} - -static ps_searchfuncs_t state_align_search_funcs = { - /* name: */ "state_align", - /* start: */ state_align_search_start, - /* step: */ state_align_search_step, - /* finish: */ state_align_search_finish, - /* reinit: */ state_align_search_reinit, - /* free: */ state_align_search_free, - /* lattice: */ NULL, - /* hyp: */ NULL, - /* prob: */ NULL, - /* seg_iter: */ NULL, -}; - -ps_search_t * -state_align_search_init(cmd_ln_t *config, - acmod_t *acmod, - ps_alignment_t *al) -{ - state_align_search_t *sas; - ps_alignment_iter_t *itor; - hmm_t *hmm; - - sas = ckd_calloc(1, sizeof(*sas)); - ps_search_init(ps_search_base(sas), &state_align_search_funcs, - config, acmod, al->d2p->dict, al->d2p); - sas->hmmctx = hmm_context_init(bin_mdef_n_emit_state(acmod->mdef), - acmod->tmat->tp, NULL, acmod->mdef->sseq); - if (sas->hmmctx == NULL) { - ckd_free(sas); - return NULL; - } - sas->al = al; - - /* Generate HMM vector from phone level of alignment. */ - sas->n_phones = ps_alignment_n_phones(al); - sas->n_emit_state = ps_alignment_n_states(al); - sas->hmms = ckd_calloc(sas->n_phones, sizeof(*sas->hmms)); - for (hmm = sas->hmms, itor = ps_alignment_phones(al); itor; - ++hmm, itor = ps_alignment_iter_next(itor)) { - ps_alignment_entry_t *ent = ps_alignment_iter_get(itor); - hmm_init(sas->hmmctx, hmm, FALSE, - ent->id.pid.ssid, ent->id.pid.tmatid); - } - return ps_search_base(sas); -} |