diff options
author | Moonchild <moonchild@palemoon.org> | 2020-05-20 10:19:04 +0000 |
---|---|---|
committer | Moonchild <moonchild@palemoon.org> | 2020-05-20 14:04:17 +0000 |
commit | 99c2e698d2a3c56649e42d8d2133706cd8c9501e (patch) | |
tree | 85be449d772eb57860f0f386efb4bc1e790fd498 /media/pocketsphinx/src/pocketsphinx.c | |
parent | 15ac4021b06d549e47c9e2efc9364a9eb96bfe82 (diff) | |
download | UXP-99c2e698d2a3c56649e42d8d2133706cd8c9501e.tar UXP-99c2e698d2a3c56649e42d8d2133706cd8c9501e.tar.gz UXP-99c2e698d2a3c56649e42d8d2133706cd8c9501e.tar.lz UXP-99c2e698d2a3c56649e42d8d2133706cd8c9501e.tar.xz UXP-99c2e698d2a3c56649e42d8d2133706cd8c9501e.zip |
Issue #1538 - remove speech recognition engine
This removes speech recognition, pocketsphinx, training models
and the speech automated test interface.
This also re-establishes proper use of MOZ_WEBSPEECH to work
for the speech API (synthesis part only) that was a broken mess
before, with some synth parts being always built, some parts
being built only with it enabled and recognition parts being
dependent on it. I'm pretty sure it'd be totally busted if you had
ever tried building without MOZ_WEBSPEECH before.
Tested that synthesis still works as intended.
This resolves #1538
Diffstat (limited to 'media/pocketsphinx/src/pocketsphinx.c')
-rw-r--r-- | media/pocketsphinx/src/pocketsphinx.c | 1421 |
1 files changed, 0 insertions, 1421 deletions
diff --git a/media/pocketsphinx/src/pocketsphinx.c b/media/pocketsphinx/src/pocketsphinx.c deleted file mode 100644 index 7514c1fb5..000000000 --- a/media/pocketsphinx/src/pocketsphinx.c +++ /dev/null @@ -1,1421 +0,0 @@ -/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ -/* ==================================================================== - * Copyright (c) 2008 Carnegie Mellon University. All rights - * reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * This work was supported in part by funding from the Defense Advanced - * Research Projects Agency and the National Science Foundation of the - * United States of America, and the CMU Sphinx Speech Consortium. - * - * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND - * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, - * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY - * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - * ==================================================================== - * - */ - -/* System headers. */ -#include <stdio.h> -#include <assert.h> - -#ifdef HAVE_UNISTD_H -#include <unistd.h> -#endif - -/* SphinxBase headers. */ -#include <sphinxbase/err.h> -#include <sphinxbase/strfuncs.h> -#include <sphinxbase/filename.h> -#include <sphinxbase/pio.h> -#include <sphinxbase/jsgf.h> -#include <sphinxbase/hash_table.h> - -/* Local headers. */ -#include "cmdln_macro.h" -#include "pocketsphinx.h" -#include "pocketsphinx_internal.h" -#include "ps_lattice_internal.h" -#include "phone_loop_search.h" -#include "kws_search.h" -#include "fsg_search_internal.h" -#include "ngram_search.h" -#include "ngram_search_fwdtree.h" -#include "ngram_search_fwdflat.h" -#include "allphone_search.h" - -static const arg_t ps_args_def[] = { - POCKETSPHINX_OPTIONS, - CMDLN_EMPTY_OPTION -}; - -/* I'm not sure what the portable way to do this is. */ -static int -file_exists(const char *path) -{ - FILE *tmp; - - tmp = fopen(path, "rb"); - if (tmp) fclose(tmp); - return (tmp != NULL); -} - -#ifdef MODELDIR -static int -hmmdir_exists(const char *path) -{ - FILE *tmp; - char *mdef = string_join(path, "/mdef", NULL); - - tmp = fopen(mdef, "rb"); - if (tmp) fclose(tmp); - ckd_free(mdef); - return (tmp != NULL); -} -#endif - -static void -ps_add_file(ps_decoder_t *ps, const char *arg, - const char *hmmdir, const char *file) -{ - char *tmp = string_join(hmmdir, "/", file, NULL); - - if (cmd_ln_str_r(ps->config, arg) == NULL && file_exists(tmp)) - cmd_ln_set_str_r(ps->config, arg, tmp); - ckd_free(tmp); -} - -static void -ps_init_defaults(ps_decoder_t *ps) -{ - /* Disable memory mapping on Blackfin (FIXME: should be uClinux in general). 
*/ -#ifdef __ADSPBLACKFIN__ - E_INFO("Will not use mmap() on uClinux/Blackfin."); - cmd_ln_set_boolean_r(ps->config, "-mmap", FALSE); -#endif - - char const *hmmdir; - /* Get acoustic model filenames and add them to the command-line */ - if ((hmmdir = cmd_ln_str_r(ps->config, "-hmm")) != NULL) { - ps_add_file(ps, "-mdef", hmmdir, "mdef"); - ps_add_file(ps, "-mean", hmmdir, "means"); - ps_add_file(ps, "-var", hmmdir, "variances"); - ps_add_file(ps, "-tmat", hmmdir, "transition_matrices"); - ps_add_file(ps, "-mixw", hmmdir, "mixture_weights"); - ps_add_file(ps, "-sendump", hmmdir, "sendump"); - ps_add_file(ps, "-fdict", hmmdir, "noisedict"); - ps_add_file(ps, "-lda", hmmdir, "feature_transform"); - ps_add_file(ps, "-featparams", hmmdir, "feat.params"); - ps_add_file(ps, "-senmgau", hmmdir, "senmgau"); - } -} - -static void -ps_free_searches(ps_decoder_t *ps) -{ - if (ps->searches) { - /* Release keys manually as we used ckd_salloc to add them, release every search too. */ - hash_iter_t *search_it; - for (search_it = hash_table_iter(ps->searches); search_it; - search_it = hash_table_iter_next(search_it)) { - ckd_free((char *) hash_entry_key(search_it->ent)); - ps_search_free(hash_entry_val(search_it->ent)); - } - - hash_table_empty(ps->searches); - hash_table_free(ps->searches); - } - - ps->searches = NULL; - ps->search = NULL; -} - -static ps_search_t * -ps_find_search(ps_decoder_t *ps, char const *name) -{ - void *search = NULL; - hash_table_lookup(ps->searches, name, &search); - - return (ps_search_t *) search; -} - -void -ps_default_search_args(cmd_ln_t *config) -{ -#ifdef MODELDIR - /* Set default acoustic and language models. 
*/ - const char *hmmdir = cmd_ln_str_r(config, "-hmm"); - if (hmmdir == NULL && hmmdir_exists(MODELDIR "/en-us/en-us")) { - hmmdir = MODELDIR "/en-us/en-us"; - cmd_ln_set_str_r(config, "-hmm", hmmdir); - } - - const char *lmfile = cmd_ln_str_r(config, "-lm"); - - if (lmfile == NULL && !cmd_ln_str_r(config, "-fsg") - && !cmd_ln_str_r(config, "-jsgf") - && !cmd_ln_str_r(config, "-lmctl") - && !cmd_ln_str_r(config, "-kws") - && !cmd_ln_str_r(config, "-keyphrase") - && file_exists(MODELDIR "/en-us/en-us.lm.dmp")) { - lmfile = MODELDIR "/en-us/en-us.lm.dmp"; - cmd_ln_set_str_r(config, "-lm", lmfile); - } - - const char *dictfile = cmd_ln_str_r(config, "-dict"); - if (dictfile == NULL && file_exists(MODELDIR "/en-us/cmudict-en-us.dict")) { - dictfile = MODELDIR "/en-us/cmudict-en-us.dict"; - cmd_ln_set_str_r(config, "-dict", dictfile); - } - - /* Expand acoustic and language model filenames relative to installation - * path. */ - if (hmmdir && !path_is_absolute(hmmdir) && !hmmdir_exists(hmmdir)) { - char *tmphmm = string_join(MODELDIR "/hmm/", hmmdir, NULL); - if (hmmdir_exists(tmphmm)) { - cmd_ln_set_str_r(config, "-hmm", tmphmm); - } else { - E_ERROR("Failed to find mdef file inside the model folder " - "specified with -hmm `%s'\n", hmmdir); - } - ckd_free(tmphmm); - } - if (lmfile && !path_is_absolute(lmfile) && !file_exists(lmfile)) { - char *tmplm = string_join(MODELDIR "/lm/", lmfile, NULL); - cmd_ln_set_str_r(config, "-lm", tmplm); - ckd_free(tmplm); - } - if (dictfile && !path_is_absolute(dictfile) && !file_exists(dictfile)) { - char *tmpdict = string_join(MODELDIR "/lm/", dictfile, NULL); - cmd_ln_set_str_r(config, "-dict", tmpdict); - ckd_free(tmpdict); - } -#endif -} - -int -ps_reinit(ps_decoder_t *ps, cmd_ln_t *config) -{ - const char *path; - const char *keyphrase; - int32 lw; - - if (config && config != ps->config) { - cmd_ln_free_r(ps->config); - ps->config = cmd_ln_retain(config); - } - - err_set_debug_level(cmd_ln_int32_r(ps->config, "-debug")); - 
ps->mfclogdir = cmd_ln_str_r(ps->config, "-mfclogdir"); - ps->rawlogdir = cmd_ln_str_r(ps->config, "-rawlogdir"); - ps->senlogdir = cmd_ln_str_r(ps->config, "-senlogdir"); - - /* Fill in some default arguments. */ - ps_init_defaults(ps); - - /* Free old searches (do this before other reinit) */ - ps_free_searches(ps); - ps->searches = hash_table_new(3, HASH_CASE_YES); - - /* Free old acmod. */ - acmod_free(ps->acmod); - ps->acmod = NULL; - - /* Free old dictionary (must be done after the two things above) */ - dict_free(ps->dict); - ps->dict = NULL; - - /* Free d2p */ - dict2pid_free(ps->d2p); - ps->d2p = NULL; - - /* Logmath computation (used in acmod and search) */ - if (ps->lmath == NULL - || (logmath_get_base(ps->lmath) != - (float64)cmd_ln_float32_r(ps->config, "-logbase"))) { - if (ps->lmath) - logmath_free(ps->lmath); - ps->lmath = logmath_init - ((float64)cmd_ln_float32_r(ps->config, "-logbase"), 0, - cmd_ln_boolean_r(ps->config, "-bestpath")); - } - - /* Acoustic model (this is basically everything that - * uttproc.c, senscr.c, and others used to do) */ - if ((ps->acmod = acmod_init(ps->config, ps->lmath, NULL, NULL)) == NULL) - return -1; - - if (cmd_ln_int32_r(ps->config, "-pl_window") > 0) { - /* Initialize an auxiliary phone loop search, which will run in - * "parallel" with FSG or N-Gram search. */ - if ((ps->phone_loop = - phone_loop_search_init(ps->config, ps->acmod, ps->dict)) == NULL) - return -1; - hash_table_enter(ps->searches, - ckd_salloc(ps_search_name(ps->phone_loop)), - ps->phone_loop); - } - - /* Dictionary and triphone mappings (depends on acmod). */ - /* FIXME: pass config, change arguments, implement LTS, etc. */ - if ((ps->dict = dict_init(ps->config, ps->acmod->mdef, ps->acmod->lmath)) == NULL) - return -1; - if ((ps->d2p = dict2pid_build(ps->acmod->mdef, ps->dict)) == NULL) - return -1; - - lw = cmd_ln_float32_r(config, "-lw"); - - /* Determine whether we are starting out in FSG or N-Gram search mode. 
- * If neither is used skip search initialization. */ - - /* Load KWS if one was specified in config */ - if ((keyphrase = cmd_ln_str_r(config, "-keyphrase"))) { - if (ps_set_keyphrase(ps, PS_DEFAULT_SEARCH, keyphrase)) - return -1; - ps_set_search(ps, PS_DEFAULT_SEARCH); - } - - if ((path = cmd_ln_str_r(config, "-kws"))) { - if (ps_set_kws(ps, PS_DEFAULT_SEARCH, path)) - return -1; - ps_set_search(ps, PS_DEFAULT_SEARCH); - } - - /* Load an FSG if one was specified in config */ - if ((path = cmd_ln_str_r(config, "-fsg"))) { - fsg_model_t *fsg = fsg_model_readfile(path, ps->lmath, lw); - if (!fsg) - return -1; - if (ps_set_fsg(ps, PS_DEFAULT_SEARCH, fsg)) - return -1; - ps_set_search(ps, PS_DEFAULT_SEARCH); - } - - /* Or load a JSGF grammar */ - if ((path = cmd_ln_str_r(config, "-jsgf"))) { - if (ps_set_jsgf_file(ps, PS_DEFAULT_SEARCH, path) - || ps_set_search(ps, PS_DEFAULT_SEARCH)) - return -1; - } - - if ((path = cmd_ln_str_r(ps->config, "-allphone"))) { - if (ps_set_allphone_file(ps, PS_DEFAULT_SEARCH, path) - || ps_set_search(ps, PS_DEFAULT_SEARCH)) - return -1; - } - - if ((path = cmd_ln_str_r(ps->config, "-lm")) && - !cmd_ln_boolean_r(ps->config, "-allphone")) { - if (ps_set_lm_file(ps, PS_DEFAULT_SEARCH, path) - || ps_set_search(ps, PS_DEFAULT_SEARCH)) - return -1; - } - - if ((path = cmd_ln_str_r(ps->config, "-lmctl"))) { - const char *name; - ngram_model_t *lmset; - ngram_model_set_iter_t *lmset_it; - - if (!(lmset = ngram_model_set_read(ps->config, path, ps->lmath))) { - E_ERROR("Failed to read language model control file: %s\n", path); - return -1; - } - - for(lmset_it = ngram_model_set_iter(lmset); - lmset_it; lmset_it = ngram_model_set_iter_next(lmset_it)) { - - ngram_model_t *lm = ngram_model_set_iter_model(lmset_it, &name); - E_INFO("adding search %s\n", name); - if (ps_set_lm(ps, name, lm)) { - ngram_model_free(lm); - ngram_model_set_iter_free(lmset_it); - return -1; - } - ngram_model_free(lm); - } - - name = cmd_ln_str_r(config, "-lmname"); - if 
(name) - ps_set_search(ps, name); - else { - E_ERROR("No default LM name (-lmname) for `-lmctl'\n"); - return -1; - } - } - - /* Initialize performance timer. */ - ps->perf.name = "decode"; - ptmr_init(&ps->perf); - - return 0; -} - -ps_decoder_t * -ps_init(cmd_ln_t *config) -{ - ps_decoder_t *ps; - - ps = ckd_calloc(1, sizeof(*ps)); - ps->refcount = 1; - if (ps_reinit(ps, config) < 0) { - ps_free(ps); - return NULL; - } - return ps; -} - -arg_t const * -ps_args(void) -{ - return ps_args_def; -} - -ps_decoder_t * -ps_retain(ps_decoder_t *ps) -{ - ++ps->refcount; - return ps; -} - -int -ps_free(ps_decoder_t *ps) -{ - if (ps == NULL) - return 0; - if (--ps->refcount > 0) - return ps->refcount; - ps_free_searches(ps); - dict_free(ps->dict); - dict2pid_free(ps->d2p); - acmod_free(ps->acmod); - logmath_free(ps->lmath); - cmd_ln_free_r(ps->config); - ckd_free(ps); - return 0; -} - -cmd_ln_t * -ps_get_config(ps_decoder_t *ps) -{ - return ps->config; -} - -logmath_t * -ps_get_logmath(ps_decoder_t *ps) -{ - return ps->lmath; -} - -fe_t * -ps_get_fe(ps_decoder_t *ps) -{ - return ps->acmod->fe; -} - -feat_t * -ps_get_feat(ps_decoder_t *ps) -{ - return ps->acmod->fcb; -} - -ps_mllr_t * -ps_update_mllr(ps_decoder_t *ps, ps_mllr_t *mllr) -{ - return acmod_update_mllr(ps->acmod, mllr); -} - -int -ps_set_search(ps_decoder_t *ps, const char *name) -{ - ps_search_t *search = ps_find_search(ps, name); - if (search) - ps->search = search; - - /* Set pl window depending on the search */ - if (!strcmp(PS_SEARCH_NGRAM, ps_search_name(search))) { - ps->pl_window = cmd_ln_int32_r(ps->config, "-pl_window"); - } else { - ps->pl_window = 0; - } - - return search ? 
0 : -1; -} - -const char* -ps_get_search(ps_decoder_t *ps) -{ - hash_iter_t *search_it; - const char* name = NULL; - for (search_it = hash_table_iter(ps->searches); search_it; - search_it = hash_table_iter_next(search_it)) { - if (hash_entry_val(search_it->ent) == ps->search) { - name = hash_entry_key(search_it->ent); - break; - } - } - return name; -} - -int -ps_unset_search(ps_decoder_t *ps, const char *name) -{ - ps_search_t *search = hash_table_delete(ps->searches, name); - if (!search) - return -1; - if (ps->search == search) - ps->search = NULL; - ps_search_free(search); - return 0; -} - -ps_search_iter_t * -ps_search_iter(ps_decoder_t *ps) -{ - return (ps_search_iter_t *)hash_table_iter(ps->searches); -} - -ps_search_iter_t * -ps_search_iter_next(ps_search_iter_t *itor) -{ - return (ps_search_iter_t *)hash_table_iter_next((hash_iter_t *)itor); -} - -const char* -ps_search_iter_val(ps_search_iter_t *itor) -{ - return (const char*)(((hash_iter_t *)itor)->ent->key); -} - -void -ps_search_iter_free(ps_search_iter_t *itor) -{ - hash_table_iter_free((hash_iter_t *)itor); -} - -ngram_model_t * -ps_get_lm(ps_decoder_t *ps, const char *name) -{ - ps_search_t *search = ps_find_search(ps, name); - if (search && strcmp(PS_SEARCH_NGRAM, ps_search_name(search))) - return NULL; - return search ? ((ngram_search_t *) search)->lmset : NULL; -} - -fsg_model_t * -ps_get_fsg(ps_decoder_t *ps, const char *name) -{ - ps_search_t *search = ps_find_search(ps, name); - if (search && strcmp(PS_SEARCH_FSG, ps_search_name(search))) - return NULL; - return search ? ((fsg_search_t *) search)->fsg : NULL; -} - -const char* -ps_get_kws(ps_decoder_t *ps, const char* name) -{ - ps_search_t *search = ps_find_search(ps, name); - if (search && strcmp(PS_SEARCH_KWS, ps_search_name(search))) - return NULL; - return search ? 
kws_search_get_keywords(search) : NULL; -} - -static int -set_search_internal(ps_decoder_t *ps, const char *name, ps_search_t *search) -{ - ps_search_t *old_search; - - if (!search) - return 1; - - search->pls = ps->phone_loop; - old_search = (ps_search_t *) hash_table_replace(ps->searches, ckd_salloc(name), search); - if (old_search != search) - ps_search_free(old_search); - - return 0; -} - -int -ps_set_lm(ps_decoder_t *ps, const char *name, ngram_model_t *lm) -{ - ps_search_t *search; - search = ngram_search_init(lm, ps->config, ps->acmod, ps->dict, ps->d2p); - return set_search_internal(ps, name, search); -} - -int -ps_set_lm_file(ps_decoder_t *ps, const char *name, const char *path) -{ - ngram_model_t *lm; - int result; - - lm = ngram_model_read(ps->config, path, NGRAM_AUTO, ps->lmath); - if (!lm) - return -1; - - result = ps_set_lm(ps, name, lm); - ngram_model_free(lm); - return result; -} - -int -ps_set_allphone(ps_decoder_t *ps, const char *name, ngram_model_t *lm) -{ - ps_search_t *search; - search = allphone_search_init(lm, ps->config, ps->acmod, ps->dict, ps->d2p); - return set_search_internal(ps, name, search); -} - -int -ps_set_allphone_file(ps_decoder_t *ps, const char *name, const char *path) -{ - ngram_model_t *lm; - int result; - - lm = NULL; - if (path) - lm = ngram_model_read(ps->config, path, NGRAM_AUTO, ps->lmath); - result = ps_set_allphone(ps, name, lm); - if (lm) - ngram_model_free(lm); - return result; -} - -int -ps_set_kws(ps_decoder_t *ps, const char *name, const char *keyfile) -{ - ps_search_t *search; - search = kws_search_init(NULL, keyfile, ps->config, ps->acmod, ps->dict, ps->d2p); - return set_search_internal(ps, name, search); -} - -int -ps_set_keyphrase(ps_decoder_t *ps, const char *name, const char *keyphrase) -{ - ps_search_t *search; - search = kws_search_init(keyphrase, NULL, ps->config, ps->acmod, ps->dict, ps->d2p); - return set_search_internal(ps, name, search); -} - -int -ps_set_fsg(ps_decoder_t *ps, const char *name, 
fsg_model_t *fsg) -{ - ps_search_t *search; - search = fsg_search_init(fsg, ps->config, ps->acmod, ps->dict, ps->d2p); - return set_search_internal(ps, name, search); -} - -int -ps_set_jsgf_file(ps_decoder_t *ps, const char *name, const char *path) -{ - fsg_model_t *fsg; - jsgf_rule_t *rule; - char const *toprule; - jsgf_t *jsgf = jsgf_parse_file(path, NULL); - float lw; - int result; - - if (!jsgf) - return -1; - - rule = NULL; - /* Take the -toprule if specified. */ - if ((toprule = cmd_ln_str_r(ps->config, "-toprule"))) { - rule = jsgf_get_rule(jsgf, toprule); - if (rule == NULL) { - E_ERROR("Start rule %s not found\n", toprule); - return -1; - } - } else { - rule = jsgf_get_public_rule(jsgf); - if (rule == NULL) { - E_ERROR("No public rules found in %s\n", path); - return -1; - } - } - - lw = cmd_ln_float32_r(ps->config, "-lw"); - fsg = jsgf_build_fsg(jsgf, rule, ps->lmath, lw); - result = ps_set_fsg(ps, name, fsg); - fsg_model_free(fsg); - return result; -} - -int -ps_set_jsgf_string(ps_decoder_t *ps, const char *name, const char *jsgf_string) -{ - fsg_model_t *fsg; - jsgf_rule_t *rule; - char const *toprule; - jsgf_t *jsgf = jsgf_parse_string(jsgf_string, NULL); - float lw; - int result; - - if (!jsgf) - return -1; - - rule = NULL; - /* Take the -toprule if specified. 
*/ - if ((toprule = cmd_ln_str_r(ps->config, "-toprule"))) { - rule = jsgf_get_rule(jsgf, toprule); - if (rule == NULL) { - E_ERROR("Start rule %s not found\n", toprule); - return -1; - } - } else { - rule = jsgf_get_public_rule(jsgf); - if (rule == NULL) { - E_ERROR("No public rules found in input string\n"); - return -1; - } - } - - lw = cmd_ln_float32_r(ps->config, "-lw"); - fsg = jsgf_build_fsg(jsgf, rule, ps->lmath, lw); - result = ps_set_fsg(ps, name, fsg); - fsg_model_free(fsg); - return result; -} - - -int -ps_load_dict(ps_decoder_t *ps, char const *dictfile, - char const *fdictfile, char const *format) -{ - cmd_ln_t *newconfig; - dict2pid_t *d2p; - dict_t *dict; - hash_iter_t *search_it; - - /* Create a new scratch config to load this dict (so existing one - * won't be affected if it fails) */ - newconfig = cmd_ln_init(NULL, ps_args(), TRUE, NULL); - cmd_ln_set_boolean_r(newconfig, "-dictcase", - cmd_ln_boolean_r(ps->config, "-dictcase")); - cmd_ln_set_str_r(newconfig, "-dict", dictfile); - if (fdictfile) - cmd_ln_set_str_r(newconfig, "-fdict", fdictfile); - else - cmd_ln_set_str_r(newconfig, "-fdict", - cmd_ln_str_r(ps->config, "-fdict")); - - /* Try to load it. */ - if ((dict = dict_init(newconfig, ps->acmod->mdef, ps->acmod->lmath)) == NULL) { - cmd_ln_free_r(newconfig); - return -1; - } - - /* Reinit the dict2pid. */ - if ((d2p = dict2pid_build(ps->acmod->mdef, dict)) == NULL) { - cmd_ln_free_r(newconfig); - return -1; - } - - /* Success! Update the existing config to reflect new dicts and - * drop everything into place. */ - cmd_ln_free_r(newconfig); - cmd_ln_set_str_r(ps->config, "-dict", dictfile); - if (fdictfile) - cmd_ln_set_str_r(ps->config, "-fdict", fdictfile); - dict_free(ps->dict); - ps->dict = dict; - dict2pid_free(ps->d2p); - ps->d2p = d2p; - - /* And tell all searches to reconfigure themselves. 
*/ - for (search_it = hash_table_iter(ps->searches); search_it; - search_it = hash_table_iter_next(search_it)) { - if (ps_search_reinit(hash_entry_val(search_it->ent), dict, d2p) < 0) { - hash_table_iter_free(search_it); - return -1; - } - } - - return 0; -} - -int -ps_save_dict(ps_decoder_t *ps, char const *dictfile, - char const *format) -{ - return dict_write(ps->dict, dictfile, format); -} - -int -ps_add_word(ps_decoder_t *ps, - char const *word, - char const *phones, - int update) -{ - int32 wid; - s3cipid_t *pron; - hash_iter_t *search_it; - char **phonestr, *tmp; - int np, i, rv; - - /* Parse phones into an array of phone IDs. */ - tmp = ckd_salloc(phones); - np = str2words(tmp, NULL, 0); - phonestr = ckd_calloc(np, sizeof(*phonestr)); - str2words(tmp, phonestr, np); - pron = ckd_calloc(np, sizeof(*pron)); - for (i = 0; i < np; ++i) { - pron[i] = bin_mdef_ciphone_id(ps->acmod->mdef, phonestr[i]); - if (pron[i] == -1) { - E_ERROR("Unknown phone %s in phone string %s\n", - phonestr[i], tmp); - ckd_free(phonestr); - ckd_free(tmp); - ckd_free(pron); - return -1; - } - } - /* No longer needed. */ - ckd_free(phonestr); - ckd_free(tmp); - - /* Add it to the dictionary. */ - if ((wid = dict_add_word(ps->dict, word, pron, np)) == -1) { - ckd_free(pron); - return -1; - } - /* No longer needed. */ - ckd_free(pron); - - /* Now we also have to add it to dict2pid. 
*/ - dict2pid_add_word(ps->d2p, wid); - - /* TODO: we definitely need to refactor this */ - for (search_it = hash_table_iter(ps->searches); search_it; - search_it = hash_table_iter_next(search_it)) { - ps_search_t *search = hash_entry_val(search_it->ent); - if (!strcmp(PS_SEARCH_NGRAM, ps_search_name(search))) { - ngram_model_t *lmset = ((ngram_search_t *) search)->lmset; - if (ngram_model_add_word(lmset, word, 1.0) == NGRAM_INVALID_WID) { - hash_table_iter_free(search_it); - return -1; - } - } - - if (update) { - if ((rv = ps_search_reinit(search, ps->dict, ps->d2p) < 0)) { - hash_table_iter_free(search_it); - return rv; - } - } - } - - /* Rebuild the widmap and search tree if requested. */ - return wid; -} - -char * -ps_lookup_word(ps_decoder_t *ps, const char *word) -{ - s3wid_t wid; - int32 phlen, j; - char *phones; - dict_t *dict = ps->dict; - - wid = dict_wordid(dict, word); - if (wid == BAD_S3WID) - return NULL; - - for (phlen = j = 0; j < dict_pronlen(dict, wid); ++j) - phlen += strlen(dict_ciphone_str(dict, wid, j)) + 1; - phones = ckd_calloc(1, phlen); - for (j = 0; j < dict_pronlen(dict, wid); ++j) { - strcat(phones, dict_ciphone_str(dict, wid, j)); - if (j != dict_pronlen(dict, wid) - 1) - strcat(phones, " "); - } - return phones; -} - -long -ps_decode_raw(ps_decoder_t *ps, FILE *rawfh, - long maxsamps) -{ - int16 *data; - long total, pos, endpos; - - ps_start_stream(ps); - ps_start_utt(ps); - - /* If this file is seekable or maxsamps is specified, then decode - * the whole thing at once. 
*/ - if (maxsamps != -1) { - data = ckd_calloc(maxsamps, sizeof(*data)); - total = fread(data, sizeof(*data), maxsamps, rawfh); - ps_process_raw(ps, data, total, FALSE, TRUE); - ckd_free(data); - } else if ((pos = ftell(rawfh)) >= 0) { - fseek(rawfh, 0, SEEK_END); - endpos = ftell(rawfh); - fseek(rawfh, pos, SEEK_SET); - maxsamps = endpos - pos; - - data = ckd_calloc(maxsamps, sizeof(*data)); - total = fread(data, sizeof(*data), maxsamps, rawfh); - ps_process_raw(ps, data, total, FALSE, TRUE); - ckd_free(data); - } else { - /* Otherwise decode it in a stream. */ - total = 0; - while (!feof(rawfh)) { - int16 data[256]; - size_t nread; - - nread = fread(data, sizeof(*data), sizeof(data)/sizeof(*data), rawfh); - ps_process_raw(ps, data, nread, FALSE, FALSE); - total += nread; - } - } - ps_end_utt(ps); - return total; -} - -int -ps_start_stream(ps_decoder_t *ps) -{ - acmod_start_stream(ps->acmod); - return 0; -} - -int -ps_start_utt(ps_decoder_t *ps) -{ - int rv; - char uttid[16]; - - if (ps->search == NULL) { - E_ERROR("No search module is selected, did you forget to " - "specify a language model or grammar?\n"); - return -1; - } - - ptmr_reset(&ps->perf); - ptmr_start(&ps->perf); - - sprintf(uttid, "%09u", ps->uttno); - ++ps->uttno; - - /* Remove any residual word lattice and hypothesis. */ - ps_lattice_free(ps->search->dag); - ps->search->dag = NULL; - ps->search->last_link = NULL; - ps->search->post = 0; - ckd_free(ps->search->hyp_str); - ps->search->hyp_str = NULL; - - if ((rv = acmod_start_utt(ps->acmod)) < 0) - return rv; - - /* Start logging features and audio if requested. 
*/ - if (ps->mfclogdir) { - char *logfn = string_join(ps->mfclogdir, "/", - uttid, ".mfc", NULL); - FILE *mfcfh; - E_INFO("Writing MFCC log file: %s\n", logfn); - if ((mfcfh = fopen(logfn, "wb")) == NULL) { - E_ERROR_SYSTEM("Failed to open MFCC log file %s", logfn); - ckd_free(logfn); - return -1; - } - ckd_free(logfn); - acmod_set_mfcfh(ps->acmod, mfcfh); - } - if (ps->rawlogdir) { - char *logfn = string_join(ps->rawlogdir, "/", - uttid, ".raw", NULL); - FILE *rawfh; - E_INFO("Writing raw audio log file: %s\n", logfn); - if ((rawfh = fopen(logfn, "wb")) == NULL) { - E_ERROR_SYSTEM("Failed to open raw audio log file %s", logfn); - ckd_free(logfn); - return -1; - } - ckd_free(logfn); - acmod_set_rawfh(ps->acmod, rawfh); - } - if (ps->senlogdir) { - char *logfn = string_join(ps->senlogdir, "/", - uttid, ".sen", NULL); - FILE *senfh; - E_INFO("Writing senone score log file: %s\n", logfn); - if ((senfh = fopen(logfn, "wb")) == NULL) { - E_ERROR_SYSTEM("Failed to open senone score log file %s", logfn); - ckd_free(logfn); - return -1; - } - ckd_free(logfn); - acmod_set_senfh(ps->acmod, senfh); - } - - /* Start auxiliary phone loop search. 
*/ - if (ps->phone_loop) - ps_search_start(ps->phone_loop); - - return ps_search_start(ps->search); -} - -static int -ps_search_forward(ps_decoder_t *ps) -{ - int nfr; - - nfr = 0; - while (ps->acmod->n_feat_frame > 0) { - int k; - if (ps->pl_window > 0) - if ((k = ps_search_step(ps->phone_loop, ps->acmod->output_frame)) < 0) - return k; - if (ps->acmod->output_frame >= ps->pl_window) - if ((k = ps_search_step(ps->search, - ps->acmod->output_frame - ps->pl_window)) < 0) - return k; - acmod_advance(ps->acmod); - ++ps->n_frame; - ++nfr; - } - return nfr; -} - -int -ps_decode_senscr(ps_decoder_t *ps, FILE *senfh) -{ - int nfr, n_searchfr; - - ps_start_utt(ps); - n_searchfr = 0; - acmod_set_insenfh(ps->acmod, senfh); - while ((nfr = acmod_read_scores(ps->acmod)) > 0) { - if ((nfr = ps_search_forward(ps)) < 0) { - ps_end_utt(ps); - return nfr; - } - n_searchfr += nfr; - } - ps_end_utt(ps); - acmod_set_insenfh(ps->acmod, NULL); - - return n_searchfr; -} - -int -ps_process_raw(ps_decoder_t *ps, - int16 const *data, - size_t n_samples, - int no_search, - int full_utt) -{ - int n_searchfr = 0; - - if (ps->acmod->state == ACMOD_IDLE) { - E_ERROR("Failed to process data, utterance is not started. Use start_utt to start it\n"); - return 0; - } - - if (no_search) - acmod_set_grow(ps->acmod, TRUE); - - while (n_samples) { - int nfr; - - /* Process some data into features. */ - if ((nfr = acmod_process_raw(ps->acmod, &data, - &n_samples, full_utt)) < 0) - return nfr; - - /* Score and search as much data as possible */ - if (no_search) - continue; - if ((nfr = ps_search_forward(ps)) < 0) - return nfr; - n_searchfr += nfr; - } - - return n_searchfr; -} - -int -ps_process_cep(ps_decoder_t *ps, - mfcc_t **data, - int32 n_frames, - int no_search, - int full_utt) -{ - int n_searchfr = 0; - - if (no_search) - acmod_set_grow(ps->acmod, TRUE); - - while (n_frames) { - int nfr; - - /* Process some data into features. 
*/ - if ((nfr = acmod_process_cep(ps->acmod, &data, - &n_frames, full_utt)) < 0) - return nfr; - - /* Score and search as much data as possible */ - if (no_search) - continue; - if ((nfr = ps_search_forward(ps)) < 0) - return nfr; - n_searchfr += nfr; - } - - return n_searchfr; -} - -int -ps_end_utt(ps_decoder_t *ps) -{ - int rv, i; - - acmod_end_utt(ps->acmod); - - /* Search any remaining frames. */ - if ((rv = ps_search_forward(ps)) < 0) { - ptmr_stop(&ps->perf); - return rv; - } - /* Finish phone loop search. */ - if (ps->phone_loop) { - if ((rv = ps_search_finish(ps->phone_loop)) < 0) { - ptmr_stop(&ps->perf); - return rv; - } - } - /* Search any frames remaining in the lookahead window. */ - if (ps->acmod->output_frame >= ps->pl_window) { - for (i = ps->acmod->output_frame - ps->pl_window; - i < ps->acmod->output_frame; ++i) - ps_search_step(ps->search, i); - } - /* Finish main search. */ - if ((rv = ps_search_finish(ps->search)) < 0) { - ptmr_stop(&ps->perf); - return rv; - } - ptmr_stop(&ps->perf); - - /* Log a backtrace if requested. 
*/ - if (cmd_ln_boolean_r(ps->config, "-backtrace")) { - const char* hyp; - ps_seg_t *seg; - int32 score; - - hyp = ps_get_hyp(ps, &score); - - if (hyp != NULL) { - E_INFO("%s (%d)\n", hyp, score); - E_INFO_NOFN("%-20s %-5s %-5s %-5s %-10s %-10s %-3s\n", - "word", "start", "end", "pprob", "ascr", "lscr", "lback"); - for (seg = ps_seg_iter(ps, &score); seg; - seg = ps_seg_next(seg)) { - char const *word; - int sf, ef; - int32 post, lscr, ascr, lback; - - word = ps_seg_word(seg); - ps_seg_frames(seg, &sf, &ef); - post = ps_seg_prob(seg, &ascr, &lscr, &lback); - E_INFO_NOFN("%-20s %-5d %-5d %-1.3f %-10d %-10d %-3d\n", - word, sf, ef, logmath_exp(ps_get_logmath(ps), post), - ascr, lscr, lback); - } - } - } - return rv; -} - -char const * -ps_get_hyp(ps_decoder_t *ps, int32 *out_best_score) -{ - char const *hyp; - - ptmr_start(&ps->perf); - hyp = ps_search_hyp(ps->search, out_best_score, NULL); - ptmr_stop(&ps->perf); - return hyp; -} - -char const * -ps_get_hyp_final(ps_decoder_t *ps, int32 *out_is_final) -{ - char const *hyp; - - ptmr_start(&ps->perf); - hyp = ps_search_hyp(ps->search, NULL, out_is_final); - ptmr_stop(&ps->perf); - return hyp; -} - - -int32 -ps_get_prob(ps_decoder_t *ps) -{ - int32 prob; - - ptmr_start(&ps->perf); - prob = ps_search_prob(ps->search); - ptmr_stop(&ps->perf); - return prob; -} - -ps_seg_t * -ps_seg_iter(ps_decoder_t *ps, int32 *out_best_score) -{ - ps_seg_t *itor; - - ptmr_start(&ps->perf); - itor = ps_search_seg_iter(ps->search, out_best_score); - ptmr_stop(&ps->perf); - return itor; -} - -ps_seg_t * -ps_seg_next(ps_seg_t *seg) -{ - return ps_search_seg_next(seg); -} - -char const * -ps_seg_word(ps_seg_t *seg) -{ - return seg->word; -} - -void -ps_seg_frames(ps_seg_t *seg, int *out_sf, int *out_ef) -{ - int uf; - uf = acmod_stream_offset(seg->search->acmod); - if (out_sf) *out_sf = seg->sf + uf; - if (out_ef) *out_ef = seg->ef + uf; -} - -int32 -ps_seg_prob(ps_seg_t *seg, int32 *out_ascr, int32 *out_lscr, int32 *out_lback) -{ - if 
(out_ascr) *out_ascr = seg->ascr; - if (out_lscr) *out_lscr = seg->lscr; - if (out_lback) *out_lback = seg->lback; - return seg->prob; -} - -void -ps_seg_free(ps_seg_t *seg) -{ - ps_search_seg_free(seg); -} - -ps_lattice_t * -ps_get_lattice(ps_decoder_t *ps) -{ - return ps_search_lattice(ps->search); -} - -ps_nbest_t * -ps_nbest(ps_decoder_t *ps, int sf, int ef, - char const *ctx1, char const *ctx2) -{ - ps_lattice_t *dag; - ngram_model_t *lmset; - ps_astar_t *nbest; - float32 lwf; - int32 w1, w2; - - if (ps->search == NULL) - return NULL; - if ((dag = ps_get_lattice(ps)) == NULL) - return NULL; - - /* FIXME: This is all quite specific to N-Gram search. Either we - * should make N-best a method for each search module or it needs - * to be abstracted to work for N-Gram and FSG. */ - if (0 != strcmp(ps_search_name(ps->search), PS_SEARCH_NGRAM)) { - lmset = NULL; - lwf = 1.0f; - } else { - lmset = ((ngram_search_t *)ps->search)->lmset; - lwf = ((ngram_search_t *)ps->search)->bestpath_fwdtree_lw_ratio; - } - - w1 = ctx1 ? dict_wordid(ps_search_dict(ps->search), ctx1) : -1; - w2 = ctx2 ? 
dict_wordid(ps_search_dict(ps->search), ctx2) : -1; - nbest = ps_astar_start(dag, lmset, lwf, sf, ef, w1, w2); - - return (ps_nbest_t *)nbest; -} - -void -ps_nbest_free(ps_nbest_t *nbest) -{ - ps_astar_finish(nbest); -} - -ps_nbest_t * -ps_nbest_next(ps_nbest_t *nbest) -{ - ps_latpath_t *next; - - next = ps_astar_next(nbest); - if (next == NULL) { - ps_nbest_free(nbest); - return NULL; - } - return nbest; -} - -char const * -ps_nbest_hyp(ps_nbest_t *nbest, int32 *out_score) -{ - assert(nbest != NULL); - - if (nbest->top == NULL) - return NULL; - if (out_score) *out_score = nbest->top->score; - return ps_astar_hyp(nbest, nbest->top); -} - -ps_seg_t * -ps_nbest_seg(ps_nbest_t *nbest, int32 *out_score) -{ - if (nbest->top == NULL) - return NULL; - if (out_score) *out_score = nbest->top->score; - return ps_astar_seg_iter(nbest, nbest->top, 1.0); -} - -int -ps_get_n_frames(ps_decoder_t *ps) -{ - return ps->acmod->output_frame + 1; -} - -void -ps_get_utt_time(ps_decoder_t *ps, double *out_nspeech, - double *out_ncpu, double *out_nwall) -{ - int32 frate; - - frate = cmd_ln_int32_r(ps->config, "-frate"); - *out_nspeech = (double)ps->acmod->output_frame / frate; - *out_ncpu = ps->perf.t_cpu; - *out_nwall = ps->perf.t_elapsed; -} - -void -ps_get_all_time(ps_decoder_t *ps, double *out_nspeech, - double *out_ncpu, double *out_nwall) -{ - int32 frate; - - frate = cmd_ln_int32_r(ps->config, "-frate"); - *out_nspeech = (double)ps->n_frame / frate; - *out_ncpu = ps->perf.t_tot_cpu; - *out_nwall = ps->perf.t_tot_elapsed; -} - -uint8 -ps_get_in_speech(ps_decoder_t *ps) -{ - return fe_get_vad_state(ps->acmod->fe); -} - -void -ps_search_init(ps_search_t *search, ps_searchfuncs_t *vt, - cmd_ln_t *config, acmod_t *acmod, dict_t *dict, - dict2pid_t *d2p) -{ - search->vt = vt; - search->config = config; - search->acmod = acmod; - if (d2p) - search->d2p = dict2pid_retain(d2p); - else - search->d2p = NULL; - if (dict) { - search->dict = dict_retain(dict); - search->start_wid = 
dict_startwid(dict); - search->finish_wid = dict_finishwid(dict); - search->silence_wid = dict_silwid(dict); - search->n_words = dict_size(dict); - } - else { - search->dict = NULL; - search->start_wid = search->finish_wid = search->silence_wid = -1; - search->n_words = 0; - } -} - -void -ps_search_base_reinit(ps_search_t *search, dict_t *dict, - dict2pid_t *d2p) -{ - dict_free(search->dict); - dict2pid_free(search->d2p); - /* FIXME: _retain() should just return NULL if passed NULL. */ - if (dict) { - search->dict = dict_retain(dict); - search->start_wid = dict_startwid(dict); - search->finish_wid = dict_finishwid(dict); - search->silence_wid = dict_silwid(dict); - search->n_words = dict_size(dict); - } - else { - search->dict = NULL; - search->start_wid = search->finish_wid = search->silence_wid = -1; - search->n_words = 0; - } - if (d2p) - search->d2p = dict2pid_retain(d2p); - else - search->d2p = NULL; -} - -void -ps_search_deinit(ps_search_t *search) -{ - /* FIXME: We will have refcounting on acmod, config, etc, at which - * point we will free them here too. */ - dict_free(search->dict); - dict2pid_free(search->d2p); - ckd_free(search->hyp_str); - ps_lattice_free(search->dag); -} - -void -ps_set_rawdata_size(ps_decoder_t *ps, int32 size) -{ - acmod_set_rawdata_size(ps->acmod, size); -} - -void -ps_get_rawdata(ps_decoder_t *ps, int16 **buffer, int32 *size) -{ - acmod_get_rawdata(ps->acmod, buffer, size); -} |