diff options
Diffstat (limited to 'media/pocketsphinx/src/pocketsphinx.c')
-rw-r--r-- | media/pocketsphinx/src/pocketsphinx.c | 1421 |
1 files changed, 1421 insertions, 0 deletions
diff --git a/media/pocketsphinx/src/pocketsphinx.c b/media/pocketsphinx/src/pocketsphinx.c new file mode 100644 index 000000000..7514c1fb5 --- /dev/null +++ b/media/pocketsphinx/src/pocketsphinx.c @@ -0,0 +1,1421 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2008 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +/* System headers. */ +#include <stdio.h> +#include <assert.h> + +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif + +/* SphinxBase headers. */ +#include <sphinxbase/err.h> +#include <sphinxbase/strfuncs.h> +#include <sphinxbase/filename.h> +#include <sphinxbase/pio.h> +#include <sphinxbase/jsgf.h> +#include <sphinxbase/hash_table.h> + +/* Local headers. */ +#include "cmdln_macro.h" +#include "pocketsphinx.h" +#include "pocketsphinx_internal.h" +#include "ps_lattice_internal.h" +#include "phone_loop_search.h" +#include "kws_search.h" +#include "fsg_search_internal.h" +#include "ngram_search.h" +#include "ngram_search_fwdtree.h" +#include "ngram_search_fwdflat.h" +#include "allphone_search.h" + +static const arg_t ps_args_def[] = { + POCKETSPHINX_OPTIONS, + CMDLN_EMPTY_OPTION +}; + +/* I'm not sure what the portable way to do this is. */ +static int +file_exists(const char *path) +{ + FILE *tmp; + + tmp = fopen(path, "rb"); + if (tmp) fclose(tmp); + return (tmp != NULL); +} + +#ifdef MODELDIR +static int +hmmdir_exists(const char *path) +{ + FILE *tmp; + char *mdef = string_join(path, "/mdef", NULL); + + tmp = fopen(mdef, "rb"); + if (tmp) fclose(tmp); + ckd_free(mdef); + return (tmp != NULL); +} +#endif + +static void +ps_add_file(ps_decoder_t *ps, const char *arg, + const char *hmmdir, const char *file) +{ + char *tmp = string_join(hmmdir, "/", file, NULL); + + if (cmd_ln_str_r(ps->config, arg) == NULL && file_exists(tmp)) + cmd_ln_set_str_r(ps->config, arg, tmp); + ckd_free(tmp); +} + +static void +ps_init_defaults(ps_decoder_t *ps) +{ + /* Disable memory mapping on Blackfin (FIXME: should be uClinux in general). */ +#ifdef __ADSPBLACKFIN__ + E_INFO("Will not use mmap() on uClinux/Blackfin."); + cmd_ln_set_boolean_r(ps->config, "-mmap", FALSE); +#endif + + char const *hmmdir; + /* Get acoustic model filenames and add them to the command-line */ + if ((hmmdir = cmd_ln_str_r(ps->config, "-hmm")) != NULL) { + ps_add_file(ps, "-mdef", hmmdir, "mdef"); + ps_add_file(ps, "-mean", hmmdir, "means"); + ps_add_file(ps, "-var", hmmdir, "variances"); + ps_add_file(ps, "-tmat", hmmdir, "transition_matrices"); + ps_add_file(ps, "-mixw", hmmdir, "mixture_weights"); + ps_add_file(ps, "-sendump", hmmdir, "sendump"); + ps_add_file(ps, "-fdict", hmmdir, "noisedict"); + ps_add_file(ps, "-lda", hmmdir, "feature_transform"); + ps_add_file(ps, "-featparams", hmmdir, "feat.params"); + ps_add_file(ps, "-senmgau", hmmdir, "senmgau"); + } +} + +static void +ps_free_searches(ps_decoder_t *ps) +{ + if (ps->searches) { + /* Release keys manually as we used ckd_salloc to add them, release every search too. */ + hash_iter_t *search_it; + for (search_it = hash_table_iter(ps->searches); search_it; + search_it = hash_table_iter_next(search_it)) { + ckd_free((char *) hash_entry_key(search_it->ent)); + ps_search_free(hash_entry_val(search_it->ent)); + } + + hash_table_empty(ps->searches); + hash_table_free(ps->searches); + } + + ps->searches = NULL; + ps->search = NULL; +} + +static ps_search_t * +ps_find_search(ps_decoder_t *ps, char const *name) +{ + void *search = NULL; + hash_table_lookup(ps->searches, name, &search); + + return (ps_search_t *) search; +} + +void +ps_default_search_args(cmd_ln_t *config) +{ +#ifdef MODELDIR + /* Set default acoustic and language models. */ + const char *hmmdir = cmd_ln_str_r(config, "-hmm"); + if (hmmdir == NULL && hmmdir_exists(MODELDIR "/en-us/en-us")) { + hmmdir = MODELDIR "/en-us/en-us"; + cmd_ln_set_str_r(config, "-hmm", hmmdir); + } + + const char *lmfile = cmd_ln_str_r(config, "-lm"); + + if (lmfile == NULL && !cmd_ln_str_r(config, "-fsg") + && !cmd_ln_str_r(config, "-jsgf") + && !cmd_ln_str_r(config, "-lmctl") + && !cmd_ln_str_r(config, "-kws") + && !cmd_ln_str_r(config, "-keyphrase") + && file_exists(MODELDIR "/en-us/en-us.lm.dmp")) { + lmfile = MODELDIR "/en-us/en-us.lm.dmp"; + cmd_ln_set_str_r(config, "-lm", lmfile); + } + + const char *dictfile = cmd_ln_str_r(config, "-dict"); + if (dictfile == NULL && file_exists(MODELDIR "/en-us/cmudict-en-us.dict")) { + dictfile = MODELDIR "/en-us/cmudict-en-us.dict"; + cmd_ln_set_str_r(config, "-dict", dictfile); + } + + /* Expand acoustic and language model filenames relative to installation + * path. */ + if (hmmdir && !path_is_absolute(hmmdir) && !hmmdir_exists(hmmdir)) { + char *tmphmm = string_join(MODELDIR "/hmm/", hmmdir, NULL); + if (hmmdir_exists(tmphmm)) { + cmd_ln_set_str_r(config, "-hmm", tmphmm); + } else { + E_ERROR("Failed to find mdef file inside the model folder " + "specified with -hmm `%s'\n", hmmdir); + } + ckd_free(tmphmm); + } + if (lmfile && !path_is_absolute(lmfile) && !file_exists(lmfile)) { + char *tmplm = string_join(MODELDIR "/lm/", lmfile, NULL); + cmd_ln_set_str_r(config, "-lm", tmplm); + ckd_free(tmplm); + } + if (dictfile && !path_is_absolute(dictfile) && !file_exists(dictfile)) { + char *tmpdict = string_join(MODELDIR "/lm/", dictfile, NULL); + cmd_ln_set_str_r(config, "-dict", tmpdict); + ckd_free(tmpdict); + } +#endif +} + +int +ps_reinit(ps_decoder_t *ps, cmd_ln_t *config) +{ + const char *path; + const char *keyphrase; + int32 lw; + + if (config && config != ps->config) { + cmd_ln_free_r(ps->config); + ps->config = cmd_ln_retain(config); + } + + err_set_debug_level(cmd_ln_int32_r(ps->config, "-debug")); + ps->mfclogdir = cmd_ln_str_r(ps->config, "-mfclogdir"); + ps->rawlogdir = cmd_ln_str_r(ps->config, "-rawlogdir"); + ps->senlogdir = cmd_ln_str_r(ps->config, "-senlogdir"); + + /* Fill in some default arguments. */ + ps_init_defaults(ps); + + /* Free old searches (do this before other reinit) */ + ps_free_searches(ps); + ps->searches = hash_table_new(3, HASH_CASE_YES); + + /* Free old acmod. */ + acmod_free(ps->acmod); + ps->acmod = NULL; + + /* Free old dictionary (must be done after the two things above) */ + dict_free(ps->dict); + ps->dict = NULL; + + /* Free d2p */ + dict2pid_free(ps->d2p); + ps->d2p = NULL; + + /* Logmath computation (used in acmod and search) */ + if (ps->lmath == NULL + || (logmath_get_base(ps->lmath) != + (float64)cmd_ln_float32_r(ps->config, "-logbase"))) { + if (ps->lmath) + logmath_free(ps->lmath); + ps->lmath = logmath_init + ((float64)cmd_ln_float32_r(ps->config, "-logbase"), 0, + cmd_ln_boolean_r(ps->config, "-bestpath")); + } + + /* Acoustic model (this is basically everything that + * uttproc.c, senscr.c, and others used to do) */ + if ((ps->acmod = acmod_init(ps->config, ps->lmath, NULL, NULL)) == NULL) + return -1; + + if (cmd_ln_int32_r(ps->config, "-pl_window") > 0) { + /* Initialize an auxiliary phone loop search, which will run in + * "parallel" with FSG or N-Gram search. */ + if ((ps->phone_loop = + phone_loop_search_init(ps->config, ps->acmod, ps->dict)) == NULL) + return -1; + hash_table_enter(ps->searches, + ckd_salloc(ps_search_name(ps->phone_loop)), + ps->phone_loop); + } + + /* Dictionary and triphone mappings (depends on acmod). */ + /* FIXME: pass config, change arguments, implement LTS, etc. */ + if ((ps->dict = dict_init(ps->config, ps->acmod->mdef, ps->acmod->lmath)) == NULL) + return -1; + if ((ps->d2p = dict2pid_build(ps->acmod->mdef, ps->dict)) == NULL) + return -1; + + lw = cmd_ln_float32_r(config, "-lw"); + + /* Determine whether we are starting out in FSG or N-Gram search mode. + * If neither is used skip search initialization. */ + + /* Load KWS if one was specified in config */ + if ((keyphrase = cmd_ln_str_r(config, "-keyphrase"))) { + if (ps_set_keyphrase(ps, PS_DEFAULT_SEARCH, keyphrase)) + return -1; + ps_set_search(ps, PS_DEFAULT_SEARCH); + } + + if ((path = cmd_ln_str_r(config, "-kws"))) { + if (ps_set_kws(ps, PS_DEFAULT_SEARCH, path)) + return -1; + ps_set_search(ps, PS_DEFAULT_SEARCH); + } + + /* Load an FSG if one was specified in config */ + if ((path = cmd_ln_str_r(config, "-fsg"))) { + fsg_model_t *fsg = fsg_model_readfile(path, ps->lmath, lw); + if (!fsg) + return -1; + if (ps_set_fsg(ps, PS_DEFAULT_SEARCH, fsg)) + return -1; + ps_set_search(ps, PS_DEFAULT_SEARCH); + } + + /* Or load a JSGF grammar */ + if ((path = cmd_ln_str_r(config, "-jsgf"))) { + if (ps_set_jsgf_file(ps, PS_DEFAULT_SEARCH, path) + || ps_set_search(ps, PS_DEFAULT_SEARCH)) + return -1; + } + + if ((path = cmd_ln_str_r(ps->config, "-allphone"))) { + if (ps_set_allphone_file(ps, PS_DEFAULT_SEARCH, path) + || ps_set_search(ps, PS_DEFAULT_SEARCH)) + return -1; + } + + if ((path = cmd_ln_str_r(ps->config, "-lm")) && + !cmd_ln_boolean_r(ps->config, "-allphone")) { + if (ps_set_lm_file(ps, PS_DEFAULT_SEARCH, path) + || ps_set_search(ps, PS_DEFAULT_SEARCH)) + return -1; + } + + if ((path = cmd_ln_str_r(ps->config, "-lmctl"))) { + const char *name; + ngram_model_t *lmset; + ngram_model_set_iter_t *lmset_it; + + if (!(lmset = ngram_model_set_read(ps->config, path, ps->lmath))) { + E_ERROR("Failed to read language model control file: %s\n", path); + return -1; + } + + for(lmset_it = ngram_model_set_iter(lmset); + lmset_it; lmset_it = ngram_model_set_iter_next(lmset_it)) { + + ngram_model_t *lm = ngram_model_set_iter_model(lmset_it, &name); + E_INFO("adding search %s\n", name); + if (ps_set_lm(ps, name, lm)) { + ngram_model_free(lm); + ngram_model_set_iter_free(lmset_it); + return -1; + } + ngram_model_free(lm); + } + + name = cmd_ln_str_r(config, "-lmname"); + if (name) + ps_set_search(ps, name); + else { + E_ERROR("No default LM name (-lmname) for `-lmctl'\n"); + return -1; + } + } + + /* Initialize performance timer. */ + ps->perf.name = "decode"; + ptmr_init(&ps->perf); + + return 0; +} + +ps_decoder_t * +ps_init(cmd_ln_t *config) +{ + ps_decoder_t *ps; + + ps = ckd_calloc(1, sizeof(*ps)); + ps->refcount = 1; + if (ps_reinit(ps, config) < 0) { + ps_free(ps); + return NULL; + } + return ps; +} + +arg_t const * +ps_args(void) +{ + return ps_args_def; +} + +ps_decoder_t * +ps_retain(ps_decoder_t *ps) +{ + ++ps->refcount; + return ps; +} + +int +ps_free(ps_decoder_t *ps) +{ + if (ps == NULL) + return 0; + if (--ps->refcount > 0) + return ps->refcount; + ps_free_searches(ps); + dict_free(ps->dict); + dict2pid_free(ps->d2p); + acmod_free(ps->acmod); + logmath_free(ps->lmath); + cmd_ln_free_r(ps->config); + ckd_free(ps); + return 0; +} + +cmd_ln_t * +ps_get_config(ps_decoder_t *ps) +{ + return ps->config; +} + +logmath_t * +ps_get_logmath(ps_decoder_t *ps) +{ + return ps->lmath; +} + +fe_t * +ps_get_fe(ps_decoder_t *ps) +{ + return ps->acmod->fe; +} + +feat_t * +ps_get_feat(ps_decoder_t *ps) +{ + return ps->acmod->fcb; +} + +ps_mllr_t * +ps_update_mllr(ps_decoder_t *ps, ps_mllr_t *mllr) +{ + return acmod_update_mllr(ps->acmod, mllr); +} + +int +ps_set_search(ps_decoder_t *ps, const char *name) +{ + ps_search_t *search = ps_find_search(ps, name); + if (search) + ps->search = search; + + /* Set pl window depending on the search */ + if (!strcmp(PS_SEARCH_NGRAM, ps_search_name(search))) { + ps->pl_window = cmd_ln_int32_r(ps->config, "-pl_window"); + } else { + ps->pl_window = 0; + } + + return search ? 0 : -1; +} + +const char* +ps_get_search(ps_decoder_t *ps) +{ + hash_iter_t *search_it; + const char* name = NULL; + for (search_it = hash_table_iter(ps->searches); search_it; + search_it = hash_table_iter_next(search_it)) { + if (hash_entry_val(search_it->ent) == ps->search) { + name = hash_entry_key(search_it->ent); + break; + } + } + return name; +} + +int +ps_unset_search(ps_decoder_t *ps, const char *name) +{ + ps_search_t *search = hash_table_delete(ps->searches, name); + if (!search) + return -1; + if (ps->search == search) + ps->search = NULL; + ps_search_free(search); + return 0; +} + +ps_search_iter_t * +ps_search_iter(ps_decoder_t *ps) +{ + return (ps_search_iter_t *)hash_table_iter(ps->searches); +} + +ps_search_iter_t * +ps_search_iter_next(ps_search_iter_t *itor) +{ + return (ps_search_iter_t *)hash_table_iter_next((hash_iter_t *)itor); +} + +const char* +ps_search_iter_val(ps_search_iter_t *itor) +{ + return (const char*)(((hash_iter_t *)itor)->ent->key); +} + +void +ps_search_iter_free(ps_search_iter_t *itor) +{ + hash_table_iter_free((hash_iter_t *)itor); +} + +ngram_model_t * +ps_get_lm(ps_decoder_t *ps, const char *name) +{ + ps_search_t *search = ps_find_search(ps, name); + if (search && strcmp(PS_SEARCH_NGRAM, ps_search_name(search))) + return NULL; + return search ? ((ngram_search_t *) search)->lmset : NULL; +} + +fsg_model_t * +ps_get_fsg(ps_decoder_t *ps, const char *name) +{ + ps_search_t *search = ps_find_search(ps, name); + if (search && strcmp(PS_SEARCH_FSG, ps_search_name(search))) + return NULL; + return search ? ((fsg_search_t *) search)->fsg : NULL; +} + +const char* +ps_get_kws(ps_decoder_t *ps, const char* name) +{ + ps_search_t *search = ps_find_search(ps, name); + if (search && strcmp(PS_SEARCH_KWS, ps_search_name(search))) + return NULL; + return search ? kws_search_get_keywords(search) : NULL; +} + +static int +set_search_internal(ps_decoder_t *ps, const char *name, ps_search_t *search) +{ + ps_search_t *old_search; + + if (!search) + return 1; + + search->pls = ps->phone_loop; + old_search = (ps_search_t *) hash_table_replace(ps->searches, ckd_salloc(name), search); + if (old_search != search) + ps_search_free(old_search); + + return 0; +} + +int +ps_set_lm(ps_decoder_t *ps, const char *name, ngram_model_t *lm) +{ + ps_search_t *search; + search = ngram_search_init(lm, ps->config, ps->acmod, ps->dict, ps->d2p); + return set_search_internal(ps, name, search); +} + +int +ps_set_lm_file(ps_decoder_t *ps, const char *name, const char *path) +{ + ngram_model_t *lm; + int result; + + lm = ngram_model_read(ps->config, path, NGRAM_AUTO, ps->lmath); + if (!lm) + return -1; + + result = ps_set_lm(ps, name, lm); + ngram_model_free(lm); + return result; +} + +int +ps_set_allphone(ps_decoder_t *ps, const char *name, ngram_model_t *lm) +{ + ps_search_t *search; + search = allphone_search_init(lm, ps->config, ps->acmod, ps->dict, ps->d2p); + return set_search_internal(ps, name, search); +} + +int +ps_set_allphone_file(ps_decoder_t *ps, const char *name, const char *path) +{ + ngram_model_t *lm; + int result; + + lm = NULL; + if (path) + lm = ngram_model_read(ps->config, path, NGRAM_AUTO, ps->lmath); + result = ps_set_allphone(ps, name, lm); + if (lm) + ngram_model_free(lm); + return result; +} + +int +ps_set_kws(ps_decoder_t *ps, const char *name, const char *keyfile) +{ + ps_search_t *search; + search = kws_search_init(NULL, keyfile, ps->config, ps->acmod, ps->dict, ps->d2p); + return set_search_internal(ps, name, search); +} + +int +ps_set_keyphrase(ps_decoder_t *ps, const char *name, const char *keyphrase) +{ + ps_search_t *search; + search = kws_search_init(keyphrase, NULL, ps->config, ps->acmod, ps->dict, ps->d2p); + return set_search_internal(ps, name, search); +} + +int +ps_set_fsg(ps_decoder_t *ps, const char *name, fsg_model_t *fsg) +{ + ps_search_t *search; + search = fsg_search_init(fsg, ps->config, ps->acmod, ps->dict, ps->d2p); + return set_search_internal(ps, name, search); +} + +int +ps_set_jsgf_file(ps_decoder_t *ps, const char *name, const char *path) +{ + fsg_model_t *fsg; + jsgf_rule_t *rule; + char const *toprule; + jsgf_t *jsgf = jsgf_parse_file(path, NULL); + float lw; + int result; + + if (!jsgf) + return -1; + + rule = NULL; + /* Take the -toprule if specified. */ + if ((toprule = cmd_ln_str_r(ps->config, "-toprule"))) { + rule = jsgf_get_rule(jsgf, toprule); + if (rule == NULL) { + E_ERROR("Start rule %s not found\n", toprule); + return -1; + } + } else { + rule = jsgf_get_public_rule(jsgf); + if (rule == NULL) { + E_ERROR("No public rules found in %s\n", path); + return -1; + } + } + + lw = cmd_ln_float32_r(ps->config, "-lw"); + fsg = jsgf_build_fsg(jsgf, rule, ps->lmath, lw); + result = ps_set_fsg(ps, name, fsg); + fsg_model_free(fsg); + return result; +} + +int +ps_set_jsgf_string(ps_decoder_t *ps, const char *name, const char *jsgf_string) +{ + fsg_model_t *fsg; + jsgf_rule_t *rule; + char const *toprule; + jsgf_t *jsgf = jsgf_parse_string(jsgf_string, NULL); + float lw; + int result; + + if (!jsgf) + return -1; + + rule = NULL; + /* Take the -toprule if specified. */ + if ((toprule = cmd_ln_str_r(ps->config, "-toprule"))) { + rule = jsgf_get_rule(jsgf, toprule); + if (rule == NULL) { + E_ERROR("Start rule %s not found\n", toprule); + return -1; + } + } else { + rule = jsgf_get_public_rule(jsgf); + if (rule == NULL) { + E_ERROR("No public rules found in input string\n"); + return -1; + } + } + + lw = cmd_ln_float32_r(ps->config, "-lw"); + fsg = jsgf_build_fsg(jsgf, rule, ps->lmath, lw); + result = ps_set_fsg(ps, name, fsg); + fsg_model_free(fsg); + return result; +} + + +int +ps_load_dict(ps_decoder_t *ps, char const *dictfile, + char const *fdictfile, char const *format) +{ + cmd_ln_t *newconfig; + dict2pid_t *d2p; + dict_t *dict; + hash_iter_t *search_it; + + /* Create a new scratch config to load this dict (so existing one + * won't be affected if it fails) */ + newconfig = cmd_ln_init(NULL, ps_args(), TRUE, NULL); + cmd_ln_set_boolean_r(newconfig, "-dictcase", + cmd_ln_boolean_r(ps->config, "-dictcase")); + cmd_ln_set_str_r(newconfig, "-dict", dictfile); + if (fdictfile) + cmd_ln_set_str_r(newconfig, "-fdict", fdictfile); + else + cmd_ln_set_str_r(newconfig, "-fdict", + cmd_ln_str_r(ps->config, "-fdict")); + + /* Try to load it. */ + if ((dict = dict_init(newconfig, ps->acmod->mdef, ps->acmod->lmath)) == NULL) { + cmd_ln_free_r(newconfig); + return -1; + } + + /* Reinit the dict2pid. */ + if ((d2p = dict2pid_build(ps->acmod->mdef, dict)) == NULL) { + cmd_ln_free_r(newconfig); + return -1; + } + + /* Success! Update the existing config to reflect new dicts and + * drop everything into place. */ + cmd_ln_free_r(newconfig); + cmd_ln_set_str_r(ps->config, "-dict", dictfile); + if (fdictfile) + cmd_ln_set_str_r(ps->config, "-fdict", fdictfile); + dict_free(ps->dict); + ps->dict = dict; + dict2pid_free(ps->d2p); + ps->d2p = d2p; + + /* And tell all searches to reconfigure themselves. */ + for (search_it = hash_table_iter(ps->searches); search_it; + search_it = hash_table_iter_next(search_it)) { + if (ps_search_reinit(hash_entry_val(search_it->ent), dict, d2p) < 0) { + hash_table_iter_free(search_it); + return -1; + } + } + + return 0; +} + +int +ps_save_dict(ps_decoder_t *ps, char const *dictfile, + char const *format) +{ + return dict_write(ps->dict, dictfile, format); +} + +int +ps_add_word(ps_decoder_t *ps, + char const *word, + char const *phones, + int update) +{ + int32 wid; + s3cipid_t *pron; + hash_iter_t *search_it; + char **phonestr, *tmp; + int np, i, rv; + + /* Parse phones into an array of phone IDs. */ + tmp = ckd_salloc(phones); + np = str2words(tmp, NULL, 0); + phonestr = ckd_calloc(np, sizeof(*phonestr)); + str2words(tmp, phonestr, np); + pron = ckd_calloc(np, sizeof(*pron)); + for (i = 0; i < np; ++i) { + pron[i] = bin_mdef_ciphone_id(ps->acmod->mdef, phonestr[i]); + if (pron[i] == -1) { + E_ERROR("Unknown phone %s in phone string %s\n", + phonestr[i], tmp); + ckd_free(phonestr); + ckd_free(tmp); + ckd_free(pron); + return -1; + } + } + /* No longer needed. */ + ckd_free(phonestr); + ckd_free(tmp); + + /* Add it to the dictionary. */ + if ((wid = dict_add_word(ps->dict, word, pron, np)) == -1) { + ckd_free(pron); + return -1; + } + /* No longer needed. */ + ckd_free(pron); + + /* Now we also have to add it to dict2pid. */ + dict2pid_add_word(ps->d2p, wid); + + /* TODO: we definitely need to refactor this */ + for (search_it = hash_table_iter(ps->searches); search_it; + search_it = hash_table_iter_next(search_it)) { + ps_search_t *search = hash_entry_val(search_it->ent); + if (!strcmp(PS_SEARCH_NGRAM, ps_search_name(search))) { + ngram_model_t *lmset = ((ngram_search_t *) search)->lmset; + if (ngram_model_add_word(lmset, word, 1.0) == NGRAM_INVALID_WID) { + hash_table_iter_free(search_it); + return -1; + } + } + + if (update) { + if ((rv = ps_search_reinit(search, ps->dict, ps->d2p) < 0)) { + hash_table_iter_free(search_it); + return rv; + } + } + } + + /* Rebuild the widmap and search tree if requested. */ + return wid; +} + +char * +ps_lookup_word(ps_decoder_t *ps, const char *word) +{ + s3wid_t wid; + int32 phlen, j; + char *phones; + dict_t *dict = ps->dict; + + wid = dict_wordid(dict, word); + if (wid == BAD_S3WID) + return NULL; + + for (phlen = j = 0; j < dict_pronlen(dict, wid); ++j) + phlen += strlen(dict_ciphone_str(dict, wid, j)) + 1; + phones = ckd_calloc(1, phlen); + for (j = 0; j < dict_pronlen(dict, wid); ++j) { + strcat(phones, dict_ciphone_str(dict, wid, j)); + if (j != dict_pronlen(dict, wid) - 1) + strcat(phones, " "); + } + return phones; +} + +long +ps_decode_raw(ps_decoder_t *ps, FILE *rawfh, + long maxsamps) +{ + int16 *data; + long total, pos, endpos; + + ps_start_stream(ps); + ps_start_utt(ps); + + /* If this file is seekable or maxsamps is specified, then decode + * the whole thing at once. */ + if (maxsamps != -1) { + data = ckd_calloc(maxsamps, sizeof(*data)); + total = fread(data, sizeof(*data), maxsamps, rawfh); + ps_process_raw(ps, data, total, FALSE, TRUE); + ckd_free(data); + } else if ((pos = ftell(rawfh)) >= 0) { + fseek(rawfh, 0, SEEK_END); + endpos = ftell(rawfh); + fseek(rawfh, pos, SEEK_SET); + maxsamps = endpos - pos; + + data = ckd_calloc(maxsamps, sizeof(*data)); + total = fread(data, sizeof(*data), maxsamps, rawfh); + ps_process_raw(ps, data, total, FALSE, TRUE); + ckd_free(data); + } else { + /* Otherwise decode it in a stream. */ + total = 0; + while (!feof(rawfh)) { + int16 data[256]; + size_t nread; + + nread = fread(data, sizeof(*data), sizeof(data)/sizeof(*data), rawfh); + ps_process_raw(ps, data, nread, FALSE, FALSE); + total += nread; + } + } + ps_end_utt(ps); + return total; +} + +int +ps_start_stream(ps_decoder_t *ps) +{ + acmod_start_stream(ps->acmod); + return 0; +} + +int +ps_start_utt(ps_decoder_t *ps) +{ + int rv; + char uttid[16]; + + if (ps->search == NULL) { + E_ERROR("No search module is selected, did you forget to " + "specify a language model or grammar?\n"); + return -1; + } + + ptmr_reset(&ps->perf); + ptmr_start(&ps->perf); + + sprintf(uttid, "%09u", ps->uttno); + ++ps->uttno; + + /* Remove any residual word lattice and hypothesis. */ + ps_lattice_free(ps->search->dag); + ps->search->dag = NULL; + ps->search->last_link = NULL; + ps->search->post = 0; + ckd_free(ps->search->hyp_str); + ps->search->hyp_str = NULL; + + if ((rv = acmod_start_utt(ps->acmod)) < 0) + return rv; + + /* Start logging features and audio if requested. */ + if (ps->mfclogdir) { + char *logfn = string_join(ps->mfclogdir, "/", + uttid, ".mfc", NULL); + FILE *mfcfh; + E_INFO("Writing MFCC log file: %s\n", logfn); + if ((mfcfh = fopen(logfn, "wb")) == NULL) { + E_ERROR_SYSTEM("Failed to open MFCC log file %s", logfn); + ckd_free(logfn); + return -1; + } + ckd_free(logfn); + acmod_set_mfcfh(ps->acmod, mfcfh); + } + if (ps->rawlogdir) { + char *logfn = string_join(ps->rawlogdir, "/", + uttid, ".raw", NULL); + FILE *rawfh; + E_INFO("Writing raw audio log file: %s\n", logfn); + if ((rawfh = fopen(logfn, "wb")) == NULL) { + E_ERROR_SYSTEM("Failed to open raw audio log file %s", logfn); + ckd_free(logfn); + return -1; + } + ckd_free(logfn); + acmod_set_rawfh(ps->acmod, rawfh); + } + if (ps->senlogdir) { + char *logfn = string_join(ps->senlogdir, "/", + uttid, ".sen", NULL); + FILE *senfh; + E_INFO("Writing senone score log file: %s\n", logfn); + if ((senfh = fopen(logfn, "wb")) == NULL) { + E_ERROR_SYSTEM("Failed to open senone score log file %s", logfn); + ckd_free(logfn); + return -1; + } + ckd_free(logfn); + acmod_set_senfh(ps->acmod, senfh); + } + + /* Start auxiliary phone loop search. */ + if (ps->phone_loop) + ps_search_start(ps->phone_loop); + + return ps_search_start(ps->search); +} + +static int +ps_search_forward(ps_decoder_t *ps) +{ + int nfr; + + nfr = 0; + while (ps->acmod->n_feat_frame > 0) { + int k; + if (ps->pl_window > 0) + if ((k = ps_search_step(ps->phone_loop, ps->acmod->output_frame)) < 0) + return k; + if (ps->acmod->output_frame >= ps->pl_window) + if ((k = ps_search_step(ps->search, + ps->acmod->output_frame - ps->pl_window)) < 0) + return k; + acmod_advance(ps->acmod); + ++ps->n_frame; + ++nfr; + } + return nfr; +} + +int +ps_decode_senscr(ps_decoder_t *ps, FILE *senfh) +{ + int nfr, n_searchfr; + + ps_start_utt(ps); + n_searchfr = 0; + acmod_set_insenfh(ps->acmod, senfh); + while ((nfr = acmod_read_scores(ps->acmod)) > 0) { + if ((nfr = ps_search_forward(ps)) < 0) { + ps_end_utt(ps); + return nfr; + } + n_searchfr += nfr; + } + ps_end_utt(ps); + acmod_set_insenfh(ps->acmod, NULL); + + return n_searchfr; +} + +int +ps_process_raw(ps_decoder_t *ps, + int16 const *data, + size_t n_samples, + int no_search, + int full_utt) +{ + int n_searchfr = 0; + + if (ps->acmod->state == ACMOD_IDLE) { + E_ERROR("Failed to process data, utterance is not started. Use start_utt to start it\n"); + return 0; + } + + if (no_search) + acmod_set_grow(ps->acmod, TRUE); + + while (n_samples) { + int nfr; + + /* Process some data into features. */ + if ((nfr = acmod_process_raw(ps->acmod, &data, + &n_samples, full_utt)) < 0) + return nfr; + + /* Score and search as much data as possible */ + if (no_search) + continue; + if ((nfr = ps_search_forward(ps)) < 0) + return nfr; + n_searchfr += nfr; + } + + return n_searchfr; +} + +int +ps_process_cep(ps_decoder_t *ps, + mfcc_t **data, + int32 n_frames, + int no_search, + int full_utt) +{ + int n_searchfr = 0; + + if (no_search) + acmod_set_grow(ps->acmod, TRUE); + + while (n_frames) { + int nfr; + + /* Process some data into features. */ + if ((nfr = acmod_process_cep(ps->acmod, &data, + &n_frames, full_utt)) < 0) + return nfr; + + /* Score and search as much data as possible */ + if (no_search) + continue; + if ((nfr = ps_search_forward(ps)) < 0) + return nfr; + n_searchfr += nfr; + } + + return n_searchfr; +} + +int +ps_end_utt(ps_decoder_t *ps) +{ + int rv, i; + + acmod_end_utt(ps->acmod); + + /* Search any remaining frames. */ + if ((rv = ps_search_forward(ps)) < 0) { + ptmr_stop(&ps->perf); + return rv; + } + /* Finish phone loop search. */ + if (ps->phone_loop) { + if ((rv = ps_search_finish(ps->phone_loop)) < 0) { + ptmr_stop(&ps->perf); + return rv; + } + } + /* Search any frames remaining in the lookahead window. */ + if (ps->acmod->output_frame >= ps->pl_window) { + for (i = ps->acmod->output_frame - ps->pl_window; + i < ps->acmod->output_frame; ++i) + ps_search_step(ps->search, i); + } + /* Finish main search. */ + if ((rv = ps_search_finish(ps->search)) < 0) { + ptmr_stop(&ps->perf); + return rv; + } + ptmr_stop(&ps->perf); + + /* Log a backtrace if requested. */ + if (cmd_ln_boolean_r(ps->config, "-backtrace")) { + const char* hyp; + ps_seg_t *seg; + int32 score; + + hyp = ps_get_hyp(ps, &score); + + if (hyp != NULL) { + E_INFO("%s (%d)\n", hyp, score); + E_INFO_NOFN("%-20s %-5s %-5s %-5s %-10s %-10s %-3s\n", + "word", "start", "end", "pprob", "ascr", "lscr", "lback"); + for (seg = ps_seg_iter(ps, &score); seg; + seg = ps_seg_next(seg)) { + char const *word; + int sf, ef; + int32 post, lscr, ascr, lback; + + word = ps_seg_word(seg); + ps_seg_frames(seg, &sf, &ef); + post = ps_seg_prob(seg, &ascr, &lscr, &lback); + E_INFO_NOFN("%-20s %-5d %-5d %-1.3f %-10d %-10d %-3d\n", + word, sf, ef, logmath_exp(ps_get_logmath(ps), post), + ascr, lscr, lback); + } + } + } + return rv; +} + +char const * +ps_get_hyp(ps_decoder_t *ps, int32 *out_best_score) +{ + char const *hyp; + + ptmr_start(&ps->perf); + hyp = ps_search_hyp(ps->search, out_best_score, NULL); + ptmr_stop(&ps->perf); + return hyp; +} + +char const * +ps_get_hyp_final(ps_decoder_t *ps, int32 *out_is_final) +{ + char const *hyp; + + ptmr_start(&ps->perf); + hyp = ps_search_hyp(ps->search, NULL, out_is_final); + ptmr_stop(&ps->perf); + return hyp; +} + + +int32 +ps_get_prob(ps_decoder_t *ps) +{ + int32 prob; + + ptmr_start(&ps->perf); + prob = ps_search_prob(ps->search); + ptmr_stop(&ps->perf); + return prob; +} + +ps_seg_t * +ps_seg_iter(ps_decoder_t *ps, int32 *out_best_score) +{ + ps_seg_t *itor; + + ptmr_start(&ps->perf); + itor = ps_search_seg_iter(ps->search, out_best_score); + ptmr_stop(&ps->perf); + return itor; +} + +ps_seg_t * +ps_seg_next(ps_seg_t *seg) +{ + return ps_search_seg_next(seg); +} + +char const * +ps_seg_word(ps_seg_t *seg) +{ + return seg->word; +} + +void +ps_seg_frames(ps_seg_t *seg, int *out_sf, int *out_ef) +{ + int uf; + uf = acmod_stream_offset(seg->search->acmod); + if (out_sf) *out_sf = seg->sf + uf; + if (out_ef) *out_ef = seg->ef + uf; +} + +int32 +ps_seg_prob(ps_seg_t *seg, int32 *out_ascr, int32 *out_lscr, int32 *out_lback) +{ + if (out_ascr) *out_ascr = seg->ascr; + if (out_lscr) *out_lscr = seg->lscr; + if (out_lback) *out_lback = seg->lback; + return seg->prob; +} + +void +ps_seg_free(ps_seg_t *seg) +{ + ps_search_seg_free(seg); +} + +ps_lattice_t * +ps_get_lattice(ps_decoder_t *ps) +{ + return ps_search_lattice(ps->search); +} + +ps_nbest_t * +ps_nbest(ps_decoder_t *ps, int sf, int ef, + char const *ctx1, char const *ctx2) +{ + ps_lattice_t *dag; + ngram_model_t *lmset; + ps_astar_t *nbest; + float32 lwf; + int32 w1, w2; + + if (ps->search == NULL) + return NULL; + if ((dag = ps_get_lattice(ps)) == NULL) + return NULL; + + /* FIXME: This is all quite specific to N-Gram search. Either we + * should make N-best a method for each search module or it needs + * to be abstracted to work for N-Gram and FSG. */ + if (0 != strcmp(ps_search_name(ps->search), PS_SEARCH_NGRAM)) { + lmset = NULL; + lwf = 1.0f; + } else { + lmset = ((ngram_search_t *)ps->search)->lmset; + lwf = ((ngram_search_t *)ps->search)->bestpath_fwdtree_lw_ratio; + } + + w1 = ctx1 ? dict_wordid(ps_search_dict(ps->search), ctx1) : -1; + w2 = ctx2 ? dict_wordid(ps_search_dict(ps->search), ctx2) : -1; + nbest = ps_astar_start(dag, lmset, lwf, sf, ef, w1, w2); + + return (ps_nbest_t *)nbest; +} + +void +ps_nbest_free(ps_nbest_t *nbest) +{ + ps_astar_finish(nbest); +} + +ps_nbest_t * +ps_nbest_next(ps_nbest_t *nbest) +{ + ps_latpath_t *next; + + next = ps_astar_next(nbest); + if (next == NULL) { + ps_nbest_free(nbest); + return NULL; + } + return nbest; +} + +char const * +ps_nbest_hyp(ps_nbest_t *nbest, int32 *out_score) +{ + assert(nbest != NULL); + + if (nbest->top == NULL) + return NULL; + if (out_score) *out_score = nbest->top->score; + return ps_astar_hyp(nbest, nbest->top); +} + +ps_seg_t * +ps_nbest_seg(ps_nbest_t *nbest, int32 *out_score) +{ + if (nbest->top == NULL) + return NULL; + if (out_score) *out_score = nbest->top->score; + return ps_astar_seg_iter(nbest, nbest->top, 1.0); +} + +int +ps_get_n_frames(ps_decoder_t *ps) +{ + return ps->acmod->output_frame + 1; +} + +void +ps_get_utt_time(ps_decoder_t *ps, double *out_nspeech, + double *out_ncpu, double *out_nwall) +{ + int32 frate; + + frate = cmd_ln_int32_r(ps->config, "-frate"); + *out_nspeech = (double)ps->acmod->output_frame / frate; + *out_ncpu = ps->perf.t_cpu; + *out_nwall = ps->perf.t_elapsed; +} + +void +ps_get_all_time(ps_decoder_t *ps, double *out_nspeech, + double *out_ncpu, double *out_nwall) +{ + int32 frate; + + frate = cmd_ln_int32_r(ps->config, "-frate"); + *out_nspeech = (double)ps->n_frame / frate; + *out_ncpu = ps->perf.t_tot_cpu; + *out_nwall = ps->perf.t_tot_elapsed; +} + +uint8 +ps_get_in_speech(ps_decoder_t *ps) +{ + return fe_get_vad_state(ps->acmod->fe); +} + +void +ps_search_init(ps_search_t *search, ps_searchfuncs_t *vt, + cmd_ln_t *config, acmod_t *acmod, dict_t *dict, + dict2pid_t *d2p) +{ + search->vt = vt; + search->config = config; + search->acmod = acmod; + if (d2p) + search->d2p = dict2pid_retain(d2p); + else + search->d2p = NULL; + if (dict) { + search->dict = dict_retain(dict); + search->start_wid = dict_startwid(dict); + search->finish_wid = dict_finishwid(dict); + search->silence_wid = dict_silwid(dict); + search->n_words = dict_size(dict); + } + else { + search->dict = NULL; + search->start_wid = search->finish_wid = search->silence_wid = -1; + search->n_words = 0; + } +} + +void +ps_search_base_reinit(ps_search_t *search, dict_t *dict, + dict2pid_t *d2p) +{ + dict_free(search->dict); + dict2pid_free(search->d2p); + /* FIXME: _retain() should just return NULL if passed NULL. */ + if (dict) { + search->dict = dict_retain(dict); + search->start_wid = dict_startwid(dict); + search->finish_wid = dict_finishwid(dict); + search->silence_wid = dict_silwid(dict); + search->n_words = dict_size(dict); + } + else { + search->dict = NULL; + search->start_wid = search->finish_wid = search->silence_wid = -1; + search->n_words = 0; + } + if (d2p) + search->d2p = dict2pid_retain(d2p); + else + search->d2p = NULL; +} + +void +ps_search_deinit(ps_search_t *search) +{ + /* FIXME: We will have refcounting on acmod, config, etc, at which + * point we will free them here too. */ + dict_free(search->dict); + dict2pid_free(search->d2p); + ckd_free(search->hyp_str); + ps_lattice_free(search->dag); +} + +void +ps_set_rawdata_size(ps_decoder_t *ps, int32 size) +{ + acmod_set_rawdata_size(ps->acmod, size); +} + +void +ps_get_rawdata(ps_decoder_t *ps, int16 **buffer, int32 *size) +{ + acmod_get_rawdata(ps->acmod, buffer, size); +} |