diff options
author | Moonchild <moonchild@palemoon.org> | 2020-05-20 10:19:04 +0000 |
---|---|---|
committer | Moonchild <moonchild@palemoon.org> | 2020-05-20 14:04:17 +0000 |
commit | 99c2e698d2a3c56649e42d8d2133706cd8c9501e (patch) | |
tree | 85be449d772eb57860f0f386efb4bc1e790fd498 /media/pocketsphinx/src/fsg_lextree.h | |
parent | 15ac4021b06d549e47c9e2efc9364a9eb96bfe82 (diff) | |
download | UXP-99c2e698d2a3c56649e42d8d2133706cd8c9501e.tar UXP-99c2e698d2a3c56649e42d8d2133706cd8c9501e.tar.gz UXP-99c2e698d2a3c56649e42d8d2133706cd8c9501e.tar.lz UXP-99c2e698d2a3c56649e42d8d2133706cd8c9501e.tar.xz UXP-99c2e698d2a3c56649e42d8d2133706cd8c9501e.zip |
Issue #1538 - remove speech recognition engine
This removes speech recognition, pocketsphinx, training models
and the speech automated test interface.
This also re-establishes proper use of MOZ_WEBSPEECH to work
for the speech API (synthesis part only) that was a broken mess
before, with some synth parts being always built, some parts
being built only with it enabled and recognition parts being
dependent on it. I'm pretty sure it'd be totally busted if you'd
ever have tried building without MOZ_WEBSPEECH before.
Tested that synthesis still works as-intended.
This resolves #1538
Diffstat (limited to 'media/pocketsphinx/src/fsg_lextree.h')
-rw-r--r-- | media/pocketsphinx/src/fsg_lextree.h | 255 |
1 files changed, 0 insertions, 255 deletions
diff --git a/media/pocketsphinx/src/fsg_lextree.h b/media/pocketsphinx/src/fsg_lextree.h deleted file mode 100644 index 563065c65..000000000 --- a/media/pocketsphinx/src/fsg_lextree.h +++ /dev/null @@ -1,255 +0,0 @@ -/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ -/* ==================================================================== - * Copyright (c) 1999-2013 Carnegie Mellon University. All rights - * reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * - * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND - * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, - * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY - * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * ==================================================================== - * - */ -/* - * fsg_lextree.h -- The collection of all the lextrees for the entire FSM. 
- * - */ - -#ifndef __S2_FSG_LEXTREE_H__ -#define __S2_FSG_LEXTREE_H__ - -/* SphinxBase headers. */ -#include <sphinxbase/cmd_ln.h> -#include <sphinxbase/fsg_model.h> - -/* Local headers. */ -#include "hmm.h" -#include "dict.h" -#include "dict2pid.h" - -/* - * Compile-time constant determining the size of the - * bitvector fsg_pnode_t.fsg_pnode_ctxt_t.bv. (See below.) - * But it makes memory allocation simpler and more efficient. - * Make it smaller (2) to save memory if your phoneset has less than - * 64 phones. - */ -#define FSG_PNODE_CTXT_BVSZ 4 - -typedef struct { - uint32 bv[FSG_PNODE_CTXT_BVSZ]; -} fsg_pnode_ctxt_t; - - -/* - * All transitions (words) out of any given FSG state represented are by a - * phonetic prefix lextree (except for epsilon or null transitions; they - * are not part of the lextree). Lextree leaf nodes represent individual - * FSG transitions, so no sharing is allowed at the leaf nodes. The FSG - * transition probs are distributed along the lextree: the prob at a node - * is the max of the probs of all leaf nodes (and, hence, FSG transitions) - * reachable from that node. - * - * To conserve memory, the underlying HMMs with state-level information are - * allocated only as needed. Root and leaf nodes must also account for all - * the possible phonetic contexts, with an independent HMM for each distinct - * context. - */ -typedef struct fsg_pnode_s { - /* - * If this is not a leaf node, the first successor (child) node. Otherwise - * the parent FSG transition for which this is the leaf node (for figuring - * the FSG destination state, and word emitted by the transition). A node - * may have several children. The succ ptr gives just the first; the rest - * are linked via the sibling ptr below. 
- */ - union { - struct fsg_pnode_s *succ; - fsg_link_t *fsglink; - } next; - - /* - * For simplicity of memory management (i.e., freeing the pnodes), all - * pnodes allocated for all transitions out of a state are maintained in a - * linear linked list through the alloc_next pointer. - */ - struct fsg_pnode_s *alloc_next; - - /* - * The next node that is also a child of the parent of this node; NULL if - * none. - */ - struct fsg_pnode_s *sibling; - - /* - * The transition (log) probability to be incurred upon transitioning to - * this node. (Transition probabilities are really associated with the - * transitions. But a lextree node has exactly one incoming transition. - * Hence, the prob can be associated with the node.) - * This is a logs2(prob) value, and includes the language weight. - */ - int32 logs2prob; - - /* - * The root and leaf positions associated with any transition have to deal - * with multiple phonetic contexts. However, different contexts may result - * in the same SSID (senone-seq ID), and can share a single pnode with that - * SSID. But the pnode should track the set of context CI phones that share - * it. Hence the fsg_pnode_ctxt_t bit-vector set-representation. (For - * simplicity of implementation, its size is a compile-time constant for - * now.) Single phone words would need a 2-D array of context, but that's - * too expensive. For now, they simply use SIL as right context, so only - * the left context is properly modelled. - * (For word-internal phones, this field is unused, of course.) 
- */ - fsg_pnode_ctxt_t ctxt; - - uint16 ci_ext; /* This node's CIphone as viewed externally (context) */ - uint8 ppos; /* Phoneme position in pronunciation */ - uint8 leaf; /* Whether this is a leaf node */ - - /* HMM-state-level stuff here */ - hmm_context_t *ctx; - hmm_t hmm; -} fsg_pnode_t; - -/* Access macros */ -#define fsg_pnode_leaf(p) ((p)->leaf) -#define fsg_pnode_logs2prob(p) ((p)->logs2prob) -#define fsg_pnode_succ(p) ((p)->next.succ) -#define fsg_pnode_fsglink(p) ((p)->next.fsglink) -#define fsg_pnode_sibling(p) ((p)->sibling) -#define fsg_pnode_hmmptr(p) (&((p)->hmm)) -#define fsg_pnode_ci_ext(p) ((p)->ci_ext) -#define fsg_pnode_ppos(p) ((p)->ppos) -#define fsg_pnode_leaf(p) ((p)->leaf) -#define fsg_pnode_ctxt(p) ((p)->ctxt) - -#define fsg_pnode_add_ctxt(p,c) ((p)->ctxt.bv[(c)>>5] |= (1 << ((c)&0x001f))) - -/* - * The following is macroized because its called very frequently - * ::: uint32 fsg_pnode_ctxt_sub (fsg_pnode_ctxt_t *src, fsg_pnode_ctxt_t *sub); - */ -/* - * Subtract bitvector sub from bitvector src (src updated with the result). - * Return 0 if result is all 0, non-zero otherwise. - */ - -#if (FSG_PNODE_CTXT_BVSZ == 1) - #define FSG_PNODE_CTXT_SUB(src,sub) \ - ((src)->bv[0] = (~((sub)->bv[0]) & (src)->bv[0])) -#elif (FSG_PNODE_CTXT_BVSZ == 2) - #define FSG_PNODE_CTXT_SUB(src,sub) \ - (((src)->bv[0] = (~((sub)->bv[0]) & (src)->bv[0])) | \ - ((src)->bv[1] = (~((sub)->bv[1]) & (src)->bv[1]))) -#elif (FSG_PNODE_CTXT_BVSZ == 4) - #define FSG_PNODE_CTXT_SUB(src,sub) \ - (((src)->bv[0] = (~((sub)->bv[0]) & (src)->bv[0])) | \ - ((src)->bv[1] = (~((sub)->bv[1]) & (src)->bv[1])) | \ - ((src)->bv[2] = (~((sub)->bv[2]) & (src)->bv[2])) | \ - ((src)->bv[3] = (~((sub)->bv[3]) & (src)->bv[3]))) -#else - #define FSG_PNODE_CTXT_SUB(src,sub) fsg_pnode_ctxt_sub_generic((src),(sub)) -#endif - -/** - * Collection of lextrees for an FSG. - */ -typedef struct fsg_lextree_s { - fsg_model_t *fsg; /**< The fsg for which this lextree is built. 
*/ - hmm_context_t *ctx; /**< HMM context structure. */ - dict_t *dict; /**< Pronunciation dictionary for this FSG. */ - dict2pid_t *d2p; /**< Context-dependent phone mappings for this FSG. */ - bin_mdef_t *mdef; /**< Model definition (triphone mappings). */ - - /* - * Left and right CIphone sets for each state. - * Left context CIphones for a state S: If word W transitions into S, W's - * final CIphone is in S's {lc}. Words transitioning out of S must consider - * these left context CIphones. - * Similarly, right contexts for state S: If word W transitions out of S, - * W's first CIphone is in S's {rc}. Words transitioning into S must consider - * these right contexts. - * - * NOTE: Words may transition into and out of S INDIRECTLY, with intermediate - * null transitions. - * NOTE: Single-phone words are difficult; only SILENCE right context is - * modelled for them. - * NOTE: Non-silence filler phones aren't included in these sets. Filler - * words don't use context, and present the SILENCE phone as context to - * adjacent words. - */ - int16 **lc; /**< Left context triphone mappings for FSG. */ - int16 **rc; /**< Right context triphone mappings for FSG. */ - - fsg_pnode_t **root; /* root[s] = lextree representing all transitions - out of state s. Note that the "tree" for each - state is actually a collection of trees, linked - via fsg_pnode_t.sibling (root[s]->sibling) */ - fsg_pnode_t **alloc_head; /* alloc_head[s] = head of linear list of all - pnodes allocated for state s */ - int32 n_pnode; /* #HMM nodes in search structure */ - int32 wip; - int32 pip; -} fsg_lextree_t; - -/* Access macros */ -#define fsg_lextree_root(lt,s) ((lt)->root[s]) -#define fsg_lextree_n_pnode(lt) ((lt)->n_pnode) - -/** - * Create, initialize, and return a new phonetic lextree for the given FSG. - */ -fsg_lextree_t *fsg_lextree_init(fsg_model_t *fsg, dict_t *dict, - dict2pid_t *d2p, - bin_mdef_t *mdef, hmm_context_t *ctx, - int32 wip, int32 pip); - -/** - * Free lextrees for an FSG. 
- */ -void fsg_lextree_free(fsg_lextree_t *fsg); - -/** - * Print an FSG lextree to a file for debugging. - */ -void fsg_lextree_dump(fsg_lextree_t *fsg, FILE *fh); - -/** - * Mark the given pnode as inactive (for search). - */ -void fsg_psubtree_pnode_deactivate(fsg_pnode_t *pnode); - -/** - * Set all flags on in the given context bitvector. - */ -void fsg_pnode_add_all_ctxt(fsg_pnode_ctxt_t *ctxt); - -/** - * Generic variant for arbitrary size - */ -uint32 fsg_pnode_ctxt_sub_generic(fsg_pnode_ctxt_t *src, fsg_pnode_ctxt_t *sub); - -#endif |