diff options
Diffstat (limited to 'media/pocketsphinx/src/fsg_lextree.c')
-rw-r--r-- | media/pocketsphinx/src/fsg_lextree.c | 835 |
1 files changed, 835 insertions, 0 deletions
diff --git a/media/pocketsphinx/src/fsg_lextree.c b/media/pocketsphinx/src/fsg_lextree.c new file mode 100644 index 000000000..573f06b2f --- /dev/null +++ b/media/pocketsphinx/src/fsg_lextree.c @@ -0,0 +1,835 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2010 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/** + * @file fsg_lextree.c + * @brief The collection of all the lextrees for the entire FSM. + * @author M K Ravishankar <rkm@cs.cmu.edu> + * @author Bhiksha Raj <bhiksha@cs.cmu.edu> + */ + +/* System headers. */ +#include <stdio.h> +#include <string.h> +#include <assert.h> + +/* SphinxBase headers. */ +#include <sphinxbase/ckd_alloc.h> +#include <sphinxbase/err.h> + +/* Local headers. */ +#include "fsg_lextree.h" + +#define __FSG_DBG__ 0 + +/* A linklist structure that is actually used to build local lextrees at grammar nodes */ +typedef struct fsg_glist_linklist_t { + int32 ci, rc; + glist_t glist; + struct fsg_glist_linklist_t *next; +} fsg_glist_linklist_t; + +/** + * Build the phone lextree for all transitions out of state from_state. + * Return the root node of this tree. + * Also, return a linear linked list of all allocated fsg_pnode_t nodes in + * *alloc_head (for memory management purposes). + */ +static fsg_pnode_t *fsg_psubtree_init(fsg_lextree_t *tree, + fsg_model_t *fsg, + int32 from_state, + fsg_pnode_t **alloc_head); + +/** + * Free the given lextree. alloc_head: head of linear list of allocated + * nodes updated by fsg_psubtree_init(). + */ +static void fsg_psubtree_free(fsg_pnode_t *alloc_head); + +/** + * Dump the list of nodes in the given lextree to the given file. alloc_head: + * head of linear list of allocated nodes updated by fsg_psubtree_init(). + */ +static void fsg_psubtree_dump(fsg_lextree_t *tree, fsg_pnode_t *root, FILE *fp); + +/** + * Compute the left and right context CIphone sets for each state. + */ +static void +fsg_lextree_lc_rc(fsg_lextree_t *lextree) +{ + int32 s, i, j; + int32 n_ci; + fsg_model_t *fsg; + int32 silcipid; + int32 len; + + silcipid = bin_mdef_silphone(lextree->mdef); + assert(silcipid >= 0); + n_ci = bin_mdef_n_ciphone(lextree->mdef); + + fsg = lextree->fsg; + /* + * lextree->lc[s] = set of left context CIphones for state s. Similarly, rc[s] + * for right context CIphones. + */ + lextree->lc = ckd_calloc_2d(fsg->n_state, n_ci + 1, sizeof(**lextree->lc)); + lextree->rc = ckd_calloc_2d(fsg->n_state, n_ci + 1, sizeof(**lextree->rc)); + E_INFO("Allocated %d bytes (%d KiB) for left and right context phones\n", + fsg->n_state * (n_ci + 1) * 2, + fsg->n_state * (n_ci + 1) * 2 / 1024); + + + for (s = 0; s < fsg->n_state; s++) { + fsg_arciter_t *itor; + for (itor = fsg_model_arcs(fsg, s); itor; itor = fsg_arciter_next(itor)) { + fsg_link_t *l = fsg_arciter_get(itor); + int32 dictwid; /**< Dictionary (not FSG) word ID!! */ + + if (fsg_link_wid(l) >= 0) { + dictwid = dict_wordid(lextree->dict, + fsg_model_word_str(lextree->fsg, l->wid)); + + /* + * Add the first CIphone of l->wid to the rclist of state s, and + * the last CIphone to lclist of state d. + * (Filler phones are a pain to deal with. There is no direct + * marking of a filler phone; but only filler words are supposed to + * use such phones, so we use that fact. HACK!! FRAGILE!!) + * + * UPD: tests carsh here if .fsg model used with wrong hmm and + * dictionary + */ + if (fsg_model_is_filler(fsg, fsg_link_wid(l))) { + /* Filler phone; use silence phone as context */ + lextree->rc[fsg_link_from_state(l)][silcipid] = 1; + lextree->lc[fsg_link_to_state(l)][silcipid] = 1; + } + else { + len = dict_pronlen(lextree->dict, dictwid); + lextree->rc[fsg_link_from_state(l)][dict_pron(lextree->dict, dictwid, 0)] = 1; + lextree->lc[fsg_link_to_state(l)][dict_pron(lextree->dict, dictwid, len - 1)] = 1; + } + } + } + } + + for (s = 0; s < fsg->n_state; s++) { + /* + * Add SIL phone to the lclist and rclist of each state. Strictly + * speaking, only needed at start and final states, respectively, but + * all states considered since the user may change the start and final + * states. In any case, most applications would have a silence self + * loop at each state, hence these would be needed anyway. + */ + lextree->lc[s][silcipid] = 1; + lextree->rc[s][silcipid] = 1; + } + + /* + * Propagate lc and rc lists past null transitions. (Since FSG contains + * null transitions closure, no need to worry about a chain of successive + * null transitions. Right??) + * + * This can't be joined with the previous loop because we first calculate + * contexts and only then we can propagate them. + */ + for (s = 0; s < fsg->n_state; s++) { + fsg_arciter_t *itor; + for (itor = fsg_model_arcs(fsg, s); itor; itor = fsg_arciter_next(itor)) { + fsg_link_t *l = fsg_arciter_get(itor); + if (fsg_link_wid(l) < 0) { + /* + * lclist(d) |= lclist(s), because all the words ending up at s, can + * now also end at d, becoming the left context for words leaving d. + */ + for (i = 0; i < n_ci; i++) + lextree->lc[fsg_link_to_state(l)][i] |= lextree->lc[fsg_link_from_state(l)][i]; + /* + * Similarly, rclist(s) |= rclist(d), because all the words leaving d + * can equivalently leave s, becoming the right context for words + * ending up at s. + */ + for (i = 0; i < n_ci; i++) + lextree->rc[fsg_link_from_state(l)][i] |= lextree->rc[fsg_link_to_state(l)][i]; + } + } + } + + /* Convert the bit-vector representation into a list */ + for (s = 0; s < fsg->n_state; s++) { + j = 0; + for (i = 0; i < n_ci; i++) { + if (lextree->lc[s][i]) { + lextree->lc[s][j] = i; + j++; + } + } + lextree->lc[s][j] = -1; /* Terminate the list */ + + j = 0; + for (i = 0; i < n_ci; i++) { + if (lextree->rc[s][i]) { + lextree->rc[s][j] = i; + j++; + } + } + lextree->rc[s][j] = -1; /* Terminate the list */ + } +} + +/* + * For now, allocate the entire lextree statically. + */ +fsg_lextree_t * +fsg_lextree_init(fsg_model_t * fsg, dict_t *dict, dict2pid_t *d2p, + bin_mdef_t *mdef, hmm_context_t *ctx, + int32 wip, int32 pip) +{ + int32 s, n_leaves; + fsg_lextree_t *lextree; + fsg_pnode_t *pn; + + lextree = ckd_calloc(1, sizeof(fsg_lextree_t)); + lextree->fsg = fsg; + lextree->root = ckd_calloc(fsg_model_n_state(fsg), + sizeof(fsg_pnode_t *)); + lextree->alloc_head = ckd_calloc(fsg_model_n_state(fsg), + sizeof(fsg_pnode_t *)); + lextree->ctx = ctx; + lextree->dict = dict; + lextree->d2p = d2p; + lextree->mdef = mdef; + lextree->wip = wip; + lextree->pip = pip; + + /* Compute lc and rc for fsg. */ + fsg_lextree_lc_rc(lextree); + + /* Create lextree for each state, i.e. an HMM network that + * represents words for all arcs exiting that state. Note that + * for a dense grammar such as an N-gram model, this will + * rapidly exhaust all available memory. */ + lextree->n_pnode = 0; + n_leaves = 0; + for (s = 0; s < fsg_model_n_state(fsg); s++) { + lextree->root[s] = + fsg_psubtree_init(lextree, fsg, s, &(lextree->alloc_head[s])); + + for (pn = lextree->alloc_head[s]; pn; pn = pn->alloc_next) { + lextree->n_pnode++; + if (pn->leaf) + ++n_leaves; + } + } + E_INFO("%d HMM nodes in lextree (%d leaves)\n", + lextree->n_pnode, n_leaves); + E_INFO("Allocated %d bytes (%d KiB) for all lextree nodes\n", + lextree->n_pnode * sizeof(fsg_pnode_t), + lextree->n_pnode * sizeof(fsg_pnode_t) / 1024); + E_INFO("Allocated %d bytes (%d KiB) for lextree leafnodes\n", + n_leaves * sizeof(fsg_pnode_t), + n_leaves * sizeof(fsg_pnode_t) / 1024); + +#if __FSG_DBG__ + fsg_lextree_dump(lextree, stdout); +#endif + + return lextree; +} + + +void +fsg_lextree_dump(fsg_lextree_t * lextree, FILE * fp) +{ + int32 s; + + for (s = 0; s < fsg_model_n_state(lextree->fsg); s++) { + fprintf(fp, "State %5d root %p\n", s, lextree->root[s]); + fsg_psubtree_dump(lextree, lextree->root[s], fp); + } + fflush(fp); +} + + +void +fsg_lextree_free(fsg_lextree_t * lextree) +{ + int32 s; + + if (lextree == NULL) + return; + + if (lextree->fsg) + for (s = 0; s < fsg_model_n_state(lextree->fsg); s++) + fsg_psubtree_free(lextree->alloc_head[s]); + + ckd_free_2d(lextree->lc); + ckd_free_2d(lextree->rc); + ckd_free(lextree->root); + ckd_free(lextree->alloc_head); + ckd_free(lextree); +} + +/****************************** + * psubtree stuff starts here * + ******************************/ + +void fsg_glist_linklist_free(fsg_glist_linklist_t *glist) +{ + if (glist) { + fsg_glist_linklist_t *nxtglist; + if (glist->glist) + glist_free(glist->glist); + nxtglist = glist->next; + while (nxtglist) { + ckd_free(glist); + glist = nxtglist; + if (glist->glist) + glist_free(glist->glist); + nxtglist = glist->next; + } + ckd_free(glist); + } + return; +} + +void +fsg_pnode_add_all_ctxt(fsg_pnode_ctxt_t * ctxt) +{ + int32 i; + + for (i = 0; i < FSG_PNODE_CTXT_BVSZ; i++) + ctxt->bv[i] = 0xffffffff; +} + +uint32 fsg_pnode_ctxt_sub_generic(fsg_pnode_ctxt_t *src, fsg_pnode_ctxt_t *sub) +{ + int32 i; + uint32 res = 0; + + for (i = 0; i < FSG_PNODE_CTXT_BVSZ; i++) + res |= (src->bv[i] = ~(sub->bv[i]) & src->bv[i]); + return res; +} + + +/* + * fsg_pnode_ctxt_sub(fsg_pnode_ctxt_t * src, fsg_pnode_ctxt_t * sub) + * This has been moved into a macro in fsg_psubtree.h + * because it is called so frequently! + */ + + +/* + * Add the word emitted by the given transition (fsglink) to the given lextree + * (rooted at root), and return the new lextree root. (There may actually be + * several root nodes, maintained in a linked list via fsg_pnode_t.sibling. + * "root" is the head of this list.) + * lclist, rclist: sets of left and right context phones for this link. + * alloc_head: head of a linear list of all allocated pnodes for the parent + * FSG state, kept elsewhere and updated by this routine. + */ +static fsg_pnode_t * +psubtree_add_trans(fsg_lextree_t *lextree, + fsg_pnode_t * root, + fsg_glist_linklist_t **curglist, + fsg_link_t * fsglink, + int16 *lclist, int16 *rclist, + fsg_pnode_t ** alloc_head) +{ + int32 silcipid; /* Silence CI phone ID */ + int32 pronlen; /* Pronunciation length */ + int32 wid; /* FSG (not dictionary!!) word ID */ + int32 dictwid; /* Dictionary (not FSG!!) word ID */ + int32 ssid; /* Senone Sequence ID */ + int32 tmatid; + gnode_t *gn; + fsg_pnode_t *pnode, *pred, *head; + int32 n_ci, p, lc, rc; + glist_t lc_pnodelist; /* Temp pnodes list for different left contexts */ + glist_t rc_pnodelist; /* Temp pnodes list for different right contexts */ + int32 i, j; + int n_lc_alloc = 0, n_int_alloc = 0, n_rc_alloc = 0; + + silcipid = bin_mdef_silphone(lextree->mdef); + n_ci = bin_mdef_n_ciphone(lextree->mdef); + + wid = fsg_link_wid(fsglink); + assert(wid >= 0); /* Cannot be a null transition */ + dictwid = dict_wordid(lextree->dict, + fsg_model_word_str(lextree->fsg, wid)); + pronlen = dict_pronlen(lextree->dict, dictwid); + assert(pronlen >= 1); + + assert(lclist[0] >= 0); /* At least one phonetic context provided */ + assert(rclist[0] >= 0); + + head = *alloc_head; + pred = NULL; + + if (pronlen == 1) { /* Single-phone word */ + int ci = dict_first_phone(lextree->dict, dictwid); + /* Only non-filler words are mpx */ + if (!dict_filler_word(lextree->dict, dictwid)) { + /* + * Left diphone ID for single-phone words already assumes SIL is right + * context; only left contexts need to be handled. + */ + lc_pnodelist = NULL; + + for (i = 0; lclist[i] >= 0; i++) { + lc = lclist[i]; + ssid = dict2pid_lrdiph_rc(lextree->d2p, ci, lc, silcipid); + tmatid = bin_mdef_pid2tmatid(lextree->mdef, dict_first_phone(lextree->dict, dictwid)); + /* Check if this ssid already allocated for some other context */ + for (gn = lc_pnodelist; gn; gn = gnode_next(gn)) { + pnode = (fsg_pnode_t *) gnode_ptr(gn); + + if (hmm_nonmpx_ssid(&pnode->hmm) == ssid) { + /* already allocated; share it for this context phone */ + fsg_pnode_add_ctxt(pnode, lc); + break; + } + } + + if (!gn) { /* ssid not already allocated */ + pnode = + (fsg_pnode_t *) ckd_calloc(1, sizeof(fsg_pnode_t)); + pnode->ctx = lextree->ctx; + pnode->next.fsglink = fsglink; + pnode->logs2prob = + (fsg_link_logs2prob(fsglink) >> SENSCR_SHIFT) + + lextree->wip + lextree->pip; + pnode->ci_ext = dict_first_phone(lextree->dict, dictwid); + pnode->ppos = 0; + pnode->leaf = TRUE; + pnode->sibling = root; /* All root nodes linked together */ + fsg_pnode_add_ctxt(pnode, lc); /* Initially zeroed by calloc above */ + pnode->alloc_next = head; + head = pnode; + root = pnode; + ++n_lc_alloc; + + hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, tmatid); + + lc_pnodelist = + glist_add_ptr(lc_pnodelist, (void *) pnode); + } + } + + glist_free(lc_pnodelist); + } + else { /* Filler word; no context modelled */ + ssid = bin_mdef_pid2ssid(lextree->mdef, ci); /* probably the same... */ + tmatid = bin_mdef_pid2tmatid(lextree->mdef, ci); + + pnode = (fsg_pnode_t *) ckd_calloc(1, sizeof(fsg_pnode_t)); + pnode->ctx = lextree->ctx; + pnode->next.fsglink = fsglink; + pnode->logs2prob = (fsg_link_logs2prob(fsglink) >> SENSCR_SHIFT) + + lextree->wip + lextree->pip; + pnode->ci_ext = silcipid; /* Presents SIL as context to neighbors */ + pnode->ppos = 0; + pnode->leaf = TRUE; + pnode->sibling = root; + fsg_pnode_add_all_ctxt(&(pnode->ctxt)); + pnode->alloc_next = head; + head = pnode; + root = pnode; + ++n_int_alloc; + + hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, tmatid); + } + } + else { /* Multi-phone word */ + fsg_pnode_t **ssid_pnode_map; /* Temp array of ssid->pnode mapping */ + ssid_pnode_map = + (fsg_pnode_t **) ckd_calloc(n_ci, sizeof(fsg_pnode_t *)); + lc_pnodelist = NULL; + rc_pnodelist = NULL; + + for (p = 0; p < pronlen; p++) { + int ci = dict_pron(lextree->dict, dictwid, p); + if (p == 0) { /* Root phone, handle required left contexts */ + /* Find if we already have an lc_pnodelist for the first phone of this word */ + fsg_glist_linklist_t *glist; + + rc = dict_pron(lextree->dict, dictwid, 1); + for (glist = *curglist; + glist && glist->glist; + glist = glist->next) { + if (glist->ci == ci && glist->rc == rc) + break; + } + if (glist && glist->glist) { + assert(glist->ci == ci && glist->rc == rc); + /* We've found a valid glist. Hook to it and move to next phoneme */ + E_DEBUG(2,("Found match for (%d,%d)\n", ci, rc)); + lc_pnodelist = glist->glist; + /* Set the predecessor node for the future tree first */ + pred = (fsg_pnode_t *) gnode_ptr(lc_pnodelist); + continue; + } + else { + /* Two cases that can bring us here + * a. glist == NULL, i.e. end of current list. Create new entry. + * b. glist->glist == NULL, i.e. first entry into list. + */ + if (glist == NULL) { /* Case a; reduce it to case b by allocing glist */ + glist = (fsg_glist_linklist_t*) ckd_calloc(1, sizeof(fsg_glist_linklist_t)); + glist->next = *curglist; + *curglist = glist; + } + glist->ci = ci; + glist->rc = rc; + lc_pnodelist = glist->glist = NULL; /* Gets created below */ + } + + for (i = 0; lclist[i] >= 0; i++) { + lc = lclist[i]; + ssid = dict2pid_ldiph_lc(lextree->d2p, ci, rc, lc); + tmatid = bin_mdef_pid2tmatid(lextree->mdef, dict_first_phone(lextree->dict, dictwid)); + /* Compression is not done by d2p, so we do it + * here. This might be slow, but it might not + * be... we'll see. */ + pnode = ssid_pnode_map[0]; + for (j = 0; j < n_ci && ssid_pnode_map[j] != NULL; ++j) { + pnode = ssid_pnode_map[j]; + if (hmm_nonmpx_ssid(&pnode->hmm) == ssid) + break; + } + assert(j < n_ci); + if (!pnode) { /* Allocate pnode for this new ssid */ + pnode = + (fsg_pnode_t *) ckd_calloc(1, + sizeof + (fsg_pnode_t)); + pnode->ctx = lextree->ctx; + /* This bit is tricky! For now we'll put the prob in the final link only */ + /* pnode->logs2prob = (fsg_link_logs2prob(fsglink) >> SENSCR_SHIFT) + + lextree->wip + lextree->pip; */ + pnode->logs2prob = lextree->wip + lextree->pip; + pnode->ci_ext = dict_first_phone(lextree->dict, dictwid); + pnode->ppos = 0; + pnode->leaf = FALSE; + pnode->sibling = root; /* All root nodes linked together */ + pnode->alloc_next = head; + head = pnode; + root = pnode; + ++n_lc_alloc; + + hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, tmatid); + + lc_pnodelist = + glist_add_ptr(lc_pnodelist, (void *) pnode); + ssid_pnode_map[j] = pnode; + } + fsg_pnode_add_ctxt(pnode, lc); + } + /* Put the lc_pnodelist back into glist */ + glist->glist = lc_pnodelist; + + /* The predecessor node for the future tree is the root */ + pred = root; + } + else if (p != pronlen - 1) { /* Word internal phone */ + fsg_pnode_t *pnodeyoungest; + + ssid = dict2pid_internal(lextree->d2p, dictwid, p); + tmatid = bin_mdef_pid2tmatid(lextree->mdef, dict_pron (lextree->dict, dictwid, p)); + /* First check if we already have this ssid in our tree */ + pnode = pred->next.succ; + pnodeyoungest = pnode; /* The youngest sibling */ + while (pnode && (hmm_nonmpx_ssid(&pnode->hmm) != ssid || pnode->leaf)) { + pnode = pnode->sibling; + } + if (pnode && (hmm_nonmpx_ssid(&pnode->hmm) == ssid && !pnode->leaf)) { + /* Found the ssid; go to next phoneme */ + E_DEBUG(2,("Found match for %d\n", ci)); + pred = pnode; + continue; + } + + /* pnode not found, allocate it */ + pnode = (fsg_pnode_t *) ckd_calloc(1, sizeof(fsg_pnode_t)); + pnode->ctx = lextree->ctx; + pnode->logs2prob = lextree->pip; + pnode->ci_ext = dict_pron(lextree->dict, dictwid, p); + pnode->ppos = p; + pnode->leaf = FALSE; + pnode->sibling = pnodeyoungest; /* May be NULL */ + if (p == 1) { /* Predecessor = set of root nodes for left ctxts */ + for (gn = lc_pnodelist; gn; gn = gnode_next(gn)) { + pred = (fsg_pnode_t *) gnode_ptr(gn); + pred->next.succ = pnode; + } + } + else { /* Predecessor = word internal node */ + pred->next.succ = pnode; + } + pnode->alloc_next = head; + head = pnode; + ++n_int_alloc; + + hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, tmatid); + + pred = pnode; + } + else { /* Leaf phone, handle required right contexts */ + /* Note, leaf phones are not part of the tree */ + xwdssid_t *rssid; + memset((void *) ssid_pnode_map, 0, + n_ci * sizeof(fsg_pnode_t *)); + lc = dict_pron(lextree->dict, dictwid, p-1); + rssid = dict2pid_rssid(lextree->d2p, ci, lc); + tmatid = bin_mdef_pid2tmatid(lextree->mdef, dict_pron (lextree->dict, dictwid, p)); + + for (i = 0; rclist[i] >= 0; i++) { + rc = rclist[i]; + + j = rssid->cimap[rc]; + ssid = rssid->ssid[j]; + pnode = ssid_pnode_map[j]; + + if (!pnode) { /* Allocate pnode for this new ssid */ + pnode = + (fsg_pnode_t *) ckd_calloc(1, + sizeof + (fsg_pnode_t)); + pnode->ctx = lextree->ctx; + /* We are plugging the word prob here. Ugly */ + /* pnode->logs2prob = lextree->pip; */ + pnode->logs2prob = (fsg_link_logs2prob(fsglink) >> SENSCR_SHIFT) + + lextree->pip; + pnode->ci_ext = dict_pron(lextree->dict, dictwid, p); + pnode->ppos = p; + pnode->leaf = TRUE; + pnode->sibling = rc_pnodelist ? + (fsg_pnode_t *) gnode_ptr(rc_pnodelist) : NULL; + pnode->next.fsglink = fsglink; + pnode->alloc_next = head; + head = pnode; + ++n_rc_alloc; + + hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, tmatid); + + rc_pnodelist = + glist_add_ptr(rc_pnodelist, (void *) pnode); + ssid_pnode_map[j] = pnode; + } + else { + assert(hmm_nonmpx_ssid(&pnode->hmm) == ssid); + } + fsg_pnode_add_ctxt(pnode, rc); + } + + if (p == 1) { /* Predecessor = set of root nodes for left ctxts */ + for (gn = lc_pnodelist; gn; gn = gnode_next(gn)) { + pred = (fsg_pnode_t *) gnode_ptr(gn); + if (!pred->next.succ) + pred->next.succ = (fsg_pnode_t *) gnode_ptr(rc_pnodelist); + else { + /* Link to the end of the sibling chain */ + fsg_pnode_t *succ = pred->next.succ; + while (succ->sibling) succ = succ->sibling; + succ->sibling = (fsg_pnode_t*) gnode_ptr(rc_pnodelist); + /* Since all entries of lc_pnodelist point + to the same array, sufficient to update it once */ + break; + } + } + } + else { /* Predecessor = word internal node */ + if (!pred->next.succ) + pred->next.succ = (fsg_pnode_t *) gnode_ptr(rc_pnodelist); + else { + /* Link to the end of the sibling chain */ + fsg_pnode_t *succ = pred->next.succ; + while (succ->sibling) succ = succ->sibling; + succ->sibling = (fsg_pnode_t *) gnode_ptr(rc_pnodelist); + } + } + } + } + + ckd_free((void *) ssid_pnode_map); + /* glist_free(lc_pnodelist); Nope; this gets freed outside */ + glist_free(rc_pnodelist); + } + + E_DEBUG(2,("Allocated %d HMMs (%d lc, %d rc, %d internal)\n", + n_lc_alloc + n_rc_alloc + n_int_alloc, + n_lc_alloc, n_rc_alloc, n_int_alloc)); + *alloc_head = head; + + return root; +} + + +static fsg_pnode_t * +fsg_psubtree_init(fsg_lextree_t *lextree, + fsg_model_t * fsg, int32 from_state, + fsg_pnode_t ** alloc_head) +{ + fsg_arciter_t *itor; + fsg_link_t *fsglink; + fsg_pnode_t *root; + int32 n_ci, n_arc; + fsg_glist_linklist_t *glist = NULL; + + root = NULL; + assert(*alloc_head == NULL); + + n_ci = bin_mdef_n_ciphone(lextree->mdef); + if (n_ci > (FSG_PNODE_CTXT_BVSZ * 32)) { + E_FATAL + ("#phones > %d; increase FSG_PNODE_CTXT_BVSZ and recompile\n", + FSG_PNODE_CTXT_BVSZ * 32); + } + + n_arc = 0; + for (itor = fsg_model_arcs(fsg, from_state); itor; + itor = fsg_arciter_next(itor)) { + int32 dst; + fsglink = fsg_arciter_get(itor); + dst = fsglink->to_state; + + if (fsg_link_wid(fsglink) < 0) + continue; + + E_DEBUG(2,("Building lextree for arc from %d to %d: %s\n", + from_state, dst, fsg_model_word_str(fsg, fsg_link_wid(fsglink)))); + root = psubtree_add_trans(lextree, root, &glist, fsglink, + lextree->lc[from_state], + lextree->rc[dst], + alloc_head); + ++n_arc; + } + E_DEBUG(2,("State %d has %d outgoing arcs\n", from_state, n_arc)); + + fsg_glist_linklist_free(glist); + + return root; +} + + +static void +fsg_psubtree_free(fsg_pnode_t * head) +{ + fsg_pnode_t *next; + + while (head) { + next = head->alloc_next; + hmm_deinit(&head->hmm); + ckd_free(head); + head = next; + } +} + +void fsg_psubtree_dump_node(fsg_lextree_t *tree, fsg_pnode_t *node, FILE *fp) +{ + int32 i; + fsg_link_t *tl; + + /* Indentation */ + for (i = 0; i <= node->ppos; i++) + fprintf(fp, " "); + + fprintf(fp, "%p.@", node); /* Pointer used as node + * ID */ + fprintf(fp, " %5d.SS", hmm_nonmpx_ssid(&node->hmm)); + fprintf(fp, " %10d.LP", node->logs2prob); + fprintf(fp, " %p.SIB", node->sibling); + fprintf(fp, " %s.%d", bin_mdef_ciphone_str(tree->mdef, node->ci_ext), node->ppos); + if ((node->ppos == 0) || node->leaf) { + fprintf(fp, " ["); + for (i = 0; i < FSG_PNODE_CTXT_BVSZ; i++) + fprintf(fp, "%08x", node->ctxt.bv[i]); + fprintf(fp, "]"); + } + if (node->leaf) { + tl = node->next.fsglink; + fprintf(fp, " {%s[%d->%d](%d)}", + fsg_model_word_str(tree->fsg, tl->wid), + tl->from_state, tl->to_state, tl->logs2prob); + } else { + fprintf(fp, " %p.NXT", node->next.succ); + } + fprintf(fp, "\n"); + + return; +} + +void +fsg_psubtree_dump(fsg_lextree_t *tree, fsg_pnode_t *root, FILE * fp) +{ + fsg_pnode_t *succ; + + if (root == NULL) return; + if (root->ppos == 0) { + while(root->sibling && root->sibling->next.succ == root->next.succ) { + fsg_psubtree_dump_node(tree, root, fp); + root = root->sibling; + } + fflush(fp); + } + + fsg_psubtree_dump_node(tree, root, fp); + + if (root->leaf) { + if (root->ppos == 0 && root->sibling) { /* For single-phone words */ + fsg_psubtree_dump(tree, root->sibling,fp); + } + return; + } + + succ = root->next.succ; + while(succ) { + fsg_psubtree_dump(tree, succ,fp); + succ = succ->sibling; + } + + if (root->ppos == 0) { + fsg_psubtree_dump(tree, root->sibling,fp); + fflush(fp); + } + + return; +} + +void +fsg_psubtree_pnode_deactivate(fsg_pnode_t * pnode) +{ + hmm_clear(&pnode->hmm); +} |