diff options
Diffstat (limited to 'media/sphinxbase/src/libsphinxbase/lm')
18 files changed, 0 insertions, 11621 deletions
diff --git a/media/sphinxbase/src/libsphinxbase/lm/fsg_model.c b/media/sphinxbase/src/libsphinxbase/lm/fsg_model.c deleted file mode 100644 index 374897754..000000000 --- a/media/sphinxbase/src/libsphinxbase/lm/fsg_model.c +++ /dev/null @@ -1,944 +0,0 @@ -/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ -/* ==================================================================== - * Copyright (c) 1999-2004 Carnegie Mellon University. All rights - * reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * - * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND - * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, - * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY - * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * ==================================================================== - * - */ - -/* System headers. 
*/ -#ifdef _WIN32_WCE -/*MC in a debug build it's implicitly included by assert.h - but you need this in a release build */ -#include <windows.h> -#else -#include <time.h> -#endif /* _WIN32_WCE */ -#include <stdio.h> -#include <string.h> -#include <assert.h> - -/* SphinxBase headers. */ -#include "sphinxbase/err.h" -#include "sphinxbase/pio.h" -#include "sphinxbase/ckd_alloc.h" -#include "sphinxbase/prim_type.h" -#include "sphinxbase/strfuncs.h" -#include "sphinxbase/hash_table.h" -#include "sphinxbase/fsg_model.h" - -/** - * Adjacency list (opaque) for a state in an FSG. - * - * Actually we use hash tables so that random access is a bit faster. - * Plus it allows us to make the lookup code a bit less ugly. - */ - -struct trans_list_s { - hash_table_t *null_trans; /* Null transitions keyed by state. */ - hash_table_t *trans; /* Lists of non-null transitions keyed by state. */ -}; - -/** - * Implementation of arc iterator. - */ -struct fsg_arciter_s { - hash_iter_t *itor, *null_itor; - gnode_t *gn; -}; - -#define FSG_MODEL_BEGIN_DECL "FSG_BEGIN" -#define FSG_MODEL_END_DECL "FSG_END" -#define FSG_MODEL_N_DECL "N" -#define FSG_MODEL_NUM_STATES_DECL "NUM_STATES" -#define FSG_MODEL_S_DECL "S" -#define FSG_MODEL_START_STATE_DECL "START_STATE" -#define FSG_MODEL_F_DECL "F" -#define FSG_MODEL_FINAL_STATE_DECL "FINAL_STATE" -#define FSG_MODEL_T_DECL "T" -#define FSG_MODEL_TRANSITION_DECL "TRANSITION" -#define FSG_MODEL_COMMENT_CHAR '#' - - -static int32 -nextline_str2words(FILE * fp, int32 * lineno, - char **lineptr, char ***wordptr) -{ - for (;;) { - size_t len; - int32 n; - - ckd_free(*lineptr); - if ((*lineptr = fread_line(fp, &len)) == NULL) - return -1; - - (*lineno)++; - - if ((*lineptr)[0] == FSG_MODEL_COMMENT_CHAR) - continue; /* Skip comment lines */ - - n = str2words(*lineptr, NULL, 0); - if (n == 0) - continue; /* Skip blank lines */ - - /* Abuse of realloc(), but this doesn't have to be fast. 
*/ - if (*wordptr == NULL) - *wordptr = ckd_calloc(n, sizeof(**wordptr)); - else - *wordptr = ckd_realloc(*wordptr, n * sizeof(**wordptr)); - return str2words(*lineptr, *wordptr, n); - } -} - -void -fsg_model_trans_add(fsg_model_t * fsg, - int32 from, int32 to, int32 logp, int32 wid) -{ - fsg_link_t *link; - glist_t gl; - gnode_t *gn; - - if (fsg->trans[from].trans == NULL) - fsg->trans[from].trans = hash_table_new(5, HASH_CASE_YES); - - /* Check for duplicate link (i.e., link already exists with label=wid) */ - for (gn = gl = fsg_model_trans(fsg, from, to); gn; gn = gnode_next(gn)) { - link = (fsg_link_t *) gnode_ptr(gn); - if (link->wid == wid) { - if (link->logs2prob < logp) - link->logs2prob = logp; - return; - } - } - - /* Create transition object */ - link = listelem_malloc(fsg->link_alloc); - link->from_state = from; - link->to_state = to; - link->logs2prob = logp; - link->wid = wid; - - /* Add it to the list of transitions and update the hash table */ - gl = glist_add_ptr(gl, (void *) link); - hash_table_replace_bkey(fsg->trans[from].trans, - (char const *) &link->to_state, - sizeof(link->to_state), gl); -} - -int32 -fsg_model_tag_trans_add(fsg_model_t * fsg, int32 from, int32 to, - int32 logp, int32 wid) -{ - fsg_link_t *link, *link2; - - /* Check for transition probability */ - if (logp > 0) { - E_FATAL("Null transition prob must be <= 1.0 (state %d -> %d)\n", - from, to); - } - - /* Self-loop null transitions (with prob <= 1.0) are redundant */ - if (from == to) - return -1; - - if (fsg->trans[from].null_trans == NULL) - fsg->trans[from].null_trans = hash_table_new(5, HASH_CASE_YES); - - /* Check for a duplicate link; if found, keep the higher prob */ - link = fsg_model_null_trans(fsg, from, to); - if (link) { - if (link->logs2prob < logp) { - link->logs2prob = logp; - return 0; - } - else - return -1; - } - - /* Create null transition object */ - link = listelem_malloc(fsg->link_alloc); - link->from_state = from; - link->to_state = to; - link->logs2prob 
= logp; - link->wid = -1; - - link2 = (fsg_link_t *) - hash_table_enter_bkey(fsg->trans[from].null_trans, - (char const *) &link->to_state, - sizeof(link->to_state), link); - assert(link == link2); - - return 1; -} - -int32 -fsg_model_null_trans_add(fsg_model_t * fsg, int32 from, int32 to, - int32 logp) -{ - return fsg_model_tag_trans_add(fsg, from, to, logp, -1); -} - -glist_t -fsg_model_null_trans_closure(fsg_model_t * fsg, glist_t nulls) -{ - gnode_t *gn1; - int updated; - fsg_link_t *tl1, *tl2; - int32 k, n; - - E_INFO("Computing transitive closure for null transitions\n"); - - /* If our caller didn't give us a list of null-transitions, - make such a list. Just loop through all the FSG states, - and all the null-transitions in that state (which are kept in - their own hash table). */ - if (nulls == NULL) { - int i; - for (i = 0; i < fsg->n_state; ++i) { - hash_iter_t *itor; - hash_table_t *null_trans = fsg->trans[i].null_trans; - if (null_trans == NULL) - continue; - for (itor = hash_table_iter(null_trans); - itor != NULL; - itor = hash_table_iter_next(itor)) { - nulls = glist_add_ptr(nulls, hash_entry_val(itor->ent)); - } - } - } - - /* - * Probably not the most efficient closure implementation, in general, but - * probably reasonably efficient for a sparse null transition matrix. 
- */ - n = 0; - do { - updated = FALSE; - - for (gn1 = nulls; gn1; gn1 = gnode_next(gn1)) { - hash_iter_t *itor; - - tl1 = (fsg_link_t *) gnode_ptr(gn1); - assert(tl1->wid < 0); - - if (fsg->trans[tl1->to_state].null_trans == NULL) - continue; - - for (itor = hash_table_iter(fsg->trans[tl1->to_state].null_trans); - itor; itor = hash_table_iter_next(itor)) { - - tl2 = (fsg_link_t *) hash_entry_val(itor->ent); - - k = fsg_model_null_trans_add(fsg, - tl1->from_state, - tl2->to_state, - tl1->logs2prob + - tl2->logs2prob); - if (k >= 0) { - updated = TRUE; - if (k > 0) { - nulls = glist_add_ptr(nulls, (void *) - fsg_model_null_trans - (fsg, tl1->from_state, - tl2->to_state)); - n++; - } - } - } - } - } while (updated); - - E_INFO("%d null transitions added\n", n); - - return nulls; -} - -glist_t -fsg_model_trans(fsg_model_t * fsg, int32 i, int32 j) -{ - void *val; - - if (fsg->trans[i].trans == NULL) - return NULL; - if (hash_table_lookup_bkey(fsg->trans[i].trans, (char const *) &j, - sizeof(j), &val) < 0) - return NULL; - return (glist_t) val; -} - -fsg_link_t * -fsg_model_null_trans(fsg_model_t * fsg, int32 i, int32 j) -{ - void *val; - - if (fsg->trans[i].null_trans == NULL) - return NULL; - if (hash_table_lookup_bkey(fsg->trans[i].null_trans, (char const *) &j, - sizeof(j), &val) < 0) - return NULL; - return (fsg_link_t *) val; -} - -fsg_arciter_t * -fsg_model_arcs(fsg_model_t * fsg, int32 i) -{ - fsg_arciter_t *itor; - - if (fsg->trans[i].trans == NULL && fsg->trans[i].null_trans == NULL) - return NULL; - itor = ckd_calloc(1, sizeof(*itor)); - if (fsg->trans[i].null_trans) - itor->null_itor = hash_table_iter(fsg->trans[i].null_trans); - if (fsg->trans[i].trans) - itor->itor = hash_table_iter(fsg->trans[i].trans); - if (itor->itor != NULL) - itor->gn = hash_entry_val(itor->itor->ent); - return itor; -} - -fsg_link_t * -fsg_arciter_get(fsg_arciter_t * itor) -{ - /* Iterate over non-null arcs first. 
*/ - if (itor->gn) - return (fsg_link_t *) gnode_ptr(itor->gn); - else if (itor->null_itor) - return (fsg_link_t *) hash_entry_val(itor->null_itor->ent); - else - return NULL; -} - -fsg_arciter_t * -fsg_arciter_next(fsg_arciter_t * itor) -{ - /* Iterate over non-null arcs first. */ - if (itor->gn) { - itor->gn = gnode_next(itor->gn); - /* Move to the next destination arc. */ - if (itor->gn == NULL) { - itor->itor = hash_table_iter_next(itor->itor); - if (itor->itor != NULL) - itor->gn = hash_entry_val(itor->itor->ent); - else if (itor->null_itor == NULL) - goto stop_iteration; - } - } - else { - if (itor->null_itor == NULL) - goto stop_iteration; - itor->null_itor = hash_table_iter_next(itor->null_itor); - if (itor->null_itor == NULL) - goto stop_iteration; - } - return itor; - stop_iteration: - fsg_arciter_free(itor); - return NULL; - -} - -void -fsg_arciter_free(fsg_arciter_t * itor) -{ - if (itor == NULL) - return; - hash_table_iter_free(itor->null_itor); - hash_table_iter_free(itor->itor); - ckd_free(itor); -} - -int -fsg_model_word_id(fsg_model_t * fsg, char const *word) -{ - int wid; - - /* Search for an existing word matching this. */ - for (wid = 0; wid < fsg->n_word; ++wid) { - if (0 == strcmp(fsg->vocab[wid], word)) - break; - } - /* If not found, add this to the vocab. */ - if (wid == fsg->n_word) - return -1; - return wid; -} - -int -fsg_model_word_add(fsg_model_t * fsg, char const *word) -{ - int wid, old_size; - - /* Search for an existing word matching this. */ - wid = fsg_model_word_id(fsg, word); - /* If not found, add this to the vocab. 
*/ - if (wid == -1) { - wid = fsg->n_word; - if (fsg->n_word == fsg->n_word_alloc) { - old_size = fsg->n_word_alloc; - fsg->n_word_alloc += 10; - fsg->vocab = ckd_realloc(fsg->vocab, - fsg->n_word_alloc * - sizeof(*fsg->vocab)); - if (fsg->silwords) - fsg->silwords = - bitvec_realloc(fsg->silwords, old_size, fsg->n_word_alloc); - if (fsg->altwords) - fsg->altwords = - bitvec_realloc(fsg->altwords, old_size, fsg->n_word_alloc); - } - ++fsg->n_word; - fsg->vocab[wid] = ckd_salloc(word); - } - return wid; -} - -int -fsg_model_add_silence(fsg_model_t * fsg, char const *silword, - int state, float32 silprob) -{ - int32 logsilp; - int n_trans, silwid, src; - - E_INFO("Adding silence transitions for %s to FSG\n", silword); - - silwid = fsg_model_word_add(fsg, silword); - logsilp = (int32) (logmath_log(fsg->lmath, silprob) * fsg->lw); - if (fsg->silwords == NULL) - fsg->silwords = bitvec_alloc(fsg->n_word_alloc); - bitvec_set(fsg->silwords, silwid); - - n_trans = 0; - if (state == -1) { - for (src = 0; src < fsg->n_state; src++) { - fsg_model_trans_add(fsg, src, src, logsilp, silwid); - ++n_trans; - } - } - else { - fsg_model_trans_add(fsg, state, state, logsilp, silwid); - ++n_trans; - } - - E_INFO("Added %d silence word transitions\n", n_trans); - return n_trans; -} - -int -fsg_model_add_alt(fsg_model_t * fsg, char const *baseword, - char const *altword) -{ - int i, basewid, altwid; - int ntrans; - - /* FIXME: This will get slow, eventually... 
*/ - for (basewid = 0; basewid < fsg->n_word; ++basewid) - if (0 == strcmp(fsg->vocab[basewid], baseword)) - break; - if (basewid == fsg->n_word) { - E_ERROR("Base word %s not present in FSG vocabulary!\n", baseword); - return -1; - } - altwid = fsg_model_word_add(fsg, altword); - if (fsg->altwords == NULL) - fsg->altwords = bitvec_alloc(fsg->n_word_alloc); - bitvec_set(fsg->altwords, altwid); - if (fsg_model_is_filler(fsg, basewid)) { - if (fsg->silwords == NULL) - fsg->silwords = bitvec_alloc(fsg->n_word_alloc); - bitvec_set(fsg->silwords, altwid); - } - - E_DEBUG(2, ("Adding alternate word transitions (%s,%s) to FSG\n", - baseword, altword)); - - /* Look for all transitions involving baseword and duplicate them. */ - /* FIXME: This will also get slow, eventually... */ - ntrans = 0; - for (i = 0; i < fsg->n_state; ++i) { - hash_iter_t *itor; - if (fsg->trans[i].trans == NULL) - continue; - for (itor = hash_table_iter(fsg->trans[i].trans); itor; - itor = hash_table_iter_next(itor)) { - glist_t trans; - gnode_t *gn; - - trans = hash_entry_val(itor->ent); - for (gn = trans; gn; gn = gnode_next(gn)) { - fsg_link_t *fl = gnode_ptr(gn); - if (fl->wid == basewid) { - fsg_link_t *link; - - /* Create transition object */ - link = listelem_malloc(fsg->link_alloc); - link->from_state = fl->from_state; - link->to_state = fl->to_state; - link->logs2prob = fl->logs2prob; /* FIXME!!!??? */ - link->wid = altwid; - - trans = glist_add_ptr(trans, (void *) link); - ++ntrans; - } - } - hash_entry_val(itor->ent) = trans; - } - } - - E_DEBUG(2, ("Added %d alternate word transitions\n", ntrans)); - return ntrans; -} - - -fsg_model_t * -fsg_model_init(char const *name, logmath_t * lmath, float32 lw, - int32 n_state) -{ - fsg_model_t *fsg; - - /* Allocate basic stuff. */ - fsg = ckd_calloc(1, sizeof(*fsg)); - fsg->refcount = 1; - fsg->link_alloc = listelem_alloc_init(sizeof(fsg_link_t)); - fsg->lmath = lmath; - fsg->name = name ? 
ckd_salloc(name) : NULL; - fsg->n_state = n_state; - fsg->lw = lw; - - fsg->trans = ckd_calloc(fsg->n_state, sizeof(*fsg->trans)); - - return fsg; -} - -fsg_model_t * -fsg_model_read(FILE * fp, logmath_t * lmath, float32 lw) -{ - fsg_model_t *fsg; - hash_table_t *vocab; - hash_iter_t *itor; - int32 lastwid; - char **wordptr; - char *lineptr; - char *fsgname; - int32 lineno; - int32 n, i, j; - int n_state, n_trans, n_null_trans; - glist_t nulls; - float32 p; - - lineno = 0; - vocab = hash_table_new(32, FALSE); - wordptr = NULL; - lineptr = NULL; - nulls = NULL; - fsgname = NULL; - fsg = NULL; - - /* Scan upto FSG_BEGIN header */ - for (;;) { - n = nextline_str2words(fp, &lineno, &lineptr, &wordptr); - if (n < 0) { - E_ERROR("%s declaration missing\n", FSG_MODEL_BEGIN_DECL); - goto parse_error; - } - - if ((strcmp(wordptr[0], FSG_MODEL_BEGIN_DECL) == 0)) { - if (n > 2) { - E_ERROR("Line[%d]: malformed FSG_BEGIN declaration\n", - lineno); - goto parse_error; - } - break; - } - } - /* Save FSG name, or it will get clobbered below :(. - * If name is missing, try the default. - */ - if (n == 2) { - fsgname = ckd_salloc(wordptr[1]); - } - else { - E_WARN("FSG name is missing\n"); - fsgname = ckd_salloc("unknown"); - } - - /* Read #states */ - n = nextline_str2words(fp, &lineno, &lineptr, &wordptr); - if ((n != 2) - || ((strcmp(wordptr[0], FSG_MODEL_N_DECL) != 0) - && (strcmp(wordptr[0], FSG_MODEL_NUM_STATES_DECL) != 0)) - || (sscanf(wordptr[1], "%d", &n_state) != 1) - || (n_state <= 0)) { - E_ERROR - ("Line[%d]: #states declaration line missing or malformed\n", - lineno); - goto parse_error; - } - - /* Now create the FSG. 
*/ - fsg = fsg_model_init(fsgname, lmath, lw, n_state); - ckd_free(fsgname); - fsgname = NULL; - - /* Read start state */ - n = nextline_str2words(fp, &lineno, &lineptr, &wordptr); - if ((n != 2) - || ((strcmp(wordptr[0], FSG_MODEL_S_DECL) != 0) - && (strcmp(wordptr[0], FSG_MODEL_START_STATE_DECL) != 0)) - || (sscanf(wordptr[1], "%d", &(fsg->start_state)) != 1) - || (fsg->start_state < 0) - || (fsg->start_state >= fsg->n_state)) { - E_ERROR - ("Line[%d]: start state declaration line missing or malformed\n", - lineno); - goto parse_error; - } - - /* Read final state */ - n = nextline_str2words(fp, &lineno, &lineptr, &wordptr); - if ((n != 2) - || ((strcmp(wordptr[0], FSG_MODEL_F_DECL) != 0) - && (strcmp(wordptr[0], FSG_MODEL_FINAL_STATE_DECL) != 0)) - || (sscanf(wordptr[1], "%d", &(fsg->final_state)) != 1) - || (fsg->final_state < 0) - || (fsg->final_state >= fsg->n_state)) { - E_ERROR - ("Line[%d]: final state declaration line missing or malformed\n", - lineno); - goto parse_error; - } - - /* Read transitions */ - lastwid = 0; - n_trans = n_null_trans = 0; - for (;;) { - int32 wid, tprob; - - n = nextline_str2words(fp, &lineno, &lineptr, &wordptr); - if (n <= 0) { - E_ERROR("Line[%d]: transition or FSG_END statement expected\n", - lineno); - goto parse_error; - } - - if ((strcmp(wordptr[0], FSG_MODEL_END_DECL) == 0)) { - break; - } - - if ((strcmp(wordptr[0], FSG_MODEL_T_DECL) == 0) - || (strcmp(wordptr[0], FSG_MODEL_TRANSITION_DECL) == 0)) { - - - if (((n != 4) && (n != 5)) - || (sscanf(wordptr[1], "%d", &i) != 1) - || (sscanf(wordptr[2], "%d", &j) != 1) - || (i < 0) || (i >= fsg->n_state) - || (j < 0) || (j >= fsg->n_state)) { - E_ERROR - ("Line[%d]: transition spec malformed; Expecting: from-state to-state trans-prob [word]\n", - lineno); - goto parse_error; - } - - p = atof_c(wordptr[3]); - if ((p <= 0.0) || (p > 1.0)) { - E_ERROR - ("Line[%d]: transition spec malformed; Expecting float as transition probability\n", - lineno); - goto parse_error; - } - } - else 
{ - E_ERROR("Line[%d]: transition or FSG_END statement expected\n", - lineno); - goto parse_error; - } - - tprob = (int32) (logmath_log(lmath, p) * fsg->lw); - /* Add word to "dictionary". */ - if (n > 4) { - if (hash_table_lookup_int32(vocab, wordptr[4], &wid) < 0) { - (void) hash_table_enter_int32(vocab, - ckd_salloc(wordptr[4]), - lastwid); - wid = lastwid; - ++lastwid; - } - fsg_model_trans_add(fsg, i, j, tprob, wid); - ++n_trans; - } - else { - if (fsg_model_null_trans_add(fsg, i, j, tprob) == 1) { - ++n_null_trans; - nulls = - glist_add_ptr(nulls, fsg_model_null_trans(fsg, i, j)); - } - } - } - - E_INFO("FSG: %d states, %d unique words, %d transitions (%d null)\n", - fsg->n_state, hash_table_inuse(vocab), n_trans, n_null_trans); - - - /* Now create a string table from the "dictionary" */ - fsg->n_word = hash_table_inuse(vocab); - fsg->n_word_alloc = fsg->n_word + 10; /* Pad it a bit. */ - fsg->vocab = ckd_calloc(fsg->n_word_alloc, sizeof(*fsg->vocab)); - for (itor = hash_table_iter(vocab); itor; - itor = hash_table_iter_next(itor)) { - char const *word = hash_entry_key(itor->ent); - int32 wid = (int32) (long) hash_entry_val(itor->ent); - fsg->vocab[wid] = (char *) word; - } - hash_table_free(vocab); - - /* Do transitive closure on null transitions */ - nulls = fsg_model_null_trans_closure(fsg, nulls); - glist_free(nulls); - - ckd_free(lineptr); - ckd_free(wordptr); - - return fsg; - - parse_error: - for (itor = hash_table_iter(vocab); itor; - itor = hash_table_iter_next(itor)) - ckd_free((char *) hash_entry_key(itor->ent)); - glist_free(nulls); - hash_table_free(vocab); - ckd_free(fsgname); - ckd_free(lineptr); - ckd_free(wordptr); - fsg_model_free(fsg); - return NULL; -} - - -fsg_model_t * -fsg_model_readfile(const char *file, logmath_t * lmath, float32 lw) -{ - FILE *fp; - fsg_model_t *fsg; - - if ((fp = fopen(file, "r")) == NULL) { - E_ERROR_SYSTEM("Failed to open FSG file '%s' for reading", file); - return NULL; - } - fsg = fsg_model_read(fp, lmath, lw); 
- fclose(fp); - return fsg; -} - -fsg_model_t * -fsg_model_retain(fsg_model_t * fsg) -{ - ++fsg->refcount; - return fsg; -} - -static void -trans_list_free(fsg_model_t * fsg, int32 i) -{ - hash_iter_t *itor; - - /* FIXME (maybe): FSG links will all get freed when we call - * listelem_alloc_free() so don't bother freeing them explicitly - * here. */ - if (fsg->trans[i].trans) { - for (itor = hash_table_iter(fsg->trans[i].trans); - itor; itor = hash_table_iter_next(itor)) { - glist_t gl = (glist_t) hash_entry_val(itor->ent); - glist_free(gl); - } - } - hash_table_free(fsg->trans[i].trans); - hash_table_free(fsg->trans[i].null_trans); -} - -int -fsg_model_free(fsg_model_t * fsg) -{ - int i; - - if (fsg == NULL) - return 0; - - if (--fsg->refcount > 0) - return fsg->refcount; - - for (i = 0; i < fsg->n_word; ++i) - ckd_free(fsg->vocab[i]); - for (i = 0; i < fsg->n_state; ++i) - trans_list_free(fsg, i); - ckd_free(fsg->trans); - ckd_free(fsg->vocab); - listelem_alloc_free(fsg->link_alloc); - bitvec_free(fsg->silwords); - bitvec_free(fsg->altwords); - ckd_free(fsg->name); - ckd_free(fsg); - return 0; -} - - -void -fsg_model_write(fsg_model_t * fsg, FILE * fp) -{ - int32 i; - - fprintf(fp, "%s %s\n", FSG_MODEL_BEGIN_DECL, - fsg->name ? fsg->name : ""); - fprintf(fp, "%s %d\n", FSG_MODEL_NUM_STATES_DECL, fsg->n_state); - fprintf(fp, "%s %d\n", FSG_MODEL_START_STATE_DECL, fsg->start_state); - fprintf(fp, "%s %d\n", FSG_MODEL_FINAL_STATE_DECL, fsg->final_state); - - for (i = 0; i < fsg->n_state; i++) { - fsg_arciter_t *itor; - - for (itor = fsg_model_arcs(fsg, i); itor; - itor = fsg_arciter_next(itor)) { - fsg_link_t *tl = fsg_arciter_get(itor); - - fprintf(fp, "%s %d %d %f %s\n", FSG_MODEL_TRANSITION_DECL, - tl->from_state, tl->to_state, - logmath_exp(fsg->lmath, - (int32) (tl->logs2prob / fsg->lw)), - (tl->wid < 0) ? 
"" : fsg_model_word_str(fsg, tl->wid)); - } - } - - fprintf(fp, "%s\n", FSG_MODEL_END_DECL); - - fflush(fp); -} - -void -fsg_model_writefile(fsg_model_t * fsg, char const *file) -{ - FILE *fp; - - assert(fsg); - - E_INFO("Writing FSG file '%s'\n", file); - - if ((fp = fopen(file, "w")) == NULL) { - E_ERROR_SYSTEM("Failed to open FSG file '%s' for reading", file); - return; - } - - fsg_model_write(fsg, fp); - - fclose(fp); -} - -static void -fsg_model_write_fsm_trans(fsg_model_t * fsg, int i, FILE * fp) -{ - fsg_arciter_t *itor; - - for (itor = fsg_model_arcs(fsg, i); itor; - itor = fsg_arciter_next(itor)) { - fsg_link_t *tl = fsg_arciter_get(itor); - fprintf(fp, "%d %d %s %f\n", - tl->from_state, tl->to_state, - (tl->wid < 0) ? "<eps>" : fsg_model_word_str(fsg, tl->wid), - -logmath_log_to_ln(fsg->lmath, tl->logs2prob / fsg->lw)); - } -} - -void -fsg_model_write_fsm(fsg_model_t * fsg, FILE * fp) -{ - int i; - - /* Write transitions from initial state first. */ - fsg_model_write_fsm_trans(fsg, fsg_model_start_state(fsg), fp); - - /* Other states. */ - for (i = 0; i < fsg->n_state; i++) { - if (i == fsg_model_start_state(fsg)) - continue; - fsg_model_write_fsm_trans(fsg, i, fp); - } - - /* Final state. 
*/ - fprintf(fp, "%d 0\n", fsg_model_final_state(fsg)); - - fflush(fp); -} - -void -fsg_model_writefile_fsm(fsg_model_t * fsg, char const *file) -{ - FILE *fp; - - assert(fsg); - - E_INFO("Writing FSM file '%s'\n", file); - - if ((fp = fopen(file, "w")) == NULL) { - E_ERROR_SYSTEM("Failed to open fsm file '%s' for writing", file); - return; - } - - fsg_model_write_fsm(fsg, fp); - - fclose(fp); -} - -void -fsg_model_write_symtab(fsg_model_t * fsg, FILE * file) -{ - int i; - - fprintf(file, "<eps> 0\n"); - for (i = 0; i < fsg_model_n_word(fsg); ++i) { - fprintf(file, "%s %d\n", fsg_model_word_str(fsg, i), i + 1); - } - fflush(file); -} - -void -fsg_model_writefile_symtab(fsg_model_t * fsg, char const *file) -{ - FILE *fp; - - assert(fsg); - - E_INFO("Writing FSM symbol table '%s'\n", file); - - if ((fp = fopen(file, "w")) == NULL) { - E_ERROR("Failed to open symbol table '%s' for writing", file); - return; - } - - fsg_model_write_symtab(fsg, fp); - - fclose(fp); -} diff --git a/media/sphinxbase/src/libsphinxbase/lm/jsgf.c b/media/sphinxbase/src/libsphinxbase/lm/jsgf.c deleted file mode 100644 index 90e161c62..000000000 --- a/media/sphinxbase/src/libsphinxbase/lm/jsgf.c +++ /dev/null @@ -1,943 +0,0 @@ -/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ -/* ==================================================================== - * Copyright (c) 2007 Carnegie Mellon University. All rights - * reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. 
- * - * This work was supported in part by funding from the Defense Advanced - * Research Projects Agency and the National Science Foundation of the - * United States of America, and the CMU Sphinx Speech Consortium. - * - * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND - * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, - * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY - * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * ==================================================================== - * - */ - -#include <string.h> -#include <assert.h> - -#include "sphinxbase/ckd_alloc.h" -#include "sphinxbase/strfuncs.h" -#include "sphinxbase/hash_table.h" -#include "sphinxbase/filename.h" -#include "sphinxbase/err.h" -#include "sphinxbase/jsgf.h" - -#include "jsgf_internal.h" -#include "jsgf_parser.h" -#include "jsgf_scanner.h" - -extern int yyparse (void* scanner, jsgf_t* jsgf); - -/** - * \file jsgf.c - * - * This file implements the data structures for parsing JSGF grammars - * into Sphinx finite-state grammars. 
- **/ - -static int expand_rule(jsgf_t *grammar, jsgf_rule_t *rule, int rule_entry, int rule_exit); - -jsgf_atom_t * -jsgf_atom_new(char *name, float weight) -{ - jsgf_atom_t *atom; - - atom = ckd_calloc(1, sizeof(*atom)); - atom->name = ckd_salloc(name); - atom->weight = weight; - return atom; -} - -int -jsgf_atom_free(jsgf_atom_t *atom) -{ - if (atom == NULL) - return 0; - ckd_free(atom->name); - ckd_free(atom); - return 0; -} - -jsgf_t * -jsgf_grammar_new(jsgf_t *parent) -{ - jsgf_t *grammar; - - grammar = ckd_calloc(1, sizeof(*grammar)); - /* If this is an imported/subgrammar, then we will share a global - * namespace with the parent grammar. */ - if (parent) { - grammar->rules = parent->rules; - grammar->imports = parent->imports; - grammar->searchpath = parent->searchpath; - grammar->parent = parent; - } - else { - grammar->rules = hash_table_new(64, 0); - grammar->imports = hash_table_new(16, 0); - } - - return grammar; -} - -void -jsgf_grammar_free(jsgf_t *jsgf) -{ - /* FIXME: Probably should just use refcounting instead. 
*/ - if (jsgf->parent == NULL) { - hash_iter_t *itor; - gnode_t *gn; - - for (itor = hash_table_iter(jsgf->rules); itor; - itor = hash_table_iter_next(itor)) { - ckd_free((char *)itor->ent->key); - jsgf_rule_free((jsgf_rule_t *)itor->ent->val); - } - hash_table_free(jsgf->rules); - for (itor = hash_table_iter(jsgf->imports); itor; - itor = hash_table_iter_next(itor)) { - ckd_free((char *)itor->ent->key); - jsgf_grammar_free((jsgf_t *)itor->ent->val); - } - hash_table_free(jsgf->imports); - for (gn = jsgf->searchpath; gn; gn = gnode_next(gn)) - ckd_free(gnode_ptr(gn)); - glist_free(jsgf->searchpath); - for (gn = jsgf->links; gn; gn = gnode_next(gn)) - ckd_free(gnode_ptr(gn)); - glist_free(jsgf->links); - } - ckd_free(jsgf->name); - ckd_free(jsgf->version); - ckd_free(jsgf->charset); - ckd_free(jsgf->locale); - ckd_free(jsgf); -} - -static void -jsgf_rhs_free(jsgf_rhs_t *rhs) -{ - gnode_t *gn; - - if (rhs == NULL) - return; - - jsgf_rhs_free(rhs->alt); - for (gn = rhs->atoms; gn; gn = gnode_next(gn)) - jsgf_atom_free(gnode_ptr(gn)); - glist_free(rhs->atoms); - ckd_free(rhs); -} - -jsgf_atom_t * -jsgf_kleene_new(jsgf_t *jsgf, jsgf_atom_t *atom, int plus) -{ - jsgf_rule_t *rule; - jsgf_atom_t *rule_atom; - jsgf_rhs_t *rhs; - - /* Generate an "internal" rule of the form (<NULL> | <name> <g0006>) */ - /* Or if plus is true, (<name> | <name> <g0006>) */ - rhs = ckd_calloc(1, sizeof(*rhs)); - if (plus) - rhs->atoms = glist_add_ptr(NULL, jsgf_atom_new(atom->name, 1.0)); - else - rhs->atoms = glist_add_ptr(NULL, jsgf_atom_new("<NULL>", 1.0)); - rule = jsgf_define_rule(jsgf, NULL, rhs, 0); - rule_atom = jsgf_atom_new(rule->name, 1.0); - rhs = ckd_calloc(1, sizeof(*rhs)); - rhs->atoms = glist_add_ptr(NULL, rule_atom); - rhs->atoms = glist_add_ptr(rhs->atoms, atom); - rule->rhs->alt = rhs; - - return jsgf_atom_new(rule->name, 1.0); -} - -jsgf_rule_t * -jsgf_optional_new(jsgf_t *jsgf, jsgf_rhs_t *exp) -{ - jsgf_rhs_t *rhs = ckd_calloc(1, sizeof(*rhs)); - jsgf_atom_t *atom = 
jsgf_atom_new("<NULL>", 1.0); - rhs->alt = exp; - rhs->atoms = glist_add_ptr(NULL, atom); - return jsgf_define_rule(jsgf, NULL, rhs, 0); -} - -void -jsgf_add_link(jsgf_t *grammar, jsgf_atom_t *atom, int from, int to) -{ - jsgf_link_t *link; - - link = ckd_calloc(1, sizeof(*link)); - link->from = from; - link->to = to; - link->atom = atom; - grammar->links = glist_add_ptr(grammar->links, link); -} - -static char * -extract_grammar_name(char *rule_name) -{ - char* dot_pos; - char* grammar_name = ckd_salloc(rule_name + 1); - if ((dot_pos = strrchr(grammar_name + 1, '.')) == NULL) { - ckd_free(grammar_name); - return NULL; - } - *dot_pos='\0'; - return grammar_name; -} - -char const * -jsgf_grammar_name(jsgf_t *jsgf) -{ - return jsgf->name; -} - -static char * -jsgf_fullname(jsgf_t *jsgf, const char *name) -{ - char *fullname; - - /* Check if it is already qualified */ - if (strchr(name + 1, '.')) - return ckd_salloc(name); - - /* Skip leading < in name */ - fullname = ckd_malloc(strlen(jsgf->name) + strlen(name) + 4); - sprintf(fullname, "<%s.%s", jsgf->name, name + 1); - return fullname; -} - -static char * -jsgf_fullname_from_rule(jsgf_rule_t *rule, const char *name) -{ - char *fullname, *grammar_name; - - /* Check if it is already qualified */ - if (strchr(name + 1, '.')) - return ckd_salloc(name); - - /* Skip leading < in name */ - if ((grammar_name = extract_grammar_name(rule->name)) == NULL) - return ckd_salloc(name); - fullname = ckd_malloc(strlen(grammar_name) + strlen(name) + 4); - sprintf(fullname, "<%s.%s", grammar_name, name + 1); - ckd_free(grammar_name); - - return fullname; -} - -/* Extract as rulename everything after the secondlast dot, if existent. - * Because everything before the secondlast dot is the path-specification. 
*/ -static char * -importname2rulename(char *importname) -{ - char *rulename = ckd_salloc(importname); - char *last_dotpos; - char *secondlast_dotpos; - - if ((last_dotpos = strrchr(rulename+1, '.')) != NULL) { - *last_dotpos='\0'; - if ((secondlast_dotpos = strrchr(rulename+1, '.')) != NULL) { - *last_dotpos='.'; - *secondlast_dotpos='<'; - secondlast_dotpos = ckd_salloc(secondlast_dotpos); - ckd_free(rulename); - return secondlast_dotpos; - } - else { - *last_dotpos='.'; - return rulename; - } - } - else { - return rulename; - } -} - -#define NO_NODE -1 -#define RECURSIVE_NODE -2 - -/** - * - * Expand a right-hand-side of a rule (i.e. a single alternate). - * - * @returns the FSG state at the end of this rule, NO_NODE if there's an - * error, and RECURSIVE_NODE if the right-hand-side ended in right-recursion (i.e. - * a link to an earlier FSG state). - */ -static int -expand_rhs(jsgf_t *grammar, jsgf_rule_t *rule, jsgf_rhs_t *rhs, - int rule_entry, int rule_exit) -{ - gnode_t *gn; - int lastnode; - - /* Last node expanded in this sequence. */ - lastnode = rule_entry; - - /* Iterate over atoms in rhs and generate links/nodes */ - for (gn = rhs->atoms; gn; gn = gnode_next(gn)) { - jsgf_atom_t *atom = gnode_ptr(gn); - - if (jsgf_atom_is_rule(atom)) { - jsgf_rule_t *subrule; - char *fullname; - gnode_t *subnode; - jsgf_rule_stack_t *rule_stack_entry = NULL; - - /* Special case for <NULL> and <VOID> pseudo-rules - If this is the only atom in the rhs, and it's the - first rhs in the rule, then emit a null transition, - creating an exit state if needed. 
*/ - if (0 == strcmp(atom->name, "<NULL>")) { - if (gn == rhs->atoms && gnode_next(gn) == NULL) { - if (rule_exit == NO_NODE) { - jsgf_add_link(grammar, atom, - lastnode, grammar->nstate); - rule_exit = lastnode = grammar->nstate; - ++grammar->nstate; - } else { - jsgf_add_link(grammar, atom, - lastnode, rule_exit); - } - } - continue; - } - else if (0 == strcmp(atom->name, "<VOID>")) { - /* Make this entire RHS unspeakable */ - return NO_NODE; - } - - fullname = jsgf_fullname_from_rule(rule, atom->name); - if (hash_table_lookup(grammar->rules, fullname, (void**)&subrule) == -1) { - E_ERROR("Undefined rule in RHS: %s\n", fullname); - ckd_free(fullname); - return NO_NODE; - } - ckd_free(fullname); - - /* Look for this subrule in the stack of expanded rules */ - for (subnode = grammar->rulestack; subnode; subnode = gnode_next(subnode)) { - rule_stack_entry = (jsgf_rule_stack_t *)gnode_ptr(subnode); - if (rule_stack_entry->rule == subrule) - break; - } - - if (subnode != NULL) { - /* Allow right-recursion only. */ - if (gnode_next(gn) != NULL) { - E_ERROR("Only right-recursion is permitted (in %s.%s)\n", - grammar->name, rule->name); - return NO_NODE; - } - /* Add a link back to the beginning of this rule instance */ - E_INFO("Right recursion %s %d => %d\n", atom->name, lastnode, rule_stack_entry->entry); - jsgf_add_link(grammar, atom, lastnode, rule_stack_entry->entry); - - /* Let our caller know that this rhs didn't reach an - end state. */ - lastnode = RECURSIVE_NODE; - } - else { - /* If this is the last atom in this rhs, link its - expansion to the parent rule's exit state. - Otherwise, create a new exit state for it. 
*/ - int subruleexit = NO_NODE; - if (gnode_next(gn) == NULL && rule_exit >= 0) - subruleexit = rule_exit; - - /* Expand the subrule */ - lastnode = expand_rule(grammar, subrule, lastnode, subruleexit); - - if (lastnode == NO_NODE) - return NO_NODE; - } - } - else { - /* An exit-state is created if this isn't the last atom - in the rhs, or if the containing rule doesn't have an - exit state yet. - Otherwise, the rhs's exit state becomes the containing - rule's exit state. */ - int exitstate; - if (gnode_next(gn) == NULL && rule_exit >= 0) { - exitstate = rule_exit; - } else { - exitstate = grammar->nstate; - ++grammar->nstate; - } - - /* Add a link for this token */ - jsgf_add_link(grammar, atom, - lastnode, exitstate); - lastnode = exitstate; - } - } - - return lastnode; -} - -static int -expand_rule(jsgf_t *grammar, jsgf_rule_t *rule, int rule_entry, - int rule_exit) -{ - jsgf_rule_stack_t* rule_stack_entry; - jsgf_rhs_t *rhs; - - /* Push this rule onto the stack */ - rule_stack_entry = (jsgf_rule_stack_t*)ckd_calloc(1, sizeof (jsgf_rule_stack_t)); - rule_stack_entry->rule = rule; - rule_stack_entry->entry = rule_entry; - grammar->rulestack = glist_add_ptr(grammar->rulestack, - rule_stack_entry); - - for (rhs = rule->rhs; rhs; rhs = rhs->alt) { - int lastnode; - - lastnode = expand_rhs(grammar, rule, rhs, - rule_entry, rule_exit); - - if (lastnode == NO_NODE) { - return NO_NODE; - } else if (lastnode == RECURSIVE_NODE) { - /* The rhs ended with right-recursion, i.e. a transition to - an earlier state. Nothing needs to happen at this level. */ - ; - } else if (rule_exit == NO_NODE) { - /* If this rule doesn't have an exit state yet, use the exit - state of its first right-hand-side. - All other right-hand-sides will use this exit state. */ - assert (lastnode >= 0); - rule_exit = lastnode; - } - } - - /* If no exit-state was created, use the entry-state. 
*/ - if (rule_exit == NO_NODE) { - rule_exit = rule_entry; - } - - /* Pop this rule from the rule stack */ - ckd_free(gnode_ptr(grammar->rulestack)); - grammar->rulestack = gnode_free(grammar->rulestack, NULL); - - return rule_exit; -} - -jsgf_rule_iter_t * -jsgf_rule_iter(jsgf_t *grammar) -{ - return hash_table_iter(grammar->rules); -} - -jsgf_rule_t * -jsgf_get_rule(jsgf_t *grammar, char const *name) -{ - void *val; - char *fullname; - - fullname = string_join("<", name, ">", NULL); - if (hash_table_lookup(grammar->rules, fullname, &val) < 0) { - ckd_free(fullname); - return NULL; - } - ckd_free(fullname); - return (jsgf_rule_t *)val; -} - -jsgf_rule_t * -jsgf_get_public_rule(jsgf_t *grammar) -{ - jsgf_rule_iter_t *itor; - jsgf_rule_t *public_rule = NULL; - - for (itor = jsgf_rule_iter(grammar); itor; - itor = jsgf_rule_iter_next(itor)) { - jsgf_rule_t *rule = jsgf_rule_iter_rule(itor); - if (jsgf_rule_public(rule)) { - const char *rule_name = jsgf_rule_name(rule); - char *dot_pos; - if ((dot_pos = strrchr(rule_name + 1, '.')) == NULL) { - public_rule = rule; - jsgf_rule_iter_free(itor); - break; - } - if (0 == strncmp(rule_name + 1, jsgf_grammar_name(grammar), dot_pos - rule_name - 1)) { - public_rule = rule; - jsgf_rule_iter_free(itor); - break; - } - } - } - return public_rule; -} - -char const * -jsgf_rule_name(jsgf_rule_t *rule) -{ - return rule->name; -} - -int -jsgf_rule_public(jsgf_rule_t *rule) -{ - return rule->is_public; -} - -static fsg_model_t * -jsgf_build_fsg_internal(jsgf_t *grammar, jsgf_rule_t *rule, - logmath_t *lmath, float32 lw, int do_closure) -{ - fsg_model_t *fsg; - glist_t nulls; - gnode_t *gn; - int rule_entry, rule_exit; - - /* Clear previous links */ - for (gn = grammar->links; gn; gn = gnode_next(gn)) { - ckd_free(gnode_ptr(gn)); - } - glist_free(grammar->links); - grammar->links = NULL; - grammar->nstate = 0; - - /* Create the top-level entry state, and expand the - top-level rule. 
*/ - rule_entry = grammar->nstate++; - rule_exit = expand_rule(grammar, rule, rule_entry, NO_NODE); - - /* If no exit-state was created, create one. */ - if (rule_exit == NO_NODE) { - rule_exit = grammar->nstate++; - jsgf_add_link(grammar, NULL, rule_entry, rule_exit); - } - - fsg = fsg_model_init(rule->name, lmath, lw, grammar->nstate); - fsg->start_state = rule_entry; - fsg->final_state = rule_exit; - grammar->links = glist_reverse(grammar->links); - for (gn = grammar->links; gn; gn = gnode_next(gn)) { - jsgf_link_t *link = gnode_ptr(gn); - - if (link->atom) { - if (jsgf_atom_is_rule(link->atom)) { - fsg_model_null_trans_add(fsg, link->from, link->to, - logmath_log(lmath, link->atom->weight)); - } - else { - int wid = fsg_model_word_add(fsg, link->atom->name); - fsg_model_trans_add(fsg, link->from, link->to, - logmath_log(lmath, link->atom->weight), wid); - } - } - else { - fsg_model_null_trans_add(fsg, link->from, link->to, 0); - } - } - if (do_closure) { - nulls = fsg_model_null_trans_closure(fsg, NULL); - glist_free(nulls); - } - - return fsg; -} - -fsg_model_t * -jsgf_build_fsg(jsgf_t *grammar, jsgf_rule_t *rule, - logmath_t *lmath, float32 lw) -{ - return jsgf_build_fsg_internal(grammar, rule, lmath, lw, TRUE); -} - -fsg_model_t * -jsgf_build_fsg_raw(jsgf_t *grammar, jsgf_rule_t *rule, - logmath_t *lmath, float32 lw) -{ - return jsgf_build_fsg_internal(grammar, rule, lmath, lw, FALSE); -} - -fsg_model_t * -jsgf_read_file(const char *file, logmath_t * lmath, float32 lw) -{ - fsg_model_t *fsg; - jsgf_rule_t *rule; - jsgf_t *jsgf; - jsgf_rule_iter_t *itor; - - if ((jsgf = jsgf_parse_file(file, NULL)) == NULL) { - E_ERROR("Error parsing file: %s\n", file); - return NULL; - } - - rule = NULL; - for (itor = jsgf_rule_iter(jsgf); itor; - itor = jsgf_rule_iter_next(itor)) { - rule = jsgf_rule_iter_rule(itor); - if (jsgf_rule_public(rule)) { - jsgf_rule_iter_free(itor); - break; - } - } - if (rule == NULL) { - E_ERROR("No public rules found in %s\n", file); - return 
NULL; - } - fsg = jsgf_build_fsg(jsgf, rule, lmath, lw); - jsgf_grammar_free(jsgf); - return fsg; -} - -fsg_model_t * -jsgf_read_string(const char *string, logmath_t * lmath, float32 lw) -{ - fsg_model_t *fsg; - jsgf_rule_t *rule; - jsgf_t *jsgf; - jsgf_rule_iter_t *itor; - - if ((jsgf = jsgf_parse_string(string, NULL)) == NULL) { - E_ERROR("Error parsing input string\n"); - return NULL; - } - - rule = NULL; - for (itor = jsgf_rule_iter(jsgf); itor; - itor = jsgf_rule_iter_next(itor)) { - rule = jsgf_rule_iter_rule(itor); - if (jsgf_rule_public(rule)) { - jsgf_rule_iter_free(itor); - break; - } - } - if (rule == NULL) { - jsgf_grammar_free(jsgf); - E_ERROR("No public rules found in input string\n"); - return NULL; - } - fsg = jsgf_build_fsg(jsgf, rule, lmath, lw); - jsgf_grammar_free(jsgf); - return fsg; -} - - -int -jsgf_write_fsg(jsgf_t *grammar, jsgf_rule_t *rule, FILE *outfh) -{ - fsg_model_t *fsg; - logmath_t *lmath = logmath_init(1.0001, 0, 0); - - if ((fsg = jsgf_build_fsg_raw(grammar, rule, lmath, 1.0)) == NULL) - goto error_out; - - fsg_model_write(fsg, outfh); - logmath_free(lmath); - return 0; - -error_out: - logmath_free(lmath); - return -1; -} - -jsgf_rule_t * -jsgf_define_rule(jsgf_t *jsgf, char *name, jsgf_rhs_t *rhs, int is_public) -{ - jsgf_rule_t *rule; - void *val; - - if (name == NULL) { - name = ckd_malloc(strlen(jsgf->name) + 16); - sprintf(name, "<%s.g%05d>", jsgf->name, hash_table_inuse(jsgf->rules)); - } - else { - char *newname; - - newname = jsgf_fullname(jsgf, name); - name = newname; - } - - rule = ckd_calloc(1, sizeof(*rule)); - rule->refcnt = 1; - rule->name = ckd_salloc(name); - rule->rhs = rhs; - rule->is_public = is_public; - - E_INFO("Defined rule: %s%s\n", - rule->is_public ? 
"PUBLIC " : "", - rule->name); - val = hash_table_enter(jsgf->rules, name, rule); - if (val != (void *)rule) { - E_WARN("Multiply defined symbol: %s\n", name); - } - return rule; -} - -jsgf_rule_t * -jsgf_rule_retain(jsgf_rule_t *rule) -{ - ++rule->refcnt; - return rule; -} - -int -jsgf_rule_free(jsgf_rule_t *rule) -{ - if (rule == NULL) - return 0; - if (--rule->refcnt > 0) - return rule->refcnt; - jsgf_rhs_free(rule->rhs); - ckd_free(rule->name); - ckd_free(rule); - return 0; -} - - -/* FIXME: This should go in libsphinxutil */ -static char * -path_list_search(glist_t paths, char *path) -{ - gnode_t *gn; - - for (gn = paths; gn; gn = gnode_next(gn)) { - char *fullpath; - FILE *tmp; - - fullpath = string_join(gnode_ptr(gn), "/", path, NULL); - tmp = fopen(fullpath, "r"); - if (tmp != NULL) { - fclose(tmp); - return fullpath; - } - else { - ckd_free(fullpath); - } - } - return NULL; -} - -jsgf_rule_t * -jsgf_import_rule(jsgf_t *jsgf, char *name) -{ - char *c, *path, *newpath; - size_t namelen, packlen; - void *val; - jsgf_t *imp; - int import_all; - - /* Trim the leading and trailing <> */ - namelen = strlen(name); - path = ckd_malloc(namelen - 2 + 6); /* room for a trailing .gram */ - strcpy(path, name + 1); - /* Split off the first part of the name */ - c = strrchr(path, '.'); - if (c == NULL) { - E_ERROR("Imported rule is not qualified: %s\n", name); - ckd_free(path); - return NULL; - } - packlen = c - path; - *c = '\0'; - - /* Look for import foo.* */ - import_all = (strlen(name) > 2 && 0 == strcmp(name + namelen - 3, ".*>")); - - /* Construct a filename. 
*/ - for (c = path; *c; ++c) - if (*c == '.') *c = '/'; - strcat(path, ".gram"); - newpath = path_list_search(jsgf->searchpath, path); - if (newpath == NULL) { - E_ERROR("Failed to find grammar %s\n", path); - ckd_free(path); - return NULL; - } - ckd_free(path); - - path = newpath; - E_INFO("Importing %s from %s to %s\n", name, path, jsgf->name); - - /* FIXME: Also, we need to make sure that path is fully qualified - * here, by adding any prefixes from jsgf->name to it. */ - /* See if we have parsed it already */ - if (hash_table_lookup(jsgf->imports, path, &val) == 0) { - E_INFO("Already imported %s\n", path); - imp = val; - ckd_free(path); - } - else { - /* If not, parse it. */ - imp = jsgf_parse_file(path, jsgf); - val = hash_table_enter(jsgf->imports, path, imp); - if (val != (void *)imp) { - E_WARN("Multiply imported file: %s\n", path); - } - } - if (imp != NULL) { - hash_iter_t *itor; - /* Look for public rules matching rulename. */ - for (itor = hash_table_iter(imp->rules); itor; - itor = hash_table_iter_next(itor)) { - hash_entry_t *he = itor->ent; - jsgf_rule_t *rule = hash_entry_val(he); - int rule_matches; - char *rule_name = importname2rulename(name); - - if (import_all) { - /* Match package name (symbol table is shared) */ - rule_matches = !strncmp(rule_name, rule->name, packlen + 1); - } - else { - /* Exact match */ - rule_matches = !strcmp(rule_name, rule->name); - } - ckd_free(rule_name); - if (rule->is_public && rule_matches) { - void *val; - char *newname; - - /* Link this rule into the current namespace. 
*/ - c = strrchr(rule->name, '.'); - assert(c != NULL); - newname = jsgf_fullname(jsgf, c); - - E_INFO("Imported %s\n", newname); - val = hash_table_enter(jsgf->rules, newname, - jsgf_rule_retain(rule)); - if (val != (void *)rule) { - E_WARN("Multiply defined symbol: %s\n", newname); - } - if (!import_all) { - hash_table_iter_free(itor); - return rule; - } - } - } - } - - return NULL; -} - -static void -jsgf_set_search_path(jsgf_t *jsgf, const char *filename) -{ - char *jsgf_path; - -#if !defined(_WIN32_WCE) - if ((jsgf_path = getenv("JSGF_PATH")) != NULL) { - char *word, *c; - /* FIXME: This should be a function in libsphinxbase. */ - word = jsgf_path = ckd_salloc(jsgf_path); - while ((c = strchr(word, ':'))) { - *c = '\0'; - jsgf->searchpath = glist_add_ptr(jsgf->searchpath, word); - word = c + 1; - } - jsgf->searchpath = glist_add_ptr(jsgf->searchpath, word); - jsgf->searchpath = glist_reverse(jsgf->searchpath); - return; - } -#endif - - if (!filename) { - jsgf->searchpath = glist_add_ptr(jsgf->searchpath, ckd_salloc(".")); - return; - } - - jsgf_path = ckd_salloc(filename); - path2dirname(filename, jsgf_path); - jsgf->searchpath = glist_add_ptr(jsgf->searchpath, jsgf_path); -} - -jsgf_t * -jsgf_parse_file(const char *filename, jsgf_t *parent) -{ - yyscan_t yyscanner; - jsgf_t *jsgf; - int yyrv; - FILE *in = NULL; - - yylex_init(&yyscanner); - if (filename == NULL) { - yyset_in(stdin, yyscanner); - } - else { - in = fopen(filename, "r"); - if (in == NULL) { - E_ERROR_SYSTEM("Failed to open %s for parsing", filename); - return NULL; - } - yyset_in(in, yyscanner); - } - - jsgf = jsgf_grammar_new(parent); - - if (!parent) - jsgf_set_search_path(jsgf, filename); - - yyrv = yyparse(yyscanner, jsgf); - if (yyrv != 0) { - E_ERROR("Failed to parse JSGF grammar from '%s'\n", filename ? 
filename : "(stdin)"); - jsgf_grammar_free(jsgf); - yylex_destroy(yyscanner); - return NULL; - } - if (in) - fclose(in); - yylex_destroy(yyscanner); - - return jsgf; -} - -jsgf_t * -jsgf_parse_string(const char *string, jsgf_t * parent) -{ - yyscan_t yyscanner; - jsgf_t *jsgf; - int yyrv; - YY_BUFFER_STATE buf; - - yylex_init(&yyscanner); - buf = yy_scan_string(string, yyscanner); - - jsgf = jsgf_grammar_new(parent); - if (!parent) - jsgf_set_search_path(jsgf, NULL); - - yyrv = yyparse(yyscanner, jsgf); - if (yyrv != 0) { - E_ERROR("Failed to parse JSGF grammar from input string\n"); - jsgf_grammar_free(jsgf); - yy_delete_buffer(buf, yyscanner); - yylex_destroy(yyscanner); - return NULL; - } - yy_delete_buffer(buf, yyscanner); - yylex_destroy(yyscanner); - - return jsgf; -} diff --git a/media/sphinxbase/src/libsphinxbase/lm/jsgf_internal.h b/media/sphinxbase/src/libsphinxbase/lm/jsgf_internal.h deleted file mode 100644 index a5cbc9833..000000000 --- a/media/sphinxbase/src/libsphinxbase/lm/jsgf_internal.h +++ /dev/null @@ -1,140 +0,0 @@ -/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ -/* ==================================================================== - * Copyright (c) 2007 Carnegie Mellon University. All rights - * reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * This work was supported in part by funding from the Defense Advanced - * Research Projects Agency and the National Science Foundation of the - * United States of America, and the CMU Sphinx Speech Consortium. 
- * - * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND - * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, - * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY - * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * ==================================================================== - * - */ - -#ifndef __JSGF_INTERNAL_H__ -#define __JSGF_INTERNAL_H__ - -/** - * @file jsgf_internal.h Internal definitions for JSGF grammar compiler - */ - -#include <stdio.h> - -#include <sphinxbase/hash_table.h> -#include <sphinxbase/glist.h> -#include <sphinxbase/fsg_model.h> -#include <sphinxbase/logmath.h> -#include <sphinxbase/strfuncs.h> -#include <sphinxbase/jsgf.h> - - -/* Flex uses strdup which is missing on WinCE */ -#if defined(_WIN32) || defined(_WIN32_WCE) -#define strdup _strdup -#endif - -#ifdef __cplusplus -extern "C" { -#endif -#if 0 -/* Fool Emacs. */ -} -#endif - -#define YY_NO_INPUT /* Silence a compiler warning. */ - -typedef struct jsgf_rhs_s jsgf_rhs_t; -typedef struct jsgf_atom_s jsgf_atom_t; -typedef struct jsgf_link_s jsgf_link_t; -typedef struct jsgf_rule_stack_s jsgf_rule_stack_t; - -struct jsgf_s { - char *version; /**< JSGF version (from header) */ - char *charset; /**< JSGF charset (default UTF-8) */ - char *locale; /**< JSGF locale (default C) */ - char *name; /**< Grammar name */ - - hash_table_t *rules; /**< Defined or imported rules in this grammar. 
*/ - hash_table_t *imports; /**< Pointers to imported grammars. */ - jsgf_t *parent; /**< Parent grammar (if this is an imported one) */ - glist_t searchpath; /**< List of directories to search for grammars. */ - - /* Scratch variables for FSG conversion. */ - int nstate; /**< Number of generated states. */ - glist_t links; /**< Generated FSG links. */ - glist_t rulestack; /**< Stack of currently expanded rules. */ -}; - -/* A type to keep track of the stack of rules currently being expanded. */ -struct jsgf_rule_stack_s { - jsgf_rule_t *rule; /**< The rule being expanded */ - int entry; /**< The entry-state for this expansion */ -}; - -struct jsgf_rule_s { - int refcnt; /**< Reference count. */ - char *name; /**< Rule name (jsgf_define_rule() generates "<grammar.gNNNNN>" for an unnamed alternation/grouping) */ - int is_public; /**< Is this rule marked 'public'? */ - jsgf_rhs_t *rhs; /**< Expansion */ -}; - -struct jsgf_rhs_s { - glist_t atoms; /**< Sequence of items */ - jsgf_rhs_t *alt; /**< Linked list of alternates */ -}; - -struct jsgf_atom_s { - char *name; /**< Rule or token name */ - glist_t tags; /**< Tags, if any (glist_t of char *) */ - float weight; /**< Weight (default 1) */ -}; - -struct jsgf_link_s { - jsgf_atom_t *atom; /**< Name, tags, weight */ - int from; /**< From state */ - int to; /**< To state */ -}; - -#define jsgf_atom_is_rule(atom) ((atom)->name[0] == '<') - -void jsgf_add_link(jsgf_t *grammar, jsgf_atom_t *atom, int from, int to); -jsgf_atom_t *jsgf_atom_new(char *name, float weight); -jsgf_atom_t *jsgf_kleene_new(jsgf_t *jsgf, jsgf_atom_t *atom, int plus); -jsgf_rule_t *jsgf_optional_new(jsgf_t *jsgf, jsgf_rhs_t *exp); -jsgf_rule_t *jsgf_define_rule(jsgf_t *jsgf, char *name, jsgf_rhs_t *rhs, int is_public); -jsgf_rule_t *jsgf_import_rule(jsgf_t *jsgf, char *name); - -int jsgf_atom_free(jsgf_atom_t *atom); -int jsgf_rule_free(jsgf_rule_t *rule); -jsgf_rule_t *jsgf_rule_retain(jsgf_rule_t *rule); - -#ifdef __cplusplus -} -#endif - - -#endif /* __JSGF_INTERNAL_H__ */ diff --git 
a/media/sphinxbase/src/libsphinxbase/lm/jsgf_parser.c b/media/sphinxbase/src/libsphinxbase/lm/jsgf_parser.c deleted file mode 100644 index 20acbb9d9..000000000 --- a/media/sphinxbase/src/libsphinxbase/lm/jsgf_parser.c +++ /dev/null @@ -1,1799 +0,0 @@ - -/* A Bison parser, made by GNU Bison 2.4.1. */ - -/* Skeleton implementation for Bison's Yacc-like parsers in C - - Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004, 2005, 2006 - Free Software Foundation, Inc. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. */ - -/* As a special exception, you may create a larger work that contains - part or all of the Bison parser skeleton and distribute that work - under terms of your choice, so long as that work isn't itself a - parser generator using the skeleton or a modified version thereof - as a parser skeleton. Alternatively, if you modify or redistribute - the parser skeleton itself, you may (at your option) remove this - special exception, which will cause the skeleton and the resulting - Bison output files to be licensed under the GNU General Public - License without this special exception. - - This special exception was added by the Free Software Foundation in - version 2.2 of Bison. */ - -/* C LALR(1) parser skeleton written by Richard Stallman, by - simplifying the original so-called "semantic" parser. 
*/ - -/* All symbols defined below should begin with yy or YY, to avoid - infringing on user name space. This should be done even for local - variables, as they might otherwise be expanded by user macros. - There are some unavoidable exceptions within include files to - define necessary library symbols; they are noted "INFRINGES ON - USER NAME SPACE" below. */ - -/* Identify Bison output. */ -#define YYBISON 1 - -/* Bison version. */ -#define YYBISON_VERSION "2.4.1" - -/* Skeleton name. */ -#define YYSKELETON_NAME "yacc.c" - -/* Pure parsers. */ -#define YYPURE 1 - -/* Push parsers. */ -#define YYPUSH 0 - -/* Pull parsers. */ -#define YYPULL 1 - -/* Using locations. */ -#define YYLSP_NEEDED 0 - - - -/* Copy the first part of user declarations. */ - -/* Line 189 of yacc.c */ -#line 37 "jsgf_parser.y" - -#define YYERROR_VERBOSE - -#include <stdio.h> -#include <string.h> - -#include <sphinxbase/hash_table.h> -#include <sphinxbase/ckd_alloc.h> -#include <sphinxbase/err.h> - -#include "jsgf_internal.h" -#include "jsgf_parser.h" -#include "jsgf_scanner.h" - -/* Suppress warnings from generated code */ -#if defined _MSC_VER -#pragma warning(disable: 4273) -#endif - -void yyerror(yyscan_t lex, jsgf_t *jsgf, const char *s); - - - -/* Line 189 of yacc.c */ -#line 97 "jsgf_parser.c" - -/* Enabling traces. */ -#ifndef YYDEBUG -# define YYDEBUG 0 -#endif - -/* Enabling verbose error messages. */ -#ifdef YYERROR_VERBOSE -# undef YYERROR_VERBOSE -# define YYERROR_VERBOSE 1 -#else -# define YYERROR_VERBOSE 0 -#endif - -/* Enabling the token table. */ -#ifndef YYTOKEN_TABLE -# define YYTOKEN_TABLE 0 -#endif - - -/* Tokens. */ -#ifndef YYTOKENTYPE -# define YYTOKENTYPE - /* Put the tokens into the symbol table, so that GDB and other debuggers - know about them. */ - enum yytokentype { - HEADER = 258, - GRAMMAR = 259, - IMPORT = 260, - PUBLIC = 261, - TOKEN = 262, - RULENAME = 263, - TAG = 264, - WEIGHT = 265 - }; -#endif -/* Tokens. 
*/ -#define HEADER 258 -#define GRAMMAR 259 -#define IMPORT 260 -#define PUBLIC 261 -#define TOKEN 262 -#define RULENAME 263 -#define TAG 264 -#define WEIGHT 265 - - - - -#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED -typedef union YYSTYPE -{ - -/* Line 214 of yacc.c */ -#line 65 "jsgf_parser.y" - - char *name; - float weight; - jsgf_rule_t *rule; - jsgf_rhs_t *rhs; - jsgf_atom_t *atom; - - - -/* Line 214 of yacc.c */ -#line 163 "jsgf_parser.c" -} YYSTYPE; -# define YYSTYPE_IS_TRIVIAL 1 -# define yystype YYSTYPE /* obsolescent; will be withdrawn */ -# define YYSTYPE_IS_DECLARED 1 -#endif - - -/* Copy the second part of user declarations. */ - - -/* Line 264 of yacc.c */ -#line 175 "jsgf_parser.c" - -#ifdef short -# undef short -#endif - -#ifdef YYTYPE_UINT8 -typedef YYTYPE_UINT8 yytype_uint8; -#else -typedef unsigned char yytype_uint8; -#endif - -#ifdef YYTYPE_INT8 -typedef YYTYPE_INT8 yytype_int8; -#elif (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -typedef signed char yytype_int8; -#else -typedef short int yytype_int8; -#endif - -#ifdef YYTYPE_UINT16 -typedef YYTYPE_UINT16 yytype_uint16; -#else -typedef unsigned short int yytype_uint16; -#endif - -#ifdef YYTYPE_INT16 -typedef YYTYPE_INT16 yytype_int16; -#else -typedef short int yytype_int16; -#endif - -#ifndef YYSIZE_T -# ifdef __SIZE_TYPE__ -# define YYSIZE_T __SIZE_TYPE__ -# elif defined size_t -# define YYSIZE_T size_t -# elif ! 
defined YYSIZE_T && (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -# include <stddef.h> /* INFRINGES ON USER NAME SPACE */ -# define YYSIZE_T size_t -# else -# define YYSIZE_T unsigned int -# endif -#endif - -#define YYSIZE_MAXIMUM ((YYSIZE_T) -1) - -#ifndef YY_ -# if YYENABLE_NLS -# if ENABLE_NLS -# include <libintl.h> /* INFRINGES ON USER NAME SPACE */ -# define YY_(msgid) dgettext ("bison-runtime", msgid) -# endif -# endif -# ifndef YY_ -# define YY_(msgid) msgid -# endif -#endif - -/* Suppress unused-variable warnings by "using" E. */ -#if ! defined lint || defined __GNUC__ -# define YYUSE(e) ((void) (e)) -#else -# define YYUSE(e) /* empty */ -#endif - -/* Identity function, used to suppress warnings about constant conditions. */ -#ifndef lint -# define YYID(n) (n) -#else -#if (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -static int -YYID (int yyi) -#else -static int -YYID (yyi) - int yyi; -#endif -{ - return yyi; -} -#endif - -#if ! defined yyoverflow || YYERROR_VERBOSE - -/* The parser invokes alloca or malloc; define the necessary symbols. */ - -# ifdef YYSTACK_USE_ALLOCA -# if YYSTACK_USE_ALLOCA -# ifdef __GNUC__ -# define YYSTACK_ALLOC __builtin_alloca -# elif defined __BUILTIN_VA_ARG_INCR -# include <alloca.h> /* INFRINGES ON USER NAME SPACE */ -# elif defined _AIX -# define YYSTACK_ALLOC __alloca -# elif defined _MSC_VER -# include <malloc.h> /* INFRINGES ON USER NAME SPACE */ -# define alloca _alloca -# else -# define YYSTACK_ALLOC alloca -# if ! defined _ALLOCA_H && ! defined _STDLIB_H && (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -# include <stdlib.h> /* INFRINGES ON USER NAME SPACE */ -# ifndef _STDLIB_H -# define _STDLIB_H 1 -# endif -# endif -# endif -# endif -# endif - -# ifdef YYSTACK_ALLOC - /* Pacify GCC's `empty if-body' warning. 
*/ -# define YYSTACK_FREE(Ptr) do { /* empty */; } while (YYID (0)) -# ifndef YYSTACK_ALLOC_MAXIMUM - /* The OS might guarantee only one guard page at the bottom of the stack, - and a page size can be as small as 4096 bytes. So we cannot safely - invoke alloca (N) if N exceeds 4096. Use a slightly smaller number - to allow for a few compiler-allocated temporary stack slots. */ -# define YYSTACK_ALLOC_MAXIMUM 4032 /* reasonable circa 2006 */ -# endif -# else -# define YYSTACK_ALLOC YYMALLOC -# define YYSTACK_FREE YYFREE -# ifndef YYSTACK_ALLOC_MAXIMUM -# define YYSTACK_ALLOC_MAXIMUM YYSIZE_MAXIMUM -# endif -# if (defined __cplusplus && ! defined _STDLIB_H \ - && ! ((defined YYMALLOC || defined malloc) \ - && (defined YYFREE || defined free))) -# include <stdlib.h> /* INFRINGES ON USER NAME SPACE */ -# ifndef _STDLIB_H -# define _STDLIB_H 1 -# endif -# endif -# ifndef YYMALLOC -# define YYMALLOC malloc -# if ! defined malloc && ! defined _STDLIB_H && (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -void *malloc (YYSIZE_T); /* INFRINGES ON USER NAME SPACE */ -# endif -# endif -# ifndef YYFREE -# define YYFREE free -# if ! defined free && ! defined _STDLIB_H && (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -void free (void *); /* INFRINGES ON USER NAME SPACE */ -# endif -# endif -# endif -#endif /* ! defined yyoverflow || YYERROR_VERBOSE */ - - -#if (! defined yyoverflow \ - && (! defined __cplusplus \ - || (defined YYSTYPE_IS_TRIVIAL && YYSTYPE_IS_TRIVIAL))) - -/* A type that is properly aligned for any stack member. */ -union yyalloc -{ - yytype_int16 yyss_alloc; - YYSTYPE yyvs_alloc; -}; - -/* The size of the maximum gap between one aligned stack and the next. */ -# define YYSTACK_GAP_MAXIMUM (sizeof (union yyalloc) - 1) - -/* The size of an array large to enough to hold all stacks, each with - N elements. 
*/ -# define YYSTACK_BYTES(N) \ - ((N) * (sizeof (yytype_int16) + sizeof (YYSTYPE)) \ - + YYSTACK_GAP_MAXIMUM) - -/* Copy COUNT objects from FROM to TO. The source and destination do - not overlap. */ -# ifndef YYCOPY -# if defined __GNUC__ && 1 < __GNUC__ -# define YYCOPY(To, From, Count) \ - __builtin_memcpy (To, From, (Count) * sizeof (*(From))) -# else -# define YYCOPY(To, From, Count) \ - do \ - { \ - YYSIZE_T yyi; \ - for (yyi = 0; yyi < (Count); yyi++) \ - (To)[yyi] = (From)[yyi]; \ - } \ - while (YYID (0)) -# endif -# endif - -/* Relocate STACK from its old location to the new one. The - local variables YYSIZE and YYSTACKSIZE give the old and new number of - elements in the stack, and YYPTR gives the new location of the - stack. Advance YYPTR to a properly aligned location for the next - stack. */ -# define YYSTACK_RELOCATE(Stack_alloc, Stack) \ - do \ - { \ - YYSIZE_T yynewbytes; \ - YYCOPY (&yyptr->Stack_alloc, Stack, yysize); \ - Stack = &yyptr->Stack_alloc; \ - yynewbytes = yystacksize * sizeof (*Stack) + YYSTACK_GAP_MAXIMUM; \ - yyptr += yynewbytes / sizeof (*yyptr); \ - } \ - while (YYID (0)) - -#endif - -/* YYFINAL -- State number of the termination state. */ -#define YYFINAL 7 -/* YYLAST -- Last index in YYTABLE. */ -#define YYLAST 54 - -/* YYNTOKENS -- Number of terminals. */ -#define YYNTOKENS 20 -/* YYNNTS -- Number of nonterminals. */ -#define YYNNTS 16 -/* YYNRULES -- Number of rules. */ -#define YYNRULES 33 -/* YYNRULES -- Number of states. */ -#define YYNSTATES 58 - -/* YYTRANSLATE(YYLEX) -- Bison symbol number corresponding to YYLEX. */ -#define YYUNDEFTOK 2 -#define YYMAXUTOK 265 - -#define YYTRANSLATE(YYX) \ - ((unsigned int) (YYX) <= YYMAXUTOK ? yytranslate[YYX] : YYUNDEFTOK) - -/* YYTRANSLATE[YYLEX] -- Bison symbol number corresponding to YYLEX. 
*/ -static const yytype_uint8 yytranslate[] = -{ - 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 14, 15, 18, 19, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 11, - 2, 12, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 16, 2, 17, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 13, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 1, 2, 3, 4, - 5, 6, 7, 8, 9, 10 -}; - -#if YYDEBUG -/* YYPRHS[YYN] -- Index of the first RHS symbol of rule number YYN in - YYRHS. */ -static const yytype_uint8 yyprhs[] = -{ - 0, 0, 3, 5, 8, 12, 15, 18, 22, 27, - 33, 37, 39, 42, 46, 48, 51, 56, 62, 64, - 68, 70, 73, 75, 78, 80, 83, 87, 91, 93, - 95, 97, 99, 102 -}; - -/* YYRHS -- A `-1'-separated list of the rules' RHS. */ -static const yytype_int8 yyrhs[] = -{ - 21, 0, -1, 22, -1, 22, 27, -1, 22, 25, - 27, -1, 23, 24, -1, 3, 11, -1, 3, 7, - 11, -1, 3, 7, 7, 11, -1, 3, 7, 7, - 7, 11, -1, 4, 7, 11, -1, 26, -1, 25, - 26, -1, 5, 8, 11, -1, 28, -1, 27, 28, - -1, 8, 12, 29, 11, -1, 6, 8, 12, 29, - 11, -1, 30, -1, 29, 13, 30, -1, 31, -1, - 30, 31, -1, 32, -1, 31, 9, -1, 35, -1, - 10, 35, -1, 14, 29, 15, -1, 16, 29, 17, - -1, 7, -1, 8, -1, 33, -1, 34, -1, 35, - 18, -1, 35, 19, -1 -}; - -/* YYRLINE[YYN] -- source line where rule number YYN was defined. 
*/ -static const yytype_uint8 yyrline[] = -{ - 0, 82, 82, 83, 84, 87, 90, 91, 92, 93, - 97, 100, 101, 104, 107, 108, 111, 112, 115, 116, - 121, 123, 127, 128, 132, 133, 136, 139, 142, 143, - 144, 145, 146, 147 -}; -#endif - -#if YYDEBUG || YYERROR_VERBOSE || YYTOKEN_TABLE -/* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM. - First, the terminals, then, starting at YYNTOKENS, nonterminals. */ -static const char *const yytname[] = -{ - "$end", "error", "$undefined", "HEADER", "GRAMMAR", "IMPORT", "PUBLIC", - "TOKEN", "RULENAME", "TAG", "WEIGHT", "';'", "'='", "'|'", "'('", "')'", - "'['", "']'", "'*'", "'+'", "$accept", "grammar", "header", - "jsgf_header", "grammar_header", "import_header", "import_statement", - "rule_list", "rule", "alternate_list", "rule_expansion", - "tagged_rule_item", "rule_item", "rule_group", "rule_optional", - "rule_atom", 0 -}; -#endif - -# ifdef YYPRINT -/* YYTOKNUM[YYLEX-NUM] -- Internal token number corresponding to - token YYLEX-NUM. */ -static const yytype_uint16 yytoknum[] = -{ - 0, 256, 257, 258, 259, 260, 261, 262, 263, 264, - 265, 59, 61, 124, 40, 41, 91, 93, 42, 43 -}; -# endif - -/* YYR1[YYN] -- Symbol number of symbol that rule YYN derives. */ -static const yytype_uint8 yyr1[] = -{ - 0, 20, 21, 21, 21, 22, 23, 23, 23, 23, - 24, 25, 25, 26, 27, 27, 28, 28, 29, 29, - 30, 30, 31, 31, 32, 32, 33, 34, 35, 35, - 35, 35, 35, 35 -}; - -/* YYR2[YYN] -- Number of symbols composing right hand side of rule YYN. */ -static const yytype_uint8 yyr2[] = -{ - 0, 2, 1, 2, 3, 2, 2, 3, 4, 5, - 3, 1, 2, 3, 1, 2, 4, 5, 1, 3, - 1, 2, 1, 2, 1, 2, 3, 3, 1, 1, - 1, 1, 2, 2 -}; - -/* YYDEFACT[STATE-NAME] -- Default rule to reduce with in state - STATE-NUM when YYTABLE doesn't specify something else to do. Zero - means the default is an error. 
*/ -static const yytype_uint8 yydefact[] = -{ - 0, 0, 0, 2, 0, 0, 6, 1, 0, 0, - 0, 0, 11, 3, 14, 0, 5, 0, 7, 0, - 0, 0, 12, 4, 15, 0, 0, 8, 13, 0, - 28, 29, 0, 0, 0, 0, 18, 20, 22, 30, - 31, 24, 10, 9, 0, 25, 0, 0, 16, 0, - 21, 23, 32, 33, 17, 26, 27, 19 -}; - -/* YYDEFGOTO[NTERM-NUM]. */ -static const yytype_int8 yydefgoto[] = -{ - -1, 2, 3, 4, 16, 11, 12, 13, 14, 35, - 36, 37, 38, 39, 40, 41 -}; - -/* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing - STATE-NUM. */ -#define YYPACT_NINF -37 -static const yytype_int8 yypact[] = -{ - -1, -2, 36, 22, 35, 8, -37, -37, 32, 33, - 30, 22, -37, 17, -37, 37, -37, 13, -37, 34, - 31, -4, -37, 17, -37, 38, 39, -37, -37, -4, - -37, -37, 0, -4, -4, 18, -4, 42, -37, -37, - -37, 19, -37, -37, 21, 19, 20, 9, -37, -4, - 42, -37, -37, -37, -37, -37, -37, -4 -}; - -/* YYPGOTO[NTERM-NUM]. */ -static const yytype_int8 yypgoto[] = -{ - -37, -37, -37, -37, -37, -37, 41, 43, -12, -16, - -3, -36, -37, -37, -37, 15 -}; - -/* YYTABLE[YYPACT[STATE-NUM]]. What to do in state STATE-NUM. If - positive, shift that token. If negative, reduce the rule which - number is the opposite. If zero, do what YYDEFACT says. - If YYTABLE_NINF, syntax error. */ -#define YYTABLE_NINF -1 -static const yytype_uint8 yytable[] = -{ - 50, 24, 1, 30, 31, 5, 32, 30, 31, 6, - 33, 24, 34, 44, 33, 17, 34, 46, 47, 18, - 26, 50, 49, 9, 27, 10, 56, 8, 9, 48, - 10, 49, 54, 49, 49, 55, 7, 52, 53, 15, - 19, 20, 21, 29, 25, 28, 57, 45, 0, 42, - 43, 51, 22, 0, 23 -}; - -static const yytype_int8 yycheck[] = -{ - 36, 13, 3, 7, 8, 7, 10, 7, 8, 11, - 14, 23, 16, 29, 14, 7, 16, 33, 34, 11, - 7, 57, 13, 6, 11, 8, 17, 5, 6, 11, - 8, 13, 11, 13, 13, 15, 0, 18, 19, 4, - 8, 8, 12, 12, 7, 11, 49, 32, -1, 11, - 11, 9, 11, -1, 11 -}; - -/* YYSTOS[STATE-NUM] -- The (internal number of the) accessing - symbol of state STATE-NUM. 
*/ -static const yytype_uint8 yystos[] = -{ - 0, 3, 21, 22, 23, 7, 11, 0, 5, 6, - 8, 25, 26, 27, 28, 4, 24, 7, 11, 8, - 8, 12, 26, 27, 28, 7, 7, 11, 11, 12, - 7, 8, 10, 14, 16, 29, 30, 31, 32, 33, - 34, 35, 11, 11, 29, 35, 29, 29, 11, 13, - 31, 9, 18, 19, 11, 15, 17, 30 -}; - -#define yyerrok (yyerrstatus = 0) -#define yyclearin (yychar = YYEMPTY) -#define YYEMPTY (-2) -#define YYEOF 0 - -#define YYACCEPT goto yyacceptlab -#define YYABORT goto yyabortlab -#define YYERROR goto yyerrorlab - - -/* Like YYERROR except do call yyerror. This remains here temporarily - to ease the transition to the new meaning of YYERROR, for GCC. - Once GCC version 2 has supplanted version 1, this can go. */ - -#define YYFAIL goto yyerrlab - -#define YYRECOVERING() (!!yyerrstatus) - -#define YYBACKUP(Token, Value) \ -do \ - if (yychar == YYEMPTY && yylen == 1) \ - { \ - yychar = (Token); \ - yylval = (Value); \ - yytoken = YYTRANSLATE (yychar); \ - YYPOPSTACK (1); \ - goto yybackup; \ - } \ - else \ - { \ - yyerror (yyscanner, jsgf, YY_("syntax error: cannot back up")); \ - YYERROR; \ - } \ -while (YYID (0)) - - -#define YYTERROR 1 -#define YYERRCODE 256 - - -/* YYLLOC_DEFAULT -- Set CURRENT to span from RHS[1] to RHS[N]. - If N is 0, then set CURRENT to the empty location which ends - the previous symbol: RHS[0] (always defined). */ - -#define YYRHSLOC(Rhs, K) ((Rhs)[K]) -#ifndef YYLLOC_DEFAULT -# define YYLLOC_DEFAULT(Current, Rhs, N) \ - do \ - if (YYID (N)) \ - { \ - (Current).first_line = YYRHSLOC (Rhs, 1).first_line; \ - (Current).first_column = YYRHSLOC (Rhs, 1).first_column; \ - (Current).last_line = YYRHSLOC (Rhs, N).last_line; \ - (Current).last_column = YYRHSLOC (Rhs, N).last_column; \ - } \ - else \ - { \ - (Current).first_line = (Current).last_line = \ - YYRHSLOC (Rhs, 0).last_line; \ - (Current).first_column = (Current).last_column = \ - YYRHSLOC (Rhs, 0).last_column; \ - } \ - while (YYID (0)) -#endif - - -/* YY_LOCATION_PRINT -- Print the location on the stream. 
- This macro was not mandated originally: define only if we know - we won't break user code: when these are the locations we know. */ - -#ifndef YY_LOCATION_PRINT -# if YYLTYPE_IS_TRIVIAL -# define YY_LOCATION_PRINT(File, Loc) \ - fprintf (File, "%d.%d-%d.%d", \ - (Loc).first_line, (Loc).first_column, \ - (Loc).last_line, (Loc).last_column) -# else -# define YY_LOCATION_PRINT(File, Loc) ((void) 0) -# endif -#endif - - -/* YYLEX -- calling `yylex' with the right arguments. */ - -#ifdef YYLEX_PARAM -# define YYLEX yylex (&yylval, YYLEX_PARAM) -#else -# define YYLEX yylex (&yylval, yyscanner) -#endif - -/* Enable debugging if requested. */ -#if YYDEBUG - -# ifndef YYFPRINTF -# include <stdio.h> /* INFRINGES ON USER NAME SPACE */ -# define YYFPRINTF fprintf -# endif - -# define YYDPRINTF(Args) \ -do { \ - if (yydebug) \ - YYFPRINTF Args; \ -} while (YYID (0)) - -# define YY_SYMBOL_PRINT(Title, Type, Value, Location) \ -do { \ - if (yydebug) \ - { \ - YYFPRINTF (stderr, "%s ", Title); \ - yy_symbol_print (stderr, \ - Type, Value, yyscanner, jsgf); \ - YYFPRINTF (stderr, "\n"); \ - } \ -} while (YYID (0)) - - -/*--------------------------------. -| Print this symbol on YYOUTPUT. | -`--------------------------------*/ - -/*ARGSUSED*/ -#if (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -static void -yy_symbol_value_print (FILE *yyoutput, int yytype, YYSTYPE const * const yyvaluep, void* yyscanner, jsgf_t *jsgf) -#else -static void -yy_symbol_value_print (yyoutput, yytype, yyvaluep, yyscanner, jsgf) - FILE *yyoutput; - int yytype; - YYSTYPE const * const yyvaluep; - void* yyscanner; - jsgf_t *jsgf; -#endif -{ - if (!yyvaluep) - return; - YYUSE (yyscanner); - YYUSE (jsgf); -# ifdef YYPRINT - if (yytype < YYNTOKENS) - YYPRINT (yyoutput, yytoknum[yytype], *yyvaluep); -# else - YYUSE (yyoutput); -# endif - switch (yytype) - { - default: - break; - } -} - - -/*--------------------------------. -| Print this symbol on YYOUTPUT. 
| -`--------------------------------*/ - -#if (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -static void -yy_symbol_print (FILE *yyoutput, int yytype, YYSTYPE const * const yyvaluep, void* yyscanner, jsgf_t *jsgf) -#else -static void -yy_symbol_print (yyoutput, yytype, yyvaluep, yyscanner, jsgf) - FILE *yyoutput; - int yytype; - YYSTYPE const * const yyvaluep; - void* yyscanner; - jsgf_t *jsgf; -#endif -{ - if (yytype < YYNTOKENS) - YYFPRINTF (yyoutput, "token %s (", yytname[yytype]); - else - YYFPRINTF (yyoutput, "nterm %s (", yytname[yytype]); - - yy_symbol_value_print (yyoutput, yytype, yyvaluep, yyscanner, jsgf); - YYFPRINTF (yyoutput, ")"); -} - -/*------------------------------------------------------------------. -| yy_stack_print -- Print the state stack from its BOTTOM up to its | -| TOP (included). | -`------------------------------------------------------------------*/ - -#if (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -static void -yy_stack_print (yytype_int16 *yybottom, yytype_int16 *yytop) -#else -static void -yy_stack_print (yybottom, yytop) - yytype_int16 *yybottom; - yytype_int16 *yytop; -#endif -{ - YYFPRINTF (stderr, "Stack now"); - for (; yybottom <= yytop; yybottom++) - { - int yybot = *yybottom; - YYFPRINTF (stderr, " %d", yybot); - } - YYFPRINTF (stderr, "\n"); -} - -# define YY_STACK_PRINT(Bottom, Top) \ -do { \ - if (yydebug) \ - yy_stack_print ((Bottom), (Top)); \ -} while (YYID (0)) - - -/*------------------------------------------------. -| Report that the YYRULE is going to be reduced. 
| -`------------------------------------------------*/ - -#if (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -static void -yy_reduce_print (YYSTYPE *yyvsp, int yyrule, void* yyscanner, jsgf_t *jsgf) -#else -static void -yy_reduce_print (yyvsp, yyrule, yyscanner, jsgf) - YYSTYPE *yyvsp; - int yyrule; - void* yyscanner; - jsgf_t *jsgf; -#endif -{ - int yynrhs = yyr2[yyrule]; - int yyi; - unsigned long int yylno = yyrline[yyrule]; - YYFPRINTF (stderr, "Reducing stack by rule %d (line %lu):\n", - yyrule - 1, yylno); - /* The symbols being reduced. */ - for (yyi = 0; yyi < yynrhs; yyi++) - { - YYFPRINTF (stderr, " $%d = ", yyi + 1); - yy_symbol_print (stderr, yyrhs[yyprhs[yyrule] + yyi], - &(yyvsp[(yyi + 1) - (yynrhs)]) - , yyscanner, jsgf); - YYFPRINTF (stderr, "\n"); - } -} - -# define YY_REDUCE_PRINT(Rule) \ -do { \ - if (yydebug) \ - yy_reduce_print (yyvsp, Rule, yyscanner, jsgf); \ -} while (YYID (0)) - -/* Nonzero means print parse trace. It is left uninitialized so that - multiple parsers can coexist. */ -int yydebug; -#else /* !YYDEBUG */ -# define YYDPRINTF(Args) -# define YY_SYMBOL_PRINT(Title, Type, Value, Location) -# define YY_STACK_PRINT(Bottom, Top) -# define YY_REDUCE_PRINT(Rule) -#endif /* !YYDEBUG */ - - -/* YYINITDEPTH -- initial size of the parser's stacks. */ -#ifndef YYINITDEPTH -# define YYINITDEPTH 200 -#endif - -/* YYMAXDEPTH -- maximum size the stacks can grow to (effective only - if the built-in stack extension method is used). - - Do not make this value too large; the results are undefined if - YYSTACK_ALLOC_MAXIMUM < YYSTACK_BYTES (YYMAXDEPTH) - evaluated with infinite-precision integer arithmetic. */ - -#ifndef YYMAXDEPTH -# define YYMAXDEPTH 10000 -#endif - - - -#if YYERROR_VERBOSE - -# ifndef yystrlen -# if defined __GLIBC__ && defined _STRING_H -# define yystrlen strlen -# else -/* Return the length of YYSTR. 
*/ -#if (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -static YYSIZE_T -yystrlen (const char *yystr) -#else -static YYSIZE_T -yystrlen (yystr) - const char *yystr; -#endif -{ - YYSIZE_T yylen; - for (yylen = 0; yystr[yylen]; yylen++) - continue; - return yylen; -} -# endif -# endif - -# ifndef yystpcpy -# if defined __GLIBC__ && defined _STRING_H && defined _GNU_SOURCE -# define yystpcpy stpcpy -# else -/* Copy YYSRC to YYDEST, returning the address of the terminating '\0' in - YYDEST. */ -#if (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -static char * -yystpcpy (char *yydest, const char *yysrc) -#else -static char * -yystpcpy (yydest, yysrc) - char *yydest; - const char *yysrc; -#endif -{ - char *yyd = yydest; - const char *yys = yysrc; - - while ((*yyd++ = *yys++) != '\0') - continue; - - return yyd - 1; -} -# endif -# endif - -# ifndef yytnamerr -/* Copy to YYRES the contents of YYSTR after stripping away unnecessary - quotes and backslashes, so that it's suitable for yyerror. The - heuristic is that double-quoting is unnecessary unless the string - contains an apostrophe, a comma, or backslash (other than - backslash-backslash). YYSTR is taken from yytname. If YYRES is - null, do not copy; instead, return the length of what the result - would have been. */ -static YYSIZE_T -yytnamerr (char *yyres, const char *yystr) -{ - if (*yystr == '"') - { - YYSIZE_T yyn = 0; - char const *yyp = yystr; - - for (;;) - switch (*++yyp) - { - case '\'': - case ',': - goto do_not_strip_quotes; - - case '\\': - if (*++yyp != '\\') - goto do_not_strip_quotes; - /* Fall through. */ - default: - if (yyres) - yyres[yyn] = *yyp; - yyn++; - break; - - case '"': - if (yyres) - yyres[yyn] = '\0'; - return yyn; - } - do_not_strip_quotes: ; - } - - if (! 
yyres) - return yystrlen (yystr); - - return yystpcpy (yyres, yystr) - yyres; -} -# endif - -/* Copy into YYRESULT an error message about the unexpected token - YYCHAR while in state YYSTATE. Return the number of bytes copied, - including the terminating null byte. If YYRESULT is null, do not - copy anything; just return the number of bytes that would be - copied. As a special case, return 0 if an ordinary "syntax error" - message will do. Return YYSIZE_MAXIMUM if overflow occurs during - size calculation. */ -static YYSIZE_T -yysyntax_error (char *yyresult, int yystate, int yychar) -{ - int yyn = yypact[yystate]; - - if (! (YYPACT_NINF < yyn && yyn <= YYLAST)) - return 0; - else - { - int yytype = YYTRANSLATE (yychar); - YYSIZE_T yysize0 = yytnamerr (0, yytname[yytype]); - YYSIZE_T yysize = yysize0; - YYSIZE_T yysize1; - int yysize_overflow = 0; - enum { YYERROR_VERBOSE_ARGS_MAXIMUM = 5 }; - char const *yyarg[YYERROR_VERBOSE_ARGS_MAXIMUM]; - int yyx; - -# if 0 - /* This is so xgettext sees the translatable formats that are - constructed on the fly. */ - YY_("syntax error, unexpected %s"); - YY_("syntax error, unexpected %s, expecting %s"); - YY_("syntax error, unexpected %s, expecting %s or %s"); - YY_("syntax error, unexpected %s, expecting %s or %s or %s"); - YY_("syntax error, unexpected %s, expecting %s or %s or %s or %s"); -# endif - char *yyfmt; - char const *yyf; - static char const yyunexpected[] = "syntax error, unexpected %s"; - static char const yyexpecting[] = ", expecting %s"; - static char const yyor[] = " or %s"; - char yyformat[sizeof yyunexpected - + sizeof yyexpecting - 1 - + ((YYERROR_VERBOSE_ARGS_MAXIMUM - 2) - * (sizeof yyor - 1))]; - char const *yyprefix = yyexpecting; - - /* Start YYX at -YYN if negative to avoid negative indexes in - YYCHECK. */ - int yyxbegin = yyn < 0 ? -yyn : 0; - - /* Stay within bounds of both yycheck and yytname. */ - int yychecklim = YYLAST - yyn + 1; - int yyxend = yychecklim < YYNTOKENS ? 
yychecklim : YYNTOKENS; - int yycount = 1; - - yyarg[0] = yytname[yytype]; - yyfmt = yystpcpy (yyformat, yyunexpected); - - for (yyx = yyxbegin; yyx < yyxend; ++yyx) - if (yycheck[yyx + yyn] == yyx && yyx != YYTERROR) - { - if (yycount == YYERROR_VERBOSE_ARGS_MAXIMUM) - { - yycount = 1; - yysize = yysize0; - yyformat[sizeof yyunexpected - 1] = '\0'; - break; - } - yyarg[yycount++] = yytname[yyx]; - yysize1 = yysize + yytnamerr (0, yytname[yyx]); - yysize_overflow |= (yysize1 < yysize); - yysize = yysize1; - yyfmt = yystpcpy (yyfmt, yyprefix); - yyprefix = yyor; - } - - yyf = YY_(yyformat); - yysize1 = yysize + yystrlen (yyf); - yysize_overflow |= (yysize1 < yysize); - yysize = yysize1; - - if (yysize_overflow) - return YYSIZE_MAXIMUM; - - if (yyresult) - { - /* Avoid sprintf, as that infringes on the user's name space. - Don't have undefined behavior even if the translation - produced a string with the wrong number of "%s"s. */ - char *yyp = yyresult; - int yyi = 0; - while ((*yyp = *yyf) != '\0') - { - if (*yyp == '%' && yyf[1] == 's' && yyi < yycount) - { - yyp += yytnamerr (yyp, yyarg[yyi++]); - yyf += 2; - } - else - { - yyp++; - yyf++; - } - } - } - return yysize; - } -} -#endif /* YYERROR_VERBOSE */ - - -/*-----------------------------------------------. -| Release the memory associated to this symbol. 
| -`-----------------------------------------------*/ - -/*ARGSUSED*/ -#if (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -static void -yydestruct (const char *yymsg, int yytype, YYSTYPE *yyvaluep, void* yyscanner, jsgf_t *jsgf) -#else -static void -yydestruct (yymsg, yytype, yyvaluep, yyscanner, jsgf) - const char *yymsg; - int yytype; - YYSTYPE *yyvaluep; - void* yyscanner; - jsgf_t *jsgf; -#endif -{ - YYUSE (yyvaluep); - YYUSE (yyscanner); - YYUSE (jsgf); - - if (!yymsg) - yymsg = "Deleting"; - YY_SYMBOL_PRINT (yymsg, yytype, yyvaluep, yylocationp); - - switch (yytype) - { - - default: - break; - } -} - -/* Prevent warnings from -Wmissing-prototypes. */ -#ifdef YYPARSE_PARAM -#if defined __STDC__ || defined __cplusplus -int yyparse (void *YYPARSE_PARAM); -#else -int yyparse (); -#endif -#else /* ! YYPARSE_PARAM */ -#if defined __STDC__ || defined __cplusplus -int yyparse (void* yyscanner, jsgf_t *jsgf); -#else -int yyparse (); -#endif -#endif /* ! YYPARSE_PARAM */ - - - - - -/*-------------------------. -| yyparse or yypush_parse. | -`-------------------------*/ - -#ifdef YYPARSE_PARAM -#if (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -int -yyparse (void *YYPARSE_PARAM) -#else -int -yyparse (YYPARSE_PARAM) - void *YYPARSE_PARAM; -#endif -#else /* ! YYPARSE_PARAM */ -#if (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -int -yyparse (void* yyscanner, jsgf_t *jsgf) -#else -int -yyparse (yyscanner, jsgf) - void* yyscanner; - jsgf_t *jsgf; -#endif -#endif -{ -/* The lookahead symbol. */ -int yychar; - -/* The semantic value of the lookahead symbol. */ -YYSTYPE yylval; - - /* Number of syntax errors so far. */ - int yynerrs; - - int yystate; - /* Number of tokens to shift before error messages enabled. */ - int yyerrstatus; - - /* The stacks and their tools: - `yyss': related to states. - `yyvs': related to semantic values. 
- - Refer to the stacks thru separate pointers, to allow yyoverflow - to reallocate them elsewhere. */ - - /* The state stack. */ - yytype_int16 yyssa[YYINITDEPTH]; - yytype_int16 *yyss; - yytype_int16 *yyssp; - - /* The semantic value stack. */ - YYSTYPE yyvsa[YYINITDEPTH]; - YYSTYPE *yyvs; - YYSTYPE *yyvsp; - - YYSIZE_T yystacksize; - - int yyn; - int yyresult; - /* Lookahead token as an internal (translated) token number. */ - int yytoken; - /* The variables used to return semantic value and location from the - action routines. */ - YYSTYPE yyval; - -#if YYERROR_VERBOSE - /* Buffer for error messages, and its allocated size. */ - char yymsgbuf[128]; - char *yymsg = yymsgbuf; - YYSIZE_T yymsg_alloc = sizeof yymsgbuf; -#endif - -#define YYPOPSTACK(N) (yyvsp -= (N), yyssp -= (N)) - - /* The number of symbols on the RHS of the reduced rule. - Keep to zero when no symbol should be popped. */ - int yylen = 0; - - yytoken = 0; - yyss = yyssa; - yyvs = yyvsa; - yystacksize = YYINITDEPTH; - - YYDPRINTF ((stderr, "Starting parse\n")); - - yystate = 0; - yyerrstatus = 0; - yynerrs = 0; - yychar = YYEMPTY; /* Cause a token to be read. */ - - /* Initialize stack pointers. - Waste one element of value and location stack - so that they stay on the same level as the state stack. - The wasted elements are never initialized. */ - yyssp = yyss; - yyvsp = yyvs; - - goto yysetstate; - -/*------------------------------------------------------------. -| yynewstate -- Push a new state, which is found in yystate. | -`------------------------------------------------------------*/ - yynewstate: - /* In all cases, when you get here, the value and location stacks - have just been pushed. So pushing a state here evens the stacks. */ - yyssp++; - - yysetstate: - *yyssp = yystate; - - if (yyss + yystacksize - 1 <= yyssp) - { - /* Get the current used size of the three stacks, in elements. 
*/ - YYSIZE_T yysize = yyssp - yyss + 1; - -#ifdef yyoverflow - { - /* Give user a chance to reallocate the stack. Use copies of - these so that the &'s don't force the real ones into - memory. */ - YYSTYPE *yyvs1 = yyvs; - yytype_int16 *yyss1 = yyss; - - /* Each stack pointer address is followed by the size of the - data in use in that stack, in bytes. This used to be a - conditional around just the two extra args, but that might - be undefined if yyoverflow is a macro. */ - yyoverflow (YY_("memory exhausted"), - &yyss1, yysize * sizeof (*yyssp), - &yyvs1, yysize * sizeof (*yyvsp), - &yystacksize); - - yyss = yyss1; - yyvs = yyvs1; - } -#else /* no yyoverflow */ -# ifndef YYSTACK_RELOCATE - goto yyexhaustedlab; -# else - /* Extend the stack our own way. */ - if (YYMAXDEPTH <= yystacksize) - goto yyexhaustedlab; - yystacksize *= 2; - if (YYMAXDEPTH < yystacksize) - yystacksize = YYMAXDEPTH; - - { - yytype_int16 *yyss1 = yyss; - union yyalloc *yyptr = - (union yyalloc *) YYSTACK_ALLOC (YYSTACK_BYTES (yystacksize)); - if (! yyptr) - goto yyexhaustedlab; - YYSTACK_RELOCATE (yyss_alloc, yyss); - YYSTACK_RELOCATE (yyvs_alloc, yyvs); -# undef YYSTACK_RELOCATE - if (yyss1 != yyssa) - YYSTACK_FREE (yyss1); - } -# endif -#endif /* no yyoverflow */ - - yyssp = yyss + yysize - 1; - yyvsp = yyvs + yysize - 1; - - YYDPRINTF ((stderr, "Stack size increased to %lu\n", - (unsigned long int) yystacksize)); - - if (yyss + yystacksize - 1 <= yyssp) - YYABORT; - } - - YYDPRINTF ((stderr, "Entering state %d\n", yystate)); - - if (yystate == YYFINAL) - YYACCEPT; - - goto yybackup; - -/*-----------. -| yybackup. | -`-----------*/ -yybackup: - - /* Do appropriate processing given the current state. Read a - lookahead token if we need one and don't already have one. */ - - /* First try to decide what to do without reference to lookahead token. */ - yyn = yypact[yystate]; - if (yyn == YYPACT_NINF) - goto yydefault; - - /* Not known => get a lookahead token if don't already have one. 
*/ - - /* YYCHAR is either YYEMPTY or YYEOF or a valid lookahead symbol. */ - if (yychar == YYEMPTY) - { - YYDPRINTF ((stderr, "Reading a token: ")); - yychar = YYLEX; - } - - if (yychar <= YYEOF) - { - yychar = yytoken = YYEOF; - YYDPRINTF ((stderr, "Now at end of input.\n")); - } - else - { - yytoken = YYTRANSLATE (yychar); - YY_SYMBOL_PRINT ("Next token is", yytoken, &yylval, &yylloc); - } - - /* If the proper action on seeing token YYTOKEN is to reduce or to - detect an error, take that action. */ - yyn += yytoken; - if (yyn < 0 || YYLAST < yyn || yycheck[yyn] != yytoken) - goto yydefault; - yyn = yytable[yyn]; - if (yyn <= 0) - { - if (yyn == 0 || yyn == YYTABLE_NINF) - goto yyerrlab; - yyn = -yyn; - goto yyreduce; - } - - /* Count tokens shifted since error; after three, turn off error - status. */ - if (yyerrstatus) - yyerrstatus--; - - /* Shift the lookahead token. */ - YY_SYMBOL_PRINT ("Shifting", yytoken, &yylval, &yylloc); - - /* Discard the shifted token. */ - yychar = YYEMPTY; - - yystate = yyn; - *++yyvsp = yylval; - - goto yynewstate; - - -/*-----------------------------------------------------------. -| yydefault -- do the default action for the current state. | -`-----------------------------------------------------------*/ -yydefault: - yyn = yydefact[yystate]; - if (yyn == 0) - goto yyerrlab; - goto yyreduce; - - -/*-----------------------------. -| yyreduce -- Do a reduction. | -`-----------------------------*/ -yyreduce: - /* yyn is the number of a rule to reduce with. */ - yylen = yyr2[yyn]; - - /* If YYLEN is nonzero, implement the default value of the action: - `$$ = $1'. - - Otherwise, the following line sets YYVAL to garbage. - This behavior is undocumented and Bison - users should not rely upon it. Assigning to YYVAL - unconditionally makes the parser a bit smaller, and it avoids a - GCC warning that YYVAL may be used uninitialized. 
*/ - yyval = yyvsp[1-yylen]; - - - YY_REDUCE_PRINT (yyn); - switch (yyn) - { - case 5: - -/* Line 1455 of yacc.c */ -#line 87 "jsgf_parser.y" - { jsgf->name = (yyvsp[(2) - (2)].name); } - break; - - case 7: - -/* Line 1455 of yacc.c */ -#line 91 "jsgf_parser.y" - { jsgf->version = (yyvsp[(2) - (3)].name); } - break; - - case 8: - -/* Line 1455 of yacc.c */ -#line 92 "jsgf_parser.y" - { jsgf->version = (yyvsp[(2) - (4)].name); jsgf->charset = (yyvsp[(3) - (4)].name); } - break; - - case 9: - -/* Line 1455 of yacc.c */ -#line 93 "jsgf_parser.y" - { jsgf->version = (yyvsp[(2) - (5)].name); jsgf->charset = (yyvsp[(3) - (5)].name); - jsgf->locale = (yyvsp[(4) - (5)].name); } - break; - - case 10: - -/* Line 1455 of yacc.c */ -#line 97 "jsgf_parser.y" - { (yyval.name) = (yyvsp[(2) - (3)].name); } - break; - - case 13: - -/* Line 1455 of yacc.c */ -#line 104 "jsgf_parser.y" - { jsgf_import_rule(jsgf, (yyvsp[(2) - (3)].name)); ckd_free((yyvsp[(2) - (3)].name)); } - break; - - case 16: - -/* Line 1455 of yacc.c */ -#line 111 "jsgf_parser.y" - { jsgf_define_rule(jsgf, (yyvsp[(1) - (4)].name), (yyvsp[(3) - (4)].rhs), 0); ckd_free((yyvsp[(1) - (4)].name)); } - break; - - case 17: - -/* Line 1455 of yacc.c */ -#line 112 "jsgf_parser.y" - { jsgf_define_rule(jsgf, (yyvsp[(2) - (5)].name), (yyvsp[(4) - (5)].rhs), 1); ckd_free((yyvsp[(2) - (5)].name)); } - break; - - case 18: - -/* Line 1455 of yacc.c */ -#line 115 "jsgf_parser.y" - { (yyval.rhs) = (yyvsp[(1) - (1)].rhs); (yyval.rhs)->atoms = glist_reverse((yyval.rhs)->atoms); } - break; - - case 19: - -/* Line 1455 of yacc.c */ -#line 116 "jsgf_parser.y" - { (yyval.rhs) = (yyvsp[(3) - (3)].rhs); - (yyval.rhs)->atoms = glist_reverse((yyval.rhs)->atoms); - (yyval.rhs)->alt = (yyvsp[(1) - (3)].rhs); } - break; - - case 20: - -/* Line 1455 of yacc.c */ -#line 121 "jsgf_parser.y" - { (yyval.rhs) = ckd_calloc(1, sizeof(*(yyval.rhs))); - (yyval.rhs)->atoms = glist_add_ptr((yyval.rhs)->atoms, (yyvsp[(1) - (1)].atom)); } - break; - - case 
21: - -/* Line 1455 of yacc.c */ -#line 123 "jsgf_parser.y" - { (yyval.rhs) = (yyvsp[(1) - (2)].rhs); - (yyval.rhs)->atoms = glist_add_ptr((yyval.rhs)->atoms, (yyvsp[(2) - (2)].atom)); } - break; - - case 23: - -/* Line 1455 of yacc.c */ -#line 128 "jsgf_parser.y" - { (yyval.atom) = (yyvsp[(1) - (2)].atom); - (yyval.atom)->tags = glist_add_ptr((yyval.atom)->tags, (yyvsp[(2) - (2)].name)); } - break; - - case 25: - -/* Line 1455 of yacc.c */ -#line 133 "jsgf_parser.y" - { (yyval.atom) = (yyvsp[(2) - (2)].atom); (yyval.atom)->weight = (yyvsp[(1) - (2)].weight); } - break; - - case 26: - -/* Line 1455 of yacc.c */ -#line 136 "jsgf_parser.y" - { (yyval.rule) = jsgf_define_rule(jsgf, NULL, (yyvsp[(2) - (3)].rhs), 0); } - break; - - case 27: - -/* Line 1455 of yacc.c */ -#line 139 "jsgf_parser.y" - { (yyval.rule) = jsgf_optional_new(jsgf, (yyvsp[(2) - (3)].rhs)); } - break; - - case 28: - -/* Line 1455 of yacc.c */ -#line 142 "jsgf_parser.y" - { (yyval.atom) = jsgf_atom_new((yyvsp[(1) - (1)].name), 1.0); ckd_free((yyvsp[(1) - (1)].name)); } - break; - - case 29: - -/* Line 1455 of yacc.c */ -#line 143 "jsgf_parser.y" - { (yyval.atom) = jsgf_atom_new((yyvsp[(1) - (1)].name), 1.0); ckd_free((yyvsp[(1) - (1)].name)); } - break; - - case 30: - -/* Line 1455 of yacc.c */ -#line 144 "jsgf_parser.y" - { (yyval.atom) = jsgf_atom_new((yyvsp[(1) - (1)].rule)->name, 1.0); } - break; - - case 31: - -/* Line 1455 of yacc.c */ -#line 145 "jsgf_parser.y" - { (yyval.atom) = jsgf_atom_new((yyvsp[(1) - (1)].rule)->name, 1.0); } - break; - - case 32: - -/* Line 1455 of yacc.c */ -#line 146 "jsgf_parser.y" - { (yyval.atom) = jsgf_kleene_new(jsgf, (yyvsp[(1) - (2)].atom), 0); } - break; - - case 33: - -/* Line 1455 of yacc.c */ -#line 147 "jsgf_parser.y" - { (yyval.atom) = jsgf_kleene_new(jsgf, (yyvsp[(1) - (2)].atom), 1); } - break; - - - -/* Line 1455 of yacc.c */ -#line 1580 "jsgf_parser.c" - default: break; - } - YY_SYMBOL_PRINT ("-> $$ =", yyr1[yyn], &yyval, &yyloc); - - YYPOPSTACK 
(yylen); - yylen = 0; - YY_STACK_PRINT (yyss, yyssp); - - *++yyvsp = yyval; - - /* Now `shift' the result of the reduction. Determine what state - that goes to, based on the state we popped back to and the rule - number reduced by. */ - - yyn = yyr1[yyn]; - - yystate = yypgoto[yyn - YYNTOKENS] + *yyssp; - if (0 <= yystate && yystate <= YYLAST && yycheck[yystate] == *yyssp) - yystate = yytable[yystate]; - else - yystate = yydefgoto[yyn - YYNTOKENS]; - - goto yynewstate; - - -/*------------------------------------. -| yyerrlab -- here on detecting error | -`------------------------------------*/ -yyerrlab: - /* If not already recovering from an error, report this error. */ - if (!yyerrstatus) - { - ++yynerrs; -#if ! YYERROR_VERBOSE - yyerror (yyscanner, jsgf, YY_("syntax error")); -#else - { - YYSIZE_T yysize = yysyntax_error (0, yystate, yychar); - if (yymsg_alloc < yysize && yymsg_alloc < YYSTACK_ALLOC_MAXIMUM) - { - YYSIZE_T yyalloc = 2 * yysize; - if (! (yysize <= yyalloc && yyalloc <= YYSTACK_ALLOC_MAXIMUM)) - yyalloc = YYSTACK_ALLOC_MAXIMUM; - if (yymsg != yymsgbuf) - YYSTACK_FREE (yymsg); - yymsg = (char *) YYSTACK_ALLOC (yyalloc); - if (yymsg) - yymsg_alloc = yyalloc; - else - { - yymsg = yymsgbuf; - yymsg_alloc = sizeof yymsgbuf; - } - } - - if (0 < yysize && yysize <= yymsg_alloc) - { - (void) yysyntax_error (yymsg, yystate, yychar); - yyerror (yyscanner, jsgf, yymsg); - } - else - { - yyerror (yyscanner, jsgf, YY_("syntax error")); - if (yysize != 0) - goto yyexhaustedlab; - } - } -#endif - } - - - - if (yyerrstatus == 3) - { - /* If just tried and failed to reuse lookahead token after an - error, discard it. */ - - if (yychar <= YYEOF) - { - /* Return failure if at end of input. */ - if (yychar == YYEOF) - YYABORT; - } - else - { - yydestruct ("Error: discarding", - yytoken, &yylval, yyscanner, jsgf); - yychar = YYEMPTY; - } - } - - /* Else will try to reuse lookahead token after shifting the error - token. 
*/ - goto yyerrlab1; - - -/*---------------------------------------------------. -| yyerrorlab -- error raised explicitly by YYERROR. | -`---------------------------------------------------*/ -yyerrorlab: - - /* Pacify compilers like GCC when the user code never invokes - YYERROR and the label yyerrorlab therefore never appears in user - code. */ - if (/*CONSTCOND*/ 0) - goto yyerrorlab; - - /* Do not reclaim the symbols of the rule which action triggered - this YYERROR. */ - YYPOPSTACK (yylen); - yylen = 0; - YY_STACK_PRINT (yyss, yyssp); - yystate = *yyssp; - goto yyerrlab1; - - -/*-------------------------------------------------------------. -| yyerrlab1 -- common code for both syntax error and YYERROR. | -`-------------------------------------------------------------*/ -yyerrlab1: - yyerrstatus = 3; /* Each real token shifted decrements this. */ - - for (;;) - { - yyn = yypact[yystate]; - if (yyn != YYPACT_NINF) - { - yyn += YYTERROR; - if (0 <= yyn && yyn <= YYLAST && yycheck[yyn] == YYTERROR) - { - yyn = yytable[yyn]; - if (0 < yyn) - break; - } - } - - /* Pop the current state because it cannot handle the error token. */ - if (yyssp == yyss) - YYABORT; - - - yydestruct ("Error: popping", - yystos[yystate], yyvsp, yyscanner, jsgf); - YYPOPSTACK (1); - yystate = *yyssp; - YY_STACK_PRINT (yyss, yyssp); - } - - *++yyvsp = yylval; - - - /* Shift the error token. */ - YY_SYMBOL_PRINT ("Shifting", yystos[yyn], yyvsp, yylsp); - - yystate = yyn; - goto yynewstate; - - -/*-------------------------------------. -| yyacceptlab -- YYACCEPT comes here. | -`-------------------------------------*/ -yyacceptlab: - yyresult = 0; - goto yyreturn; - -/*-----------------------------------. -| yyabortlab -- YYABORT comes here. | -`-----------------------------------*/ -yyabortlab: - yyresult = 1; - goto yyreturn; - -#if !defined(yyoverflow) || YYERROR_VERBOSE -/*-------------------------------------------------. -| yyexhaustedlab -- memory exhaustion comes here. 
| -`-------------------------------------------------*/ -yyexhaustedlab: - yyerror (yyscanner, jsgf, YY_("memory exhausted")); - yyresult = 2; - /* Fall through. */ -#endif - -yyreturn: - if (yychar != YYEMPTY) - yydestruct ("Cleanup: discarding lookahead", - yytoken, &yylval, yyscanner, jsgf); - /* Do not reclaim the symbols of the rule which action triggered - this YYABORT or YYACCEPT. */ - YYPOPSTACK (yylen); - YY_STACK_PRINT (yyss, yyssp); - while (yyssp != yyss) - { - yydestruct ("Cleanup: popping", - yystos[*yyssp], yyvsp, yyscanner, jsgf); - YYPOPSTACK (1); - } -#ifndef yyoverflow - if (yyss != yyssa) - YYSTACK_FREE (yyss); -#endif -#if YYERROR_VERBOSE - if (yymsg != yymsgbuf) - YYSTACK_FREE (yymsg); -#endif - /* Make sure YYID is used. */ - return YYID (yyresult); -} - - - -/* Line 1675 of yacc.c */ -#line 150 "jsgf_parser.y" - - -void -yyerror(yyscan_t lex, jsgf_t *jsgf, const char *s) -{ - E_ERROR("%s at line %d current token '%s'\n", s, yyget_lineno(lex), yyget_text(lex)); -} - diff --git a/media/sphinxbase/src/libsphinxbase/lm/jsgf_parser.h b/media/sphinxbase/src/libsphinxbase/lm/jsgf_parser.h deleted file mode 100644 index 95f68e329..000000000 --- a/media/sphinxbase/src/libsphinxbase/lm/jsgf_parser.h +++ /dev/null @@ -1,90 +0,0 @@ - -/* A Bison parser, made by GNU Bison 2.4.1. */ - -/* Skeleton interface for Bison's Yacc-like parsers in C - - Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004, 2005, 2006 - Free Software Foundation, Inc. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. */ - -/* As a special exception, you may create a larger work that contains - part or all of the Bison parser skeleton and distribute that work - under terms of your choice, so long as that work isn't itself a - parser generator using the skeleton or a modified version thereof - as a parser skeleton. Alternatively, if you modify or redistribute - the parser skeleton itself, you may (at your option) remove this - special exception, which will cause the skeleton and the resulting - Bison output files to be licensed under the GNU General Public - License without this special exception. - - This special exception was added by the Free Software Foundation in - version 2.2 of Bison. */ - - -/* Tokens. */ -#ifndef YYTOKENTYPE -# define YYTOKENTYPE - /* Put the tokens into the symbol table, so that GDB and other debuggers - know about them. */ - enum yytokentype { - HEADER = 258, - GRAMMAR = 259, - IMPORT = 260, - PUBLIC = 261, - TOKEN = 262, - RULENAME = 263, - TAG = 264, - WEIGHT = 265 - }; -#endif -/* Tokens. */ -#define HEADER 258 -#define GRAMMAR 259 -#define IMPORT 260 -#define PUBLIC 261 -#define TOKEN 262 -#define RULENAME 263 -#define TAG 264 -#define WEIGHT 265 - - - - -#if ! defined YYSTYPE && ! 
defined YYSTYPE_IS_DECLARED -typedef union YYSTYPE -{ - -/* Line 1676 of yacc.c */ -#line 65 "jsgf_parser.y" - - char *name; - float weight; - jsgf_rule_t *rule; - jsgf_rhs_t *rhs; - jsgf_atom_t *atom; - - - -/* Line 1676 of yacc.c */ -#line 82 "jsgf_parser.h" -} YYSTYPE; -# define YYSTYPE_IS_TRIVIAL 1 -# define yystype YYSTYPE /* obsolescent; will be withdrawn */ -# define YYSTYPE_IS_DECLARED 1 -#endif - - - - diff --git a/media/sphinxbase/src/libsphinxbase/lm/jsgf_scanner.c b/media/sphinxbase/src/libsphinxbase/lm/jsgf_scanner.c deleted file mode 100644 index 5d41d2a6b..000000000 --- a/media/sphinxbase/src/libsphinxbase/lm/jsgf_scanner.c +++ /dev/null @@ -1,2199 +0,0 @@ -#line 2 "jsgf_scanner.c" - -#line 4 "jsgf_scanner.c" - -#define YY_INT_ALIGNED short int - -/* A lexical scanner generated by flex */ - -#define FLEX_SCANNER -#define YY_FLEX_MAJOR_VERSION 2 -#define YY_FLEX_MINOR_VERSION 5 -#define YY_FLEX_SUBMINOR_VERSION 37 -#if YY_FLEX_SUBMINOR_VERSION > 0 -#define FLEX_BETA -#endif - -/* First, we deal with platform-specific or compiler-specific issues. */ - -/* begin standard C headers. */ -#include <stdio.h> -#include <string.h> -#include <errno.h> -#include <stdlib.h> - -/* end standard C headers. */ - -/* flex integer type definitions */ - -#ifndef FLEXINT_H -#define FLEXINT_H - -/* C99 systems have <inttypes.h>. Non-C99 systems may or may not. */ - -#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L - -/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h, - * if you want the limit (max/min) macros for int types. 
- */ -#ifndef __STDC_LIMIT_MACROS -#define __STDC_LIMIT_MACROS 1 -#endif - -#include <inttypes.h> -typedef int8_t flex_int8_t; -typedef uint8_t flex_uint8_t; -typedef int16_t flex_int16_t; -typedef uint16_t flex_uint16_t; -typedef int32_t flex_int32_t; -typedef uint32_t flex_uint32_t; -#else -typedef signed char flex_int8_t; -typedef short int flex_int16_t; -typedef int flex_int32_t; -typedef unsigned char flex_uint8_t; -typedef unsigned short int flex_uint16_t; -typedef unsigned int flex_uint32_t; - -/* Limits of integral types. */ -#ifndef INT8_MIN -#define INT8_MIN (-128) -#endif -#ifndef INT16_MIN -#define INT16_MIN (-32767-1) -#endif -#ifndef INT32_MIN -#define INT32_MIN (-2147483647-1) -#endif -#ifndef INT8_MAX -#define INT8_MAX (127) -#endif -#ifndef INT16_MAX -#define INT16_MAX (32767) -#endif -#ifndef INT32_MAX -#define INT32_MAX (2147483647) -#endif -#ifndef UINT8_MAX -#define UINT8_MAX (255U) -#endif -#ifndef UINT16_MAX -#define UINT16_MAX (65535U) -#endif -#ifndef UINT32_MAX -#define UINT32_MAX (4294967295U) -#endif - -#endif /* ! C99 */ - -#endif /* ! FLEXINT_H */ - -#ifdef __cplusplus - -/* The "const" storage-class-modifier is valid. */ -#define YY_USE_CONST - -#else /* ! __cplusplus */ - -/* C99 requires __STDC__ to be defined as 1. */ -#if defined (__STDC__) - -#define YY_USE_CONST - -#endif /* defined (__STDC__) */ -#endif /* ! __cplusplus */ - -#ifdef YY_USE_CONST -#define yyconst const -#else -#define yyconst -#endif - -/* Returned upon end-of-file. */ -#define YY_NULL 0 - -/* Promotes a possibly negative, possibly signed char to an unsigned - * integer for use as an array index. If the signed char is negative, - * we want to instead treat it as an 8-bit unsigned char, hence the - * double cast. - */ -#define YY_SC_TO_UI(c) ((unsigned int) (unsigned char) c) - -/* An opaque pointer. 
*/ -#ifndef YY_TYPEDEF_YY_SCANNER_T -#define YY_TYPEDEF_YY_SCANNER_T -typedef void* yyscan_t; -#endif - -/* For convenience, these vars (plus the bison vars far below) - are macros in the reentrant scanner. */ -#define yyin yyg->yyin_r -#define yyout yyg->yyout_r -#define yyextra yyg->yyextra_r -#define yyleng yyg->yyleng_r -#define yytext yyg->yytext_r -#define yylineno (YY_CURRENT_BUFFER_LVALUE->yy_bs_lineno) -#define yycolumn (YY_CURRENT_BUFFER_LVALUE->yy_bs_column) -#define yy_flex_debug yyg->yy_flex_debug_r - -/* Enter a start condition. This macro really ought to take a parameter, - * but we do it the disgusting crufty way forced on us by the ()-less - * definition of BEGIN. - */ -#define BEGIN yyg->yy_start = 1 + 2 * - -/* Translate the current start state into a value that can be later handed - * to BEGIN to return to the state. The YYSTATE alias is for lex - * compatibility. - */ -#define YY_START ((yyg->yy_start - 1) / 2) -#define YYSTATE YY_START - -/* Action number for EOF rule of a given start state. */ -#define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1) - -/* Special action meaning "start processing a new file". */ -#define YY_NEW_FILE yyrestart(yyin ,yyscanner ) - -#define YY_END_OF_BUFFER_CHAR 0 - -/* Size of default input buffer. */ -#ifndef YY_BUF_SIZE -#define YY_BUF_SIZE 16384 -#endif - -/* The state buf must be large enough to hold one state per character in the main buffer. - */ -#define YY_STATE_BUF_SIZE ((YY_BUF_SIZE + 2) * sizeof(yy_state_type)) - -#ifndef YY_TYPEDEF_YY_BUFFER_STATE -#define YY_TYPEDEF_YY_BUFFER_STATE -typedef struct yy_buffer_state *YY_BUFFER_STATE; -#endif - -#ifndef YY_TYPEDEF_YY_SIZE_T -#define YY_TYPEDEF_YY_SIZE_T -typedef size_t yy_size_t; -#endif - -#define EOB_ACT_CONTINUE_SCAN 0 -#define EOB_ACT_END_OF_FILE 1 -#define EOB_ACT_LAST_MATCH 2 - - /* Note: We specifically omit the test for yy_rule_can_match_eol because it requires - * access to the local variable yy_act. 
Since yyless() is a macro, it would break - * existing scanners that call yyless() from OUTSIDE yylex. - * One obvious solution it to make yy_act a global. I tried that, and saw - * a 5% performance hit in a non-yylineno scanner, because yy_act is - * normally declared as a register variable-- so it is not worth it. - */ - #define YY_LESS_LINENO(n) \ - do { \ - int yyl;\ - for ( yyl = n; yyl < yyleng; ++yyl )\ - if ( yytext[yyl] == '\n' )\ - --yylineno;\ - }while(0) - -/* Return all but the first "n" matched characters back to the input stream. */ -#define yyless(n) \ - do \ - { \ - /* Undo effects of setting up yytext. */ \ - int yyless_macro_arg = (n); \ - YY_LESS_LINENO(yyless_macro_arg);\ - *yy_cp = yyg->yy_hold_char; \ - YY_RESTORE_YY_MORE_OFFSET \ - yyg->yy_c_buf_p = yy_cp = yy_bp + yyless_macro_arg - YY_MORE_ADJ; \ - YY_DO_BEFORE_ACTION; /* set up yytext again */ \ - } \ - while ( 0 ) - -#define unput(c) yyunput( c, yyg->yytext_ptr , yyscanner ) - -#ifndef YY_STRUCT_YY_BUFFER_STATE -#define YY_STRUCT_YY_BUFFER_STATE -struct yy_buffer_state - { - FILE *yy_input_file; - - char *yy_ch_buf; /* input buffer */ - char *yy_buf_pos; /* current position in input buffer */ - - /* Size of input buffer in bytes, not including room for EOB - * characters. - */ - yy_size_t yy_buf_size; - - /* Number of characters read into yy_ch_buf, not including EOB - * characters. - */ - yy_size_t yy_n_chars; - - /* Whether we "own" the buffer - i.e., we know we created it, - * and can realloc() it to grow it, and should free() it to - * delete it. - */ - int yy_is_our_buffer; - - /* Whether this is an "interactive" input source; if so, and - * if we're using stdio for input, then we want to use getc() - * instead of fread(), to make sure we stop fetching input after - * each newline. - */ - int yy_is_interactive; - - /* Whether we're considered to be at the beginning of a line. - * If so, '^' rules will be active on the next match, otherwise - * not. 
- */ - int yy_at_bol; - - int yy_bs_lineno; /**< The line count. */ - int yy_bs_column; /**< The column count. */ - - /* Whether to try to fill the input buffer when we reach the - * end of it. - */ - int yy_fill_buffer; - - int yy_buffer_status; - -#define YY_BUFFER_NEW 0 -#define YY_BUFFER_NORMAL 1 - /* When an EOF's been seen but there's still some text to process - * then we mark the buffer as YY_EOF_PENDING, to indicate that we - * shouldn't try reading from the input source any more. We might - * still have a bunch of tokens to match, though, because of - * possible backing-up. - * - * When we actually see the EOF, we change the status to "new" - * (via yyrestart()), so that the user can continue scanning by - * just pointing yyin at a new input file. - */ -#define YY_BUFFER_EOF_PENDING 2 - - }; -#endif /* !YY_STRUCT_YY_BUFFER_STATE */ - -/* We provide macros for accessing buffer states in case in the - * future we want to put the buffer states in a more general - * "scanner state". - * - * Returns the top of the stack, or NULL. - */ -#define YY_CURRENT_BUFFER ( yyg->yy_buffer_stack \ - ? yyg->yy_buffer_stack[yyg->yy_buffer_stack_top] \ - : NULL) - -/* Same as previous macro, but useful when we know that the buffer stack is not - * NULL or when we need an lvalue. For internal use only. 
- */ -#define YY_CURRENT_BUFFER_LVALUE yyg->yy_buffer_stack[yyg->yy_buffer_stack_top] - -void yyrestart (FILE *input_file ,yyscan_t yyscanner ); -void yy_switch_to_buffer (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner ); -YY_BUFFER_STATE yy_create_buffer (FILE *file,int size ,yyscan_t yyscanner ); -void yy_delete_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner ); -void yy_flush_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner ); -void yypush_buffer_state (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner ); -void yypop_buffer_state (yyscan_t yyscanner ); - -static void yyensure_buffer_stack (yyscan_t yyscanner ); -static void yy_load_buffer_state (yyscan_t yyscanner ); -static void yy_init_buffer (YY_BUFFER_STATE b,FILE *file ,yyscan_t yyscanner ); - -#define YY_FLUSH_BUFFER yy_flush_buffer(YY_CURRENT_BUFFER ,yyscanner) - -YY_BUFFER_STATE yy_scan_buffer (char *base,yy_size_t size ,yyscan_t yyscanner ); -YY_BUFFER_STATE yy_scan_string (yyconst char *yy_str ,yyscan_t yyscanner ); -YY_BUFFER_STATE yy_scan_bytes (yyconst char *bytes,yy_size_t len ,yyscan_t yyscanner ); - -void *yyalloc (yy_size_t ,yyscan_t yyscanner ); -void *yyrealloc (void *,yy_size_t ,yyscan_t yyscanner ); -void yyfree (void * ,yyscan_t yyscanner ); - -#define yy_new_buffer yy_create_buffer - -#define yy_set_interactive(is_interactive) \ - { \ - if ( ! YY_CURRENT_BUFFER ){ \ - yyensure_buffer_stack (yyscanner); \ - YY_CURRENT_BUFFER_LVALUE = \ - yy_create_buffer(yyin,YY_BUF_SIZE ,yyscanner); \ - } \ - YY_CURRENT_BUFFER_LVALUE->yy_is_interactive = is_interactive; \ - } - -#define yy_set_bol(at_bol) \ - { \ - if ( ! 
YY_CURRENT_BUFFER ){\ - yyensure_buffer_stack (yyscanner); \ - YY_CURRENT_BUFFER_LVALUE = \ - yy_create_buffer(yyin,YY_BUF_SIZE ,yyscanner); \ - } \ - YY_CURRENT_BUFFER_LVALUE->yy_at_bol = at_bol; \ - } - -#define YY_AT_BOL() (YY_CURRENT_BUFFER_LVALUE->yy_at_bol) - -/* Begin user sect3 */ - -#define yywrap(yyscanner) 1 -#define YY_SKIP_YYWRAP - -typedef unsigned char YY_CHAR; - -typedef int yy_state_type; - -#define yytext_ptr yytext_r - -static yy_state_type yy_get_previous_state (yyscan_t yyscanner ); -static yy_state_type yy_try_NUL_trans (yy_state_type current_state ,yyscan_t yyscanner); -static int yy_get_next_buffer (yyscan_t yyscanner ); -static void yy_fatal_error (yyconst char msg[] ,yyscan_t yyscanner ); - -/* Done after the current pattern has been matched and before the - * corresponding action - sets up yytext. - */ -#define YY_DO_BEFORE_ACTION \ - yyg->yytext_ptr = yy_bp; \ - yyleng = (size_t) (yy_cp - yy_bp); \ - yyg->yy_hold_char = *yy_cp; \ - *yy_cp = '\0'; \ - yyg->yy_c_buf_p = yy_cp; - -#define YY_NUM_RULES 22 -#define YY_END_OF_BUFFER 23 -/* This struct is not used in this scanner, - but its presence is necessary. 
*/ -struct yy_trans_info - { - flex_int32_t yy_verify; - flex_int32_t yy_nxt; - }; -static yyconst flex_int16_t yy_accept[98] = - { 0, - 0, 0, 0, 0, 0, 0, 0, 0, 23, 22, - 1, 22, 22, 22, 22, 22, 22, 22, 5, 1, - 5, 17, 1, 17, 21, 21, 18, 21, 21, 9, - 1, 9, 0, 3, 0, 0, 0, 0, 0, 0, - 4, 17, 17, 0, 17, 17, 7, 0, 20, 0, - 0, 0, 0, 0, 16, 8, 0, 0, 2, 14, - 0, 0, 0, 0, 19, 0, 17, 0, 17, 17, - 0, 0, 6, 20, 0, 15, 0, 0, 16, 0, - 0, 0, 0, 0, 19, 0, 0, 0, 10, 0, - 0, 0, 0, 12, 13, 11, 0 - - } ; - -static yyconst flex_int32_t yy_ec[256] = - { 0, - 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, - 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 2, 1, 4, 5, 1, 1, 1, 1, 6, - 6, 7, 6, 1, 8, 9, 10, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 1, 12, 13, - 6, 14, 1, 1, 1, 1, 1, 1, 1, 15, - 16, 1, 1, 17, 1, 1, 1, 1, 1, 1, - 1, 1, 18, 1, 1, 1, 1, 1, 1, 1, - 6, 19, 6, 1, 1, 1, 20, 21, 22, 1, - - 23, 1, 24, 1, 25, 1, 1, 26, 27, 1, - 28, 29, 1, 30, 1, 31, 32, 1, 1, 1, - 1, 1, 33, 6, 34, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 35, 1, 1, 1, - 36, 1, 1, 1, 1, 1, 1, 1, 1, 1, - - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 37, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1 - } ; - -static yyconst flex_int32_t yy_meta[38] = - { 0, - 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, - 1, 2, 3, 3, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 2, 2, 1, 1, 1 - } ; - -static yyconst flex_int16_t yy_base[113] = - { 0, - 0, 36, 4, 12, 72, 105, 14, 20, 135, 312, - 312, 117, 2, 0, 103, 105, 99, 95, 312, 312, - 119, 0, 312, 138, 312, 21, 312, 0, 1, 312, - 312, 118, 109, 312, 123, 111, 104, 94, 101, 85, - 312, 0, 171, 14, 0, 204, 312, 109, 113, 41, - 106, 96, 21, 23, 312, 312, 88, 98, 312, 312, - 73, 71, 70, 89, 312, 44, 0, 39, 0, 237, - 43, 90, 312, 312, 57, 312, 37, 69, 43, 77, - 
64, 57, 58, 64, 76, 94, 79, 59, 312, 39, - 14, 14, 4, 312, 312, 312, 312, 271, 274, 277, - - 280, 283, 0, 285, 288, 290, 293, 296, 299, 302, - 305, 308 - } ; - -static yyconst flex_int16_t yy_def[113] = - { 0, - 98, 98, 99, 99, 100, 100, 101, 101, 97, 97, - 97, 97, 97, 102, 97, 97, 97, 97, 97, 97, - 97, 103, 97, 104, 97, 97, 97, 105, 106, 97, - 97, 97, 97, 97, 107, 102, 97, 97, 97, 97, - 97, 103, 104, 108, 103, 109, 97, 97, 110, 97, - 97, 105, 106, 111, 97, 97, 97, 107, 97, 97, - 97, 97, 97, 97, 97, 112, 43, 108, 43, 109, - 97, 110, 97, 97, 97, 97, 106, 111, 106, 97, - 97, 97, 97, 97, 108, 112, 97, 97, 97, 97, - 97, 97, 97, 97, 97, 97, 0, 97, 97, 97, - - 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, - 97, 97 - } ; - -static yyconst flex_int16_t yy_nxt[350] = - { 0, - 42, 11, 11, 97, 12, 20, 11, 97, 34, 13, - 21, 35, 14, 20, 11, 31, 11, 65, 21, 54, - 32, 31, 11, 15, 16, 53, 32, 47, 17, 48, - 49, 50, 66, 96, 55, 95, 18, 11, 11, 54, - 12, 78, 65, 51, 94, 13, 44, 85, 14, 48, - 74, 50, 74, 87, 55, 54, 79, 66, 93, 15, - 16, 54, 86, 51, 17, 51, 74, 88, 74, 88, - 55, 53, 18, 23, 11, 24, 55, 25, 25, 65, - 33, 26, 92, 27, 28, 25, 91, 78, 74, 87, - 90, 89, 73, 84, 66, 83, 44, 85, 82, 81, - - 59, 51, 79, 80, 29, 25, 23, 11, 24, 76, - 25, 25, 86, 75, 26, 73, 27, 28, 25, 71, - 64, 63, 62, 61, 60, 59, 57, 56, 41, 40, - 39, 38, 37, 33, 97, 97, 97, 29, 25, 44, - 44, 45, 97, 44, 44, 97, 97, 44, 97, 44, - 44, 44, 97, 97, 97, 97, 46, 97, 97, 97, - 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, - 44, 44, 44, 44, 45, 97, 44, 44, 97, 97, - 44, 97, 44, 44, 44, 97, 97, 97, 97, 46, - 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, - - 97, 97, 97, 44, 44, 68, 44, 69, 97, 68, - 68, 97, 97, 68, 97, 68, 68, 68, 97, 97, - 97, 97, 70, 97, 97, 97, 97, 97, 97, 97, - 97, 97, 97, 97, 97, 97, 68, 68, 68, 44, - 69, 97, 68, 68, 97, 97, 68, 97, 68, 68, - 68, 97, 97, 97, 97, 70, 97, 97, 97, 97, - 97, 97, 97, 97, 97, 97, 97, 97, 97, 68, - 68, 10, 10, 10, 19, 19, 19, 22, 22, 22, - 30, 30, 30, 36, 36, 43, 43, 43, 52, 52, - 53, 
53, 53, 58, 58, 58, 44, 44, 44, 67, - - 67, 67, 72, 72, 72, 77, 77, 77, 68, 68, - 68, 9, 97, 97, 97, 97, 97, 97, 97, 97, - 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, - 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, - 97, 97, 97, 97, 97, 97, 97, 97, 97 - } ; - -static yyconst flex_int16_t yy_chk[350] = - { 0, - 103, 1, 1, 0, 1, 3, 3, 0, 13, 1, - 3, 13, 1, 4, 4, 7, 7, 44, 4, 29, - 7, 8, 8, 1, 1, 54, 8, 26, 1, 26, - 26, 26, 44, 93, 29, 92, 1, 2, 2, 53, - 2, 54, 68, 26, 91, 2, 66, 66, 2, 50, - 50, 50, 71, 71, 53, 77, 54, 68, 90, 2, - 2, 79, 66, 50, 2, 71, 75, 75, 88, 88, - 77, 78, 2, 5, 5, 5, 79, 5, 5, 85, - 84, 5, 83, 5, 5, 5, 82, 78, 87, 87, - 81, 80, 72, 64, 85, 63, 86, 86, 62, 61, - - 58, 87, 78, 57, 5, 5, 6, 6, 6, 52, - 6, 6, 86, 51, 6, 49, 6, 6, 6, 48, - 40, 39, 38, 37, 36, 35, 33, 32, 21, 18, - 17, 16, 15, 12, 9, 0, 0, 6, 6, 24, - 24, 24, 0, 24, 24, 0, 0, 24, 0, 24, - 24, 24, 0, 0, 0, 0, 24, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 24, 24, 43, 43, 43, 0, 43, 43, 0, 0, - 43, 0, 43, 43, 43, 0, 0, 0, 0, 43, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 43, 43, 46, 46, 46, 0, 46, - 46, 0, 0, 46, 0, 46, 46, 46, 0, 0, - 0, 0, 46, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 46, 46, 70, 70, - 70, 0, 70, 70, 0, 0, 70, 0, 70, 70, - 70, 0, 0, 0, 0, 70, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 70, - 70, 98, 98, 98, 99, 99, 99, 100, 100, 100, - 101, 101, 101, 102, 102, 104, 104, 104, 105, 105, - 106, 106, 106, 107, 107, 107, 108, 108, 108, 109, - - 109, 109, 110, 110, 110, 111, 111, 111, 112, 112, - 112, 97, 97, 97, 97, 97, 97, 97, 97, 97, - 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, - 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, - 97, 97, 97, 97, 97, 97, 97, 97, 97 - } ; - -/* Table of booleans, true if rule could match eol. */ -static yyconst flex_int32_t yy_rule_can_match_eol[23] = - { 0, -1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, - 0, 0, 0, }; - -/* The intent behind this definition is that it'll catch - * any uses of REJECT which flex missed. 
- */ -#define REJECT reject_used_but_not_detected -#define yymore() yymore_used_but_not_detected -#define YY_MORE_ADJ 0 -#define YY_RESTORE_YY_MORE_OFFSET -#line 1 "_jsgf_scanner.l" -/* -*- mode: text -*- */ -/* ==================================================================== - * Copyright (c) 2007 Carnegie Mellon University. All rights - * reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * This work was supported in part by funding from the Defense Advanced - * Research Projects Agency and the National Science Foundation of the - * United States of America, and the CMU Sphinx Speech Consortium. - * - * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND - * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, - * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY - * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - * ==================================================================== - * - */ -/* YOU MUST USE FLEX 2.5.37 OR NEWER TO PROCESS THIS FILE!!! */ -#line 39 "_jsgf_scanner.l" - -#include "jsgf_internal.h" -#include "jsgf_parser.h" - -#define YY_NO_UNISTD_H 1 - - - -#line 609 "jsgf_scanner.c" - -#define INITIAL 0 -#define COMMENT 1 -#define DECL 2 -#define DECLCOMMENT 3 - -#ifndef YY_NO_UNISTD_H -/* Special case for "unistd.h", since it is non-ANSI. We include it way - * down here because we want the user's section 1 to have been scanned first. - * The user has a chance to override it with an option. - */ -#include <unistd.h> -#endif - -#ifndef YY_EXTRA_TYPE -#define YY_EXTRA_TYPE void * -#endif - -/* Holds the entire state of the reentrant scanner. */ -struct yyguts_t - { - - /* User-defined. Not touched by flex. */ - YY_EXTRA_TYPE yyextra_r; - - /* The rest are the same as the globals declared in the non-reentrant scanner. */ - FILE *yyin_r, *yyout_r; - size_t yy_buffer_stack_top; /**< index of top of stack. */ - size_t yy_buffer_stack_max; /**< capacity of stack. */ - YY_BUFFER_STATE * yy_buffer_stack; /**< Stack as an array. */ - char yy_hold_char; - yy_size_t yy_n_chars; - yy_size_t yyleng_r; - char *yy_c_buf_p; - int yy_init; - int yy_start; - int yy_did_buffer_switch_on_eof; - int yy_start_stack_ptr; - int yy_start_stack_depth; - int *yy_start_stack; - yy_state_type yy_last_accepting_state; - char* yy_last_accepting_cpos; - - int yylineno_r; - int yy_flex_debug_r; - - char *yytext_r; - int yy_more_flag; - int yy_more_len; - - YYSTYPE * yylval_r; - - }; /* end struct yyguts_t */ - -static int yy_init_globals (yyscan_t yyscanner ); - - /* This must go here because YYSTYPE and YYLTYPE are included - * from bison output in section 1.*/ - # define yylval yyg->yylval_r - -int yylex_init (yyscan_t* scanner); - -int yylex_init_extra (YY_EXTRA_TYPE user_defined,yyscan_t* scanner); - -/* Accessor methods to globals. 
- These are made visible to non-reentrant scanners for convenience. */ - -int yylex_destroy (yyscan_t yyscanner ); - -int yyget_debug (yyscan_t yyscanner ); - -void yyset_debug (int debug_flag ,yyscan_t yyscanner ); - -YY_EXTRA_TYPE yyget_extra (yyscan_t yyscanner ); - -void yyset_extra (YY_EXTRA_TYPE user_defined ,yyscan_t yyscanner ); - -FILE *yyget_in (yyscan_t yyscanner ); - -void yyset_in (FILE * in_str ,yyscan_t yyscanner ); - -FILE *yyget_out (yyscan_t yyscanner ); - -void yyset_out (FILE * out_str ,yyscan_t yyscanner ); - -yy_size_t yyget_leng (yyscan_t yyscanner ); - -char *yyget_text (yyscan_t yyscanner ); - -int yyget_lineno (yyscan_t yyscanner ); - -void yyset_lineno (int line_number ,yyscan_t yyscanner ); - -int yyget_column (yyscan_t yyscanner ); - -void yyset_column (int column_no ,yyscan_t yyscanner ); - -YYSTYPE * yyget_lval (yyscan_t yyscanner ); - -void yyset_lval (YYSTYPE * yylval_param ,yyscan_t yyscanner ); - -/* Macros after this point can all be overridden by user definitions in - * section 1. - */ - -#ifndef YY_SKIP_YYWRAP -#ifdef __cplusplus -extern "C" int yywrap (yyscan_t yyscanner ); -#else -extern int yywrap (yyscan_t yyscanner ); -#endif -#endif - -#ifndef yytext_ptr -static void yy_flex_strncpy (char *,yyconst char *,int ,yyscan_t yyscanner); -#endif - -#ifdef YY_NEED_STRLEN -static int yy_flex_strlen (yyconst char * ,yyscan_t yyscanner); -#endif - -#ifndef YY_NO_INPUT - -#ifdef __cplusplus -static int yyinput (yyscan_t yyscanner ); -#else -static int input (yyscan_t yyscanner ); -#endif - -#endif - -/* Amount of stuff to slurp up with each read. */ -#ifndef YY_READ_BUF_SIZE -#define YY_READ_BUF_SIZE 8192 -#endif - -/* Copy whatever the last rule matched to the standard output. */ -#ifndef ECHO -/* This used to be an fputs(), but since the string might contain NUL's, - * we now use fwrite(). - */ -#define ECHO do { if (fwrite( yytext, yyleng, 1, yyout )) {} } while (0) -#endif - -/* Gets input and stuffs it into "buf". 
number of characters read, or YY_NULL, - * is returned in "result". - */ -#ifndef YY_INPUT -#define YY_INPUT(buf,result,max_size) \ - if ( YY_CURRENT_BUFFER_LVALUE->yy_is_interactive ) \ - { \ - int c = '*'; \ - size_t n; \ - for ( n = 0; n < max_size && \ - (c = getc( yyin )) != EOF && c != '\n'; ++n ) \ - buf[n] = (char) c; \ - if ( c == '\n' ) \ - buf[n++] = (char) c; \ - if ( c == EOF && ferror( yyin ) ) \ - YY_FATAL_ERROR( "input in flex scanner failed" ); \ - result = n; \ - } \ - else \ - { \ - errno=0; \ - while ( (result = fread(buf, 1, max_size, yyin))==0 && ferror(yyin)) \ - { \ - if( errno != EINTR) \ - { \ - YY_FATAL_ERROR( "input in flex scanner failed" ); \ - break; \ - } \ - errno=0; \ - clearerr(yyin); \ - } \ - }\ -\ - -#endif - -/* No semi-colon after return; correct usage is to write "yyterminate();" - - * we don't want an extra ';' after the "return" because that will cause - * some compilers to complain about unreachable statements. - */ -#ifndef yyterminate -#define yyterminate() return YY_NULL -#endif - -/* Number of entries by which start-condition stack grows. */ -#ifndef YY_START_STACK_INCR -#define YY_START_STACK_INCR 25 -#endif - -/* Report a fatal error. */ -#ifndef YY_FATAL_ERROR -#define YY_FATAL_ERROR(msg) yy_fatal_error( msg , yyscanner) -#endif - -/* end tables serialization structures and prototypes */ - -/* Default declaration of generated scanner - a define so the user can - * easily add parameters. - */ -#ifndef YY_DECL -#define YY_DECL_IS_OURS 1 - -extern int yylex \ - (YYSTYPE * yylval_param ,yyscan_t yyscanner); - -#define YY_DECL int yylex \ - (YYSTYPE * yylval_param , yyscan_t yyscanner) -#endif /* !YY_DECL */ - -/* Code executed at the beginning of each rule, after yytext and yyleng - * have been set up. - */ -#ifndef YY_USER_ACTION -#define YY_USER_ACTION -#endif - -/* Code executed at the end of each rule. 
*/ -#ifndef YY_BREAK -#define YY_BREAK break; -#endif - -#define YY_RULE_SETUP \ - YY_USER_ACTION - -/** The main scanner function which does all the work. - */ -YY_DECL -{ - register yy_state_type yy_current_state; - register char *yy_cp, *yy_bp; - register int yy_act; - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - -#line 59 "_jsgf_scanner.l" - - -#line 850 "jsgf_scanner.c" - - yylval = yylval_param; - - if ( !yyg->yy_init ) - { - yyg->yy_init = 1; - -#ifdef YY_USER_INIT - YY_USER_INIT; -#endif - - if ( ! yyg->yy_start ) - yyg->yy_start = 1; /* first start state */ - - if ( ! yyin ) - yyin = stdin; - - if ( ! yyout ) - yyout = stdout; - - if ( ! YY_CURRENT_BUFFER ) { - yyensure_buffer_stack (yyscanner); - YY_CURRENT_BUFFER_LVALUE = - yy_create_buffer(yyin,YY_BUF_SIZE ,yyscanner); - } - - yy_load_buffer_state(yyscanner ); - } - - while ( 1 ) /* loops until end-of-file is reached */ - { - yy_cp = yyg->yy_c_buf_p; - - /* Support of yytext. */ - *yy_cp = yyg->yy_hold_char; - - /* yy_bp points to the position in yy_ch_buf of the start of - * the current run. 
- */ - yy_bp = yy_cp; - - yy_current_state = yyg->yy_start; -yy_match: - do - { - register YY_CHAR yy_c = yy_ec[YY_SC_TO_UI(*yy_cp)]; - if ( yy_accept[yy_current_state] ) - { - yyg->yy_last_accepting_state = yy_current_state; - yyg->yy_last_accepting_cpos = yy_cp; - } - while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) - { - yy_current_state = (int) yy_def[yy_current_state]; - if ( yy_current_state >= 98 ) - yy_c = yy_meta[(unsigned int) yy_c]; - } - yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c]; - ++yy_cp; - } - while ( yy_current_state != 97 ); - yy_cp = yyg->yy_last_accepting_cpos; - yy_current_state = yyg->yy_last_accepting_state; - -yy_find_action: - yy_act = yy_accept[yy_current_state]; - - YY_DO_BEFORE_ACTION; - - if ( yy_act != YY_END_OF_BUFFER && yy_rule_can_match_eol[yy_act] ) - { - int yyl; - for ( yyl = 0; yyl < yyleng; ++yyl ) - if ( yytext[yyl] == '\n' ) - - do{ yylineno++; - yycolumn=0; - }while(0) -; - } - -do_action: /* This label is used only to access EOF actions. 
*/ - - switch ( yy_act ) - { /* beginning of action switch */ - case 0: /* must back up */ - /* undo the effects of YY_DO_BEFORE_ACTION */ - *yy_cp = yyg->yy_hold_char; - yy_cp = yyg->yy_last_accepting_cpos; - yy_current_state = yyg->yy_last_accepting_state; - goto yy_find_action; - -case 1: -/* rule 1 can match eol */ -YY_RULE_SETUP -#line 61 "_jsgf_scanner.l" -; /* ignore whitespace */ - YY_BREAK -case 2: -/* rule 2 can match eol */ -YY_RULE_SETUP -#line 62 "_jsgf_scanner.l" -; /* single-line comments */ - YY_BREAK -case 3: -YY_RULE_SETUP -#line 63 "_jsgf_scanner.l" -{ BEGIN(COMMENT); } /* C-style comments */ - YY_BREAK -case 4: -YY_RULE_SETUP -#line 64 "_jsgf_scanner.l" -{ BEGIN(INITIAL); } - YY_BREAK -case 5: -YY_RULE_SETUP -#line 65 "_jsgf_scanner.l" -; /* Ignore stuff in comment mode */ - YY_BREAK -case 6: -/* rule 6 can match eol */ -YY_RULE_SETUP -#line 67 "_jsgf_scanner.l" -; /* single-line comments inside decl */ - YY_BREAK -case 7: -YY_RULE_SETUP -#line 68 "_jsgf_scanner.l" -{ BEGIN(DECLCOMMENT); } /* C-style comments inside decl */ - YY_BREAK -case 8: -YY_RULE_SETUP -#line 69 "_jsgf_scanner.l" -{ BEGIN(DECL); } - YY_BREAK -case 9: -YY_RULE_SETUP -#line 70 "_jsgf_scanner.l" -; /* Ignore stuff in comment mode */ - YY_BREAK -case 10: -YY_RULE_SETUP -#line 72 "_jsgf_scanner.l" -{BEGIN(DECL); return HEADER;} - YY_BREAK -case 11: -YY_RULE_SETUP -#line 73 "_jsgf_scanner.l" -{BEGIN(DECL); return GRAMMAR;} - YY_BREAK -case 12: -YY_RULE_SETUP -#line 74 "_jsgf_scanner.l" -{BEGIN(DECL); return IMPORT;} - YY_BREAK -case 13: -YY_RULE_SETUP -#line 75 "_jsgf_scanner.l" -{BEGIN(DECL); return PUBLIC;} - YY_BREAK -case 14: -/* rule 14 can match eol */ -YY_RULE_SETUP -#line 77 "_jsgf_scanner.l" -{ BEGIN(DECL); yylval->name = strdup(yytext); return RULENAME; } - YY_BREAK -case 15: -/* rule 15 can match eol */ -YY_RULE_SETUP -#line 78 "_jsgf_scanner.l" -{ yylval->name = strdup(yytext); return RULENAME; } - YY_BREAK -case 16: -/* rule 16 can match eol */ -YY_RULE_SETUP -#line 
80 "_jsgf_scanner.l" -{ yylval->name = strdup(yytext); return TAG; } - YY_BREAK -case 17: -YY_RULE_SETUP -#line 81 "_jsgf_scanner.l" -{ yylval->name = strdup(yytext); return TOKEN; } - YY_BREAK -case 18: -YY_RULE_SETUP -#line 82 "_jsgf_scanner.l" -{ BEGIN(INITIAL); return yytext[0]; } - YY_BREAK -case 19: -/* rule 19 can match eol */ -YY_RULE_SETUP -#line 83 "_jsgf_scanner.l" -{ yylval->name = strdup(yytext); return TOKEN; } - YY_BREAK -case 20: -YY_RULE_SETUP -#line 84 "_jsgf_scanner.l" -{ yylval->weight = atof_c(yytext+1); return WEIGHT; } - YY_BREAK -case 21: -YY_RULE_SETUP -#line 85 "_jsgf_scanner.l" -return yytext[0]; /* Single-character tokens */ - YY_BREAK -case 22: -YY_RULE_SETUP -#line 87 "_jsgf_scanner.l" -ECHO; - YY_BREAK -#line 1060 "jsgf_scanner.c" -case YY_STATE_EOF(INITIAL): -case YY_STATE_EOF(COMMENT): -case YY_STATE_EOF(DECL): -case YY_STATE_EOF(DECLCOMMENT): - yyterminate(); - - case YY_END_OF_BUFFER: - { - /* Amount of text matched not including the EOB char. */ - int yy_amount_of_matched_text = (int) (yy_cp - yyg->yytext_ptr) - 1; - - /* Undo the effects of YY_DO_BEFORE_ACTION. */ - *yy_cp = yyg->yy_hold_char; - YY_RESTORE_YY_MORE_OFFSET - - if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_NEW ) - { - /* We're scanning a new file or input source. It's - * possible that this happened because the user - * just pointed yyin at a new source and called - * yylex(). If so, then we have to assure - * consistency between YY_CURRENT_BUFFER and our - * globals. Here is the right place to do so, because - * this is the first action (other than possibly a - * back-up) that will match for the new input source. 
- */ - yyg->yy_n_chars = YY_CURRENT_BUFFER_LVALUE->yy_n_chars; - YY_CURRENT_BUFFER_LVALUE->yy_input_file = yyin; - YY_CURRENT_BUFFER_LVALUE->yy_buffer_status = YY_BUFFER_NORMAL; - } - - /* Note that here we test for yy_c_buf_p "<=" to the position - * of the first EOB in the buffer, since yy_c_buf_p will - * already have been incremented past the NUL character - * (since all states make transitions on EOB to the - * end-of-buffer state). Contrast this with the test - * in input(). - */ - if ( yyg->yy_c_buf_p <= &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars] ) - { /* This was really a NUL. */ - yy_state_type yy_next_state; - - yyg->yy_c_buf_p = yyg->yytext_ptr + yy_amount_of_matched_text; - - yy_current_state = yy_get_previous_state( yyscanner ); - - /* Okay, we're now positioned to make the NUL - * transition. We couldn't have - * yy_get_previous_state() go ahead and do it - * for us because it doesn't know how to deal - * with the possibility of jamming (and we don't - * want to build jamming into it because then it - * will run more slowly). - */ - - yy_next_state = yy_try_NUL_trans( yy_current_state , yyscanner); - - yy_bp = yyg->yytext_ptr + YY_MORE_ADJ; - - if ( yy_next_state ) - { - /* Consume the NUL. */ - yy_cp = ++yyg->yy_c_buf_p; - yy_current_state = yy_next_state; - goto yy_match; - } - - else - { - yy_cp = yyg->yy_last_accepting_cpos; - yy_current_state = yyg->yy_last_accepting_state; - goto yy_find_action; - } - } - - else switch ( yy_get_next_buffer( yyscanner ) ) - { - case EOB_ACT_END_OF_FILE: - { - yyg->yy_did_buffer_switch_on_eof = 0; - - if ( yywrap(yyscanner ) ) - { - /* Note: because we've taken care in - * yy_get_next_buffer() to have set up - * yytext, we can now set up - * yy_c_buf_p so that if some total - * hoser (like flex itself) wants to - * call the scanner after we return the - * YY_NULL, it'll still work - another - * YY_NULL will get returned. 
- */ - yyg->yy_c_buf_p = yyg->yytext_ptr + YY_MORE_ADJ; - - yy_act = YY_STATE_EOF(YY_START); - goto do_action; - } - - else - { - if ( ! yyg->yy_did_buffer_switch_on_eof ) - YY_NEW_FILE; - } - break; - } - - case EOB_ACT_CONTINUE_SCAN: - yyg->yy_c_buf_p = - yyg->yytext_ptr + yy_amount_of_matched_text; - - yy_current_state = yy_get_previous_state( yyscanner ); - - yy_cp = yyg->yy_c_buf_p; - yy_bp = yyg->yytext_ptr + YY_MORE_ADJ; - goto yy_match; - - case EOB_ACT_LAST_MATCH: - yyg->yy_c_buf_p = - &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars]; - - yy_current_state = yy_get_previous_state( yyscanner ); - - yy_cp = yyg->yy_c_buf_p; - yy_bp = yyg->yytext_ptr + YY_MORE_ADJ; - goto yy_find_action; - } - break; - } - - default: - YY_FATAL_ERROR( - "fatal flex scanner internal error--no action found" ); - } /* end of action switch */ - } /* end of scanning one token */ -} /* end of yylex */ - -/* yy_get_next_buffer - try to read in a new buffer - * - * Returns a code representing an action: - * EOB_ACT_LAST_MATCH - - * EOB_ACT_CONTINUE_SCAN - continue scanning from current position - * EOB_ACT_END_OF_FILE - end of file - */ -static int yy_get_next_buffer (yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - register char *dest = YY_CURRENT_BUFFER_LVALUE->yy_ch_buf; - register char *source = yyg->yytext_ptr; - register int number_to_move, i; - int ret_val; - - if ( yyg->yy_c_buf_p > &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars + 1] ) - YY_FATAL_ERROR( - "fatal flex scanner internal error--end of buffer missed" ); - - if ( YY_CURRENT_BUFFER_LVALUE->yy_fill_buffer == 0 ) - { /* Don't try to fill the buffer, so this is an EOF. */ - if ( yyg->yy_c_buf_p - yyg->yytext_ptr - YY_MORE_ADJ == 1 ) - { - /* We matched a single character, the EOB, so - * treat this as a final EOF. - */ - return EOB_ACT_END_OF_FILE; - } - - else - { - /* We matched some text prior to the EOB, first - * process it. 
- */ - return EOB_ACT_LAST_MATCH; - } - } - - /* Try to read more data. */ - - /* First move last chars to start of buffer. */ - number_to_move = (int) (yyg->yy_c_buf_p - yyg->yytext_ptr) - 1; - - for ( i = 0; i < number_to_move; ++i ) - *(dest++) = *(source++); - - if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_EOF_PENDING ) - /* don't do the read, it's not guaranteed to return an EOF, - * just force an EOF - */ - YY_CURRENT_BUFFER_LVALUE->yy_n_chars = yyg->yy_n_chars = 0; - - else - { - int num_to_read = - YY_CURRENT_BUFFER_LVALUE->yy_buf_size - number_to_move - 1; - - while ( num_to_read <= 0 ) - { /* Not enough room in the buffer - grow it. */ - - /* just a shorter name for the current buffer */ - YY_BUFFER_STATE b = YY_CURRENT_BUFFER_LVALUE; - - int yy_c_buf_p_offset = - (int) (yyg->yy_c_buf_p - b->yy_ch_buf); - - if ( b->yy_is_our_buffer ) - { - yy_size_t new_size = b->yy_buf_size * 2; - - if ( new_size <= 0 ) - b->yy_buf_size += b->yy_buf_size / 8; - else - b->yy_buf_size *= 2; - - b->yy_ch_buf = (char *) - /* Include room in for 2 EOB chars. */ - yyrealloc((void *) b->yy_ch_buf,b->yy_buf_size + 2 ,yyscanner ); - } - else - /* Can't grow it, we don't own it. */ - b->yy_ch_buf = 0; - - if ( ! b->yy_ch_buf ) - YY_FATAL_ERROR( - "fatal error - scanner input buffer overflow" ); - - yyg->yy_c_buf_p = &b->yy_ch_buf[yy_c_buf_p_offset]; - - num_to_read = YY_CURRENT_BUFFER_LVALUE->yy_buf_size - - number_to_move - 1; - - } - - if ( num_to_read > YY_READ_BUF_SIZE ) - num_to_read = YY_READ_BUF_SIZE; - - /* Read in more data. 
*/ - YY_INPUT( (&YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[number_to_move]), - yyg->yy_n_chars, num_to_read ); - - YY_CURRENT_BUFFER_LVALUE->yy_n_chars = yyg->yy_n_chars; - } - - if ( yyg->yy_n_chars == 0 ) - { - if ( number_to_move == YY_MORE_ADJ ) - { - ret_val = EOB_ACT_END_OF_FILE; - yyrestart(yyin ,yyscanner); - } - - else - { - ret_val = EOB_ACT_LAST_MATCH; - YY_CURRENT_BUFFER_LVALUE->yy_buffer_status = - YY_BUFFER_EOF_PENDING; - } - } - - else - ret_val = EOB_ACT_CONTINUE_SCAN; - - if ((yy_size_t) (yyg->yy_n_chars + number_to_move) > YY_CURRENT_BUFFER_LVALUE->yy_buf_size) { - /* Extend the array by 50%, plus the number we really need. */ - yy_size_t new_size = yyg->yy_n_chars + number_to_move + (yyg->yy_n_chars >> 1); - YY_CURRENT_BUFFER_LVALUE->yy_ch_buf = (char *) yyrealloc((void *) YY_CURRENT_BUFFER_LVALUE->yy_ch_buf,new_size ,yyscanner ); - if ( ! YY_CURRENT_BUFFER_LVALUE->yy_ch_buf ) - YY_FATAL_ERROR( "out of dynamic memory in yy_get_next_buffer()" ); - } - - yyg->yy_n_chars += number_to_move; - YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars] = YY_END_OF_BUFFER_CHAR; - YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars + 1] = YY_END_OF_BUFFER_CHAR; - - yyg->yytext_ptr = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[0]; - - return ret_val; -} - -/* yy_get_previous_state - get the state just before the EOB char was reached */ - - static yy_state_type yy_get_previous_state (yyscan_t yyscanner) -{ - register yy_state_type yy_current_state; - register char *yy_cp; - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - - yy_current_state = yyg->yy_start; - - for ( yy_cp = yyg->yytext_ptr + YY_MORE_ADJ; yy_cp < yyg->yy_c_buf_p; ++yy_cp ) - { - register YY_CHAR yy_c = (*yy_cp ? 
yy_ec[YY_SC_TO_UI(*yy_cp)] : 1); - if ( yy_accept[yy_current_state] ) - { - yyg->yy_last_accepting_state = yy_current_state; - yyg->yy_last_accepting_cpos = yy_cp; - } - while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) - { - yy_current_state = (int) yy_def[yy_current_state]; - if ( yy_current_state >= 98 ) - yy_c = yy_meta[(unsigned int) yy_c]; - } - yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c]; - } - - return yy_current_state; -} - -/* yy_try_NUL_trans - try to make a transition on the NUL character - * - * synopsis - * next_state = yy_try_NUL_trans( current_state ); - */ - static yy_state_type yy_try_NUL_trans (yy_state_type yy_current_state , yyscan_t yyscanner) -{ - register int yy_is_jam; - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; /* This var may be unused depending upon options. */ - register char *yy_cp = yyg->yy_c_buf_p; - - register YY_CHAR yy_c = 1; - if ( yy_accept[yy_current_state] ) - { - yyg->yy_last_accepting_state = yy_current_state; - yyg->yy_last_accepting_cpos = yy_cp; - } - while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) - { - yy_current_state = (int) yy_def[yy_current_state]; - if ( yy_current_state >= 98 ) - yy_c = yy_meta[(unsigned int) yy_c]; - } - yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c]; - yy_is_jam = (yy_current_state == 97); - - (void)yyg; - return yy_is_jam ? 0 : yy_current_state; -} - -#ifndef YY_NO_INPUT -#ifdef __cplusplus - static int yyinput (yyscan_t yyscanner) -#else - static int input (yyscan_t yyscanner) -#endif - -{ - int c; - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - - *yyg->yy_c_buf_p = yyg->yy_hold_char; - - if ( *yyg->yy_c_buf_p == YY_END_OF_BUFFER_CHAR ) - { - /* yy_c_buf_p now points to the character we want to return. - * If this occurs *before* the EOB characters, then it's a - * valid NUL; if not, then we've hit the end of the buffer. 
- */ - if ( yyg->yy_c_buf_p < &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars] ) - /* This was really a NUL. */ - *yyg->yy_c_buf_p = '\0'; - - else - { /* need more input */ - yy_size_t offset = yyg->yy_c_buf_p - yyg->yytext_ptr; - ++yyg->yy_c_buf_p; - - switch ( yy_get_next_buffer( yyscanner ) ) - { - case EOB_ACT_LAST_MATCH: - /* This happens because yy_g_n_b() - * sees that we've accumulated a - * token and flags that we need to - * try matching the token before - * proceeding. But for input(), - * there's no matching to consider. - * So convert the EOB_ACT_LAST_MATCH - * to EOB_ACT_END_OF_FILE. - */ - - /* Reset buffer status. */ - yyrestart(yyin ,yyscanner); - - /*FALLTHROUGH*/ - - case EOB_ACT_END_OF_FILE: - { - if ( yywrap(yyscanner ) ) - return EOF; - - if ( ! yyg->yy_did_buffer_switch_on_eof ) - YY_NEW_FILE; -#ifdef __cplusplus - return yyinput(yyscanner); -#else - return input(yyscanner); -#endif - } - - case EOB_ACT_CONTINUE_SCAN: - yyg->yy_c_buf_p = yyg->yytext_ptr + offset; - break; - } - } - } - - c = *(unsigned char *) yyg->yy_c_buf_p; /* cast for 8-bit char's */ - *yyg->yy_c_buf_p = '\0'; /* preserve yytext */ - yyg->yy_hold_char = *++yyg->yy_c_buf_p; - - if ( c == '\n' ) - - do{ yylineno++; - yycolumn=0; - }while(0) -; - - return c; -} -#endif /* ifndef YY_NO_INPUT */ - -/** Immediately switch to a different input stream. - * @param input_file A readable stream. - * @param yyscanner The scanner object. - * @note This function does not reset the start condition to @c INITIAL . - */ - void yyrestart (FILE * input_file , yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - - if ( ! YY_CURRENT_BUFFER ){ - yyensure_buffer_stack (yyscanner); - YY_CURRENT_BUFFER_LVALUE = - yy_create_buffer(yyin,YY_BUF_SIZE ,yyscanner); - } - - yy_init_buffer(YY_CURRENT_BUFFER,input_file ,yyscanner); - yy_load_buffer_state(yyscanner ); -} - -/** Switch to a different input buffer. - * @param new_buffer The new input buffer. 
- * @param yyscanner The scanner object. - */ - void yy_switch_to_buffer (YY_BUFFER_STATE new_buffer , yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - - /* TODO. We should be able to replace this entire function body - * with - * yypop_buffer_state(); - * yypush_buffer_state(new_buffer); - */ - yyensure_buffer_stack (yyscanner); - if ( YY_CURRENT_BUFFER == new_buffer ) - return; - - if ( YY_CURRENT_BUFFER ) - { - /* Flush out information for old buffer. */ - *yyg->yy_c_buf_p = yyg->yy_hold_char; - YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = yyg->yy_c_buf_p; - YY_CURRENT_BUFFER_LVALUE->yy_n_chars = yyg->yy_n_chars; - } - - YY_CURRENT_BUFFER_LVALUE = new_buffer; - yy_load_buffer_state(yyscanner ); - - /* We don't actually know whether we did this switch during - * EOF (yywrap()) processing, but the only time this flag - * is looked at is after yywrap() is called, so it's safe - * to go ahead and always set it. - */ - yyg->yy_did_buffer_switch_on_eof = 1; -} - -static void yy_load_buffer_state (yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - yyg->yy_n_chars = YY_CURRENT_BUFFER_LVALUE->yy_n_chars; - yyg->yytext_ptr = yyg->yy_c_buf_p = YY_CURRENT_BUFFER_LVALUE->yy_buf_pos; - yyin = YY_CURRENT_BUFFER_LVALUE->yy_input_file; - yyg->yy_hold_char = *yyg->yy_c_buf_p; -} - -/** Allocate and initialize an input buffer state. - * @param file A readable stream. - * @param size The character buffer size in bytes. When in doubt, use @c YY_BUF_SIZE. - * @param yyscanner The scanner object. - * @return the allocated buffer state. - */ - YY_BUFFER_STATE yy_create_buffer (FILE * file, int size , yyscan_t yyscanner) -{ - YY_BUFFER_STATE b; - - b = (YY_BUFFER_STATE) yyalloc(sizeof( struct yy_buffer_state ) ,yyscanner ); - if ( ! 
b ) - YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); - - b->yy_buf_size = size; - - /* yy_ch_buf has to be 2 characters longer than the size given because - * we need to put in 2 end-of-buffer characters. - */ - b->yy_ch_buf = (char *) yyalloc(b->yy_buf_size + 2 ,yyscanner ); - if ( ! b->yy_ch_buf ) - YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); - - b->yy_is_our_buffer = 1; - - yy_init_buffer(b,file ,yyscanner); - - return b; -} - -/** Destroy the buffer. - * @param b a buffer created with yy_create_buffer() - * @param yyscanner The scanner object. - */ - void yy_delete_buffer (YY_BUFFER_STATE b , yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - - if ( ! b ) - return; - - if ( b == YY_CURRENT_BUFFER ) /* Not sure if we should pop here. */ - YY_CURRENT_BUFFER_LVALUE = (YY_BUFFER_STATE) 0; - - if ( b->yy_is_our_buffer ) - yyfree((void *) b->yy_ch_buf ,yyscanner ); - - yyfree((void *) b ,yyscanner ); -} - -/* Initializes or reinitializes a buffer. - * This function is sometimes called more than once on the same buffer, - * such as during a yyrestart() or at EOF. - */ - static void yy_init_buffer (YY_BUFFER_STATE b, FILE * file , yyscan_t yyscanner) - -{ - int oerrno = errno; - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - - yy_flush_buffer(b ,yyscanner); - - b->yy_input_file = file; - b->yy_fill_buffer = 1; - - /* If b is the current buffer, then yy_init_buffer was _probably_ - * called from yyrestart() or through yy_get_next_buffer. - * In that case, we don't want to reset the lineno or column. - */ - if (b != YY_CURRENT_BUFFER){ - b->yy_bs_lineno = 1; - b->yy_bs_column = 0; - } - - b->yy_is_interactive = 0; - - errno = oerrno; -} - -/** Discard all buffered characters. On the next scan, YY_INPUT will be called. - * @param b the buffer state to be flushed, usually @c YY_CURRENT_BUFFER. - * @param yyscanner The scanner object. 
- */ - void yy_flush_buffer (YY_BUFFER_STATE b , yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - if ( ! b ) - return; - - b->yy_n_chars = 0; - - /* We always need two end-of-buffer characters. The first causes - * a transition to the end-of-buffer state. The second causes - * a jam in that state. - */ - b->yy_ch_buf[0] = YY_END_OF_BUFFER_CHAR; - b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR; - - b->yy_buf_pos = &b->yy_ch_buf[0]; - - b->yy_at_bol = 1; - b->yy_buffer_status = YY_BUFFER_NEW; - - if ( b == YY_CURRENT_BUFFER ) - yy_load_buffer_state(yyscanner ); -} - -/** Pushes the new state onto the stack. The new state becomes - * the current state. This function will allocate the stack - * if necessary. - * @param new_buffer The new state. - * @param yyscanner The scanner object. - */ -void yypush_buffer_state (YY_BUFFER_STATE new_buffer , yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - if (new_buffer == NULL) - return; - - yyensure_buffer_stack(yyscanner); - - /* This block is copied from yy_switch_to_buffer. */ - if ( YY_CURRENT_BUFFER ) - { - /* Flush out information for old buffer. */ - *yyg->yy_c_buf_p = yyg->yy_hold_char; - YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = yyg->yy_c_buf_p; - YY_CURRENT_BUFFER_LVALUE->yy_n_chars = yyg->yy_n_chars; - } - - /* Only push if top exists. Otherwise, replace top. */ - if (YY_CURRENT_BUFFER) - yyg->yy_buffer_stack_top++; - YY_CURRENT_BUFFER_LVALUE = new_buffer; - - /* copied from yy_switch_to_buffer. */ - yy_load_buffer_state(yyscanner ); - yyg->yy_did_buffer_switch_on_eof = 1; -} - -/** Removes and deletes the top of the stack, if present. - * The next element becomes the new top. - * @param yyscanner The scanner object. 
- */ -void yypop_buffer_state (yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - if (!YY_CURRENT_BUFFER) - return; - - yy_delete_buffer(YY_CURRENT_BUFFER ,yyscanner); - YY_CURRENT_BUFFER_LVALUE = NULL; - if (yyg->yy_buffer_stack_top > 0) - --yyg->yy_buffer_stack_top; - - if (YY_CURRENT_BUFFER) { - yy_load_buffer_state(yyscanner ); - yyg->yy_did_buffer_switch_on_eof = 1; - } -} - -/* Allocates the stack if it does not exist. - * Guarantees space for at least one push. - */ -static void yyensure_buffer_stack (yyscan_t yyscanner) -{ - yy_size_t num_to_alloc; - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - - if (!yyg->yy_buffer_stack) { - - /* First allocation is just for 2 elements, since we don't know if this - * scanner will even need a stack. We use 2 instead of 1 to avoid an - * immediate realloc on the next call. - */ - num_to_alloc = 1; - yyg->yy_buffer_stack = (struct yy_buffer_state**)yyalloc - (num_to_alloc * sizeof(struct yy_buffer_state*) - , yyscanner); - if ( ! yyg->yy_buffer_stack ) - YY_FATAL_ERROR( "out of dynamic memory in yyensure_buffer_stack()" ); - - memset(yyg->yy_buffer_stack, 0, num_to_alloc * sizeof(struct yy_buffer_state*)); - - yyg->yy_buffer_stack_max = num_to_alloc; - yyg->yy_buffer_stack_top = 0; - return; - } - - if (yyg->yy_buffer_stack_top >= (yyg->yy_buffer_stack_max) - 1){ - - /* Increase the buffer to prepare for a possible push. */ - int grow_size = 8 /* arbitrary grow size */; - - num_to_alloc = yyg->yy_buffer_stack_max + grow_size; - yyg->yy_buffer_stack = (struct yy_buffer_state**)yyrealloc - (yyg->yy_buffer_stack, - num_to_alloc * sizeof(struct yy_buffer_state*) - , yyscanner); - if ( ! 
yyg->yy_buffer_stack ) - YY_FATAL_ERROR( "out of dynamic memory in yyensure_buffer_stack()" ); - - /* zero only the new slots.*/ - memset(yyg->yy_buffer_stack + yyg->yy_buffer_stack_max, 0, grow_size * sizeof(struct yy_buffer_state*)); - yyg->yy_buffer_stack_max = num_to_alloc; - } -} - -/** Setup the input buffer state to scan directly from a user-specified character buffer. - * @param base the character buffer - * @param size the size in bytes of the character buffer - * @param yyscanner The scanner object. - * @return the newly allocated buffer state object. - */ -YY_BUFFER_STATE yy_scan_buffer (char * base, yy_size_t size , yyscan_t yyscanner) -{ - YY_BUFFER_STATE b; - - if ( size < 2 || - base[size-2] != YY_END_OF_BUFFER_CHAR || - base[size-1] != YY_END_OF_BUFFER_CHAR ) - /* They forgot to leave room for the EOB's. */ - return 0; - - b = (YY_BUFFER_STATE) yyalloc(sizeof( struct yy_buffer_state ) ,yyscanner ); - if ( ! b ) - YY_FATAL_ERROR( "out of dynamic memory in yy_scan_buffer()" ); - - b->yy_buf_size = size - 2; /* "- 2" to take care of EOB's */ - b->yy_buf_pos = b->yy_ch_buf = base; - b->yy_is_our_buffer = 0; - b->yy_input_file = 0; - b->yy_n_chars = b->yy_buf_size; - b->yy_is_interactive = 0; - b->yy_at_bol = 1; - b->yy_fill_buffer = 0; - b->yy_buffer_status = YY_BUFFER_NEW; - - yy_switch_to_buffer(b ,yyscanner ); - - return b; -} - -/** Setup the input buffer state to scan a string. The next call to yylex() will - * scan from a @e copy of @a str. - * @param yystr a NUL-terminated string to scan - * @param yyscanner The scanner object. - * @return the newly allocated buffer state object. - * @note If you want to scan bytes that may contain NUL values, then use - * yy_scan_bytes() instead. - */ -YY_BUFFER_STATE yy_scan_string (yyconst char * yystr , yyscan_t yyscanner) -{ - - return yy_scan_bytes(yystr,strlen(yystr) ,yyscanner); -} - -/** Setup the input buffer state to scan the given bytes. 
The next call to yylex() will - * scan from a @e copy of @a bytes. - * @param yybytes the byte buffer to scan - * @param _yybytes_len the number of bytes in the buffer pointed to by @a bytes. - * @param yyscanner The scanner object. - * @return the newly allocated buffer state object. - */ -YY_BUFFER_STATE yy_scan_bytes (yyconst char * yybytes, yy_size_t _yybytes_len , yyscan_t yyscanner) -{ - YY_BUFFER_STATE b; - char *buf; - yy_size_t n; - int i; - - /* Get memory for full buffer, including space for trailing EOB's. */ - n = _yybytes_len + 2; - buf = (char *) yyalloc(n ,yyscanner ); - if ( ! buf ) - YY_FATAL_ERROR( "out of dynamic memory in yy_scan_bytes()" ); - - for ( i = 0; i < _yybytes_len; ++i ) - buf[i] = yybytes[i]; - - buf[_yybytes_len] = buf[_yybytes_len+1] = YY_END_OF_BUFFER_CHAR; - - b = yy_scan_buffer(buf,n ,yyscanner); - if ( ! b ) - YY_FATAL_ERROR( "bad buffer in yy_scan_bytes()" ); - - /* It's okay to grow etc. this buffer, and we should throw it - * away when we're done. - */ - b->yy_is_our_buffer = 1; - - return b; -} - -#ifndef YY_EXIT_FAILURE -#define YY_EXIT_FAILURE 2 -#endif - -static void yy_fatal_error (yyconst char* msg , yyscan_t yyscanner) -{ - (void) fprintf( stderr, "%s\n", msg ); - exit( YY_EXIT_FAILURE ); -} - -/* Redefine yyless() so it works in section 3 code. */ - -#undef yyless -#define yyless(n) \ - do \ - { \ - /* Undo effects of setting up yytext. */ \ - int yyless_macro_arg = (n); \ - YY_LESS_LINENO(yyless_macro_arg);\ - yytext[yyleng] = yyg->yy_hold_char; \ - yyg->yy_c_buf_p = yytext + yyless_macro_arg; \ - yyg->yy_hold_char = *yyg->yy_c_buf_p; \ - *yyg->yy_c_buf_p = '\0'; \ - yyleng = yyless_macro_arg; \ - } \ - while ( 0 ) - -/* Accessor methods (get/set functions) to struct members. */ - -/** Get the user-defined data for this scanner. - * @param yyscanner The scanner object. 
- */ -YY_EXTRA_TYPE yyget_extra (yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - return yyextra; -} - -/** Get the current line number. - * @param yyscanner The scanner object. - */ -int yyget_lineno (yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - - if (! YY_CURRENT_BUFFER) - return 0; - - return yylineno; -} - -/** Get the current column number. - * @param yyscanner The scanner object. - */ -int yyget_column (yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - - if (! YY_CURRENT_BUFFER) - return 0; - - return yycolumn; -} - -/** Get the input stream. - * @param yyscanner The scanner object. - */ -FILE *yyget_in (yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - return yyin; -} - -/** Get the output stream. - * @param yyscanner The scanner object. - */ -FILE *yyget_out (yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - return yyout; -} - -/** Get the length of the current token. - * @param yyscanner The scanner object. - */ -yy_size_t yyget_leng (yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - return yyleng; -} - -/** Get the current token. - * @param yyscanner The scanner object. - */ - -char *yyget_text (yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - return yytext; -} - -/** Set the user-defined data. This data is never touched by the scanner. - * @param user_defined The data to be associated with this scanner. - * @param yyscanner The scanner object. - */ -void yyset_extra (YY_EXTRA_TYPE user_defined , yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - yyextra = user_defined ; -} - -/** Set the current line number. - * @param line_number - * @param yyscanner The scanner object. 
- */ -void yyset_lineno (int line_number , yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - - /* lineno is only valid if an input buffer exists. */ - if (! YY_CURRENT_BUFFER ) - YY_FATAL_ERROR( "yyset_lineno called with no buffer" ); - - yylineno = line_number; -} - -/** Set the current column. - * @param line_number - * @param yyscanner The scanner object. - */ -void yyset_column (int column_no , yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - - /* column is only valid if an input buffer exists. */ - if (! YY_CURRENT_BUFFER ) - YY_FATAL_ERROR( "yyset_column called with no buffer" ); - - yycolumn = column_no; -} - -/** Set the input stream. This does not discard the current - * input buffer. - * @param in_str A readable stream. - * @param yyscanner The scanner object. - * @see yy_switch_to_buffer - */ -void yyset_in (FILE * in_str , yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - yyin = in_str ; -} - -void yyset_out (FILE * out_str , yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - yyout = out_str ; -} - -int yyget_debug (yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - return yy_flex_debug; -} - -void yyset_debug (int bdebug , yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - yy_flex_debug = bdebug ; -} - -/* Accessor methods for yylval and yylloc */ - -YYSTYPE * yyget_lval (yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - return yylval; -} - -void yyset_lval (YYSTYPE * yylval_param , yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - yylval = yylval_param; -} - -/* User-visible API */ - -/* yylex_init is special because it creates the scanner itself, so it is - * the ONLY reentrant function that doesn't take the scanner as the last argument. 
- * That's why we explicitly handle the declaration, instead of using our macros. - */ - -int yylex_init(yyscan_t* ptr_yy_globals) - -{ - if (ptr_yy_globals == NULL){ - errno = EINVAL; - return 1; - } - - *ptr_yy_globals = (yyscan_t) yyalloc ( sizeof( struct yyguts_t ), NULL ); - - if (*ptr_yy_globals == NULL){ - errno = ENOMEM; - return 1; - } - - /* By setting to 0xAA, we expose bugs in yy_init_globals. Leave at 0x00 for releases. */ - memset(*ptr_yy_globals,0x00,sizeof(struct yyguts_t)); - - return yy_init_globals ( *ptr_yy_globals ); -} - -/* yylex_init_extra has the same functionality as yylex_init, but follows the - * convention of taking the scanner as the last argument. Note however, that - * this is a *pointer* to a scanner, as it will be allocated by this call (and - * is the reason, too, why this function also must handle its own declaration). - * The user defined value in the first argument will be available to yyalloc in - * the yyextra field. - */ - -int yylex_init_extra(YY_EXTRA_TYPE yy_user_defined,yyscan_t* ptr_yy_globals ) - -{ - struct yyguts_t dummy_yyguts; - - yyset_extra (yy_user_defined, &dummy_yyguts); - - if (ptr_yy_globals == NULL){ - errno = EINVAL; - return 1; - } - - *ptr_yy_globals = (yyscan_t) yyalloc ( sizeof( struct yyguts_t ), &dummy_yyguts ); - - if (*ptr_yy_globals == NULL){ - errno = ENOMEM; - return 1; - } - - /* By setting to 0xAA, we expose bugs in - yy_init_globals. Leave at 0x00 for releases. */ - memset(*ptr_yy_globals,0x00,sizeof(struct yyguts_t)); - - yyset_extra (yy_user_defined, *ptr_yy_globals); - - return yy_init_globals ( *ptr_yy_globals ); -} - -static int yy_init_globals (yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - /* Initialization is the same as for the non-reentrant scanner. - * This function is called from yylex_destroy(), so don't allocate here. 
- */ - - yyg->yy_buffer_stack = 0; - yyg->yy_buffer_stack_top = 0; - yyg->yy_buffer_stack_max = 0; - yyg->yy_c_buf_p = (char *) 0; - yyg->yy_init = 0; - yyg->yy_start = 0; - - yyg->yy_start_stack_ptr = 0; - yyg->yy_start_stack_depth = 0; - yyg->yy_start_stack = NULL; - -/* Defined in main.c */ -#ifdef YY_STDINIT - yyin = stdin; - yyout = stdout; -#else - yyin = (FILE *) 0; - yyout = (FILE *) 0; -#endif - - /* For future reference: Set errno on error, since we are called by - * yylex_init() - */ - return 0; -} - -/* yylex_destroy is for both reentrant and non-reentrant scanners. */ -int yylex_destroy (yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - - /* Pop the buffer stack, destroying each element. */ - while(YY_CURRENT_BUFFER){ - yy_delete_buffer(YY_CURRENT_BUFFER ,yyscanner ); - YY_CURRENT_BUFFER_LVALUE = NULL; - yypop_buffer_state(yyscanner); - } - - /* Destroy the stack itself. */ - yyfree(yyg->yy_buffer_stack ,yyscanner); - yyg->yy_buffer_stack = NULL; - - /* Destroy the start condition stack. */ - yyfree(yyg->yy_start_stack ,yyscanner ); - yyg->yy_start_stack = NULL; - - /* Reset the globals. This is important in a non-reentrant scanner so the next time - * yylex() is called, initialization will occur. */ - yy_init_globals( yyscanner); - - /* Destroy the main struct (reentrant only). */ - yyfree ( yyscanner , yyscanner ); - yyscanner = NULL; - return 0; -} - -/* - * Internal utility routines. 
- */ - -#ifndef yytext_ptr -static void yy_flex_strncpy (char* s1, yyconst char * s2, int n , yyscan_t yyscanner) -{ - register int i; - for ( i = 0; i < n; ++i ) - s1[i] = s2[i]; -} -#endif - -#ifdef YY_NEED_STRLEN -static int yy_flex_strlen (yyconst char * s , yyscan_t yyscanner) -{ - register int n; - for ( n = 0; s[n]; ++n ) - ; - - return n; -} -#endif - -void *yyalloc (yy_size_t size , yyscan_t yyscanner) -{ - return (void *) malloc( size ); -} - -void *yyrealloc (void * ptr, yy_size_t size , yyscan_t yyscanner) -{ - /* The cast to (char *) in the following accommodates both - * implementations that use char* generic pointers, and those - * that use void* generic pointers. It works with the latter - * because both ANSI C and C++ allow castless assignment from - * any pointer type to void*, and deal with argument conversions - * as though doing an assignment. - */ - return (void *) realloc( (char *) ptr, size ); -} - -void yyfree (void * ptr , yyscan_t yyscanner) -{ - free( (char *) ptr ); /* see yyrealloc() for (char *) cast */ -} - -#define YYTABLES_NAME "yytables" - -#line 87 "_jsgf_scanner.l" - - - diff --git a/media/sphinxbase/src/libsphinxbase/lm/jsgf_scanner.h b/media/sphinxbase/src/libsphinxbase/lm/jsgf_scanner.h deleted file mode 100644 index 72abefb88..000000000 --- a/media/sphinxbase/src/libsphinxbase/lm/jsgf_scanner.h +++ /dev/null @@ -1,352 +0,0 @@ -#ifndef yyHEADER_H -#define yyHEADER_H 1 -#define yyIN_HEADER 1 - -#line 6 "jsgf_scanner.h" - -#line 8 "jsgf_scanner.h" - -#define YY_INT_ALIGNED short int - -/* A lexical scanner generated by flex */ - -#define FLEX_SCANNER -#define YY_FLEX_MAJOR_VERSION 2 -#define YY_FLEX_MINOR_VERSION 5 -#define YY_FLEX_SUBMINOR_VERSION 37 -#if YY_FLEX_SUBMINOR_VERSION > 0 -#define FLEX_BETA -#endif - -/* First, we deal with platform-specific or compiler-specific issues. */ - -/* begin standard C headers. 
*/ -#include <stdio.h> -#include <string.h> -#include <errno.h> -#include <stdlib.h> - -/* end standard C headers. */ - -/* flex integer type definitions */ - -#ifndef FLEXINT_H -#define FLEXINT_H - -/* C99 systems have <inttypes.h>. Non-C99 systems may or may not. */ - -#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L - -/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h, - * if you want the limit (max/min) macros for int types. - */ -#ifndef __STDC_LIMIT_MACROS -#define __STDC_LIMIT_MACROS 1 -#endif - -#include <inttypes.h> -typedef int8_t flex_int8_t; -typedef uint8_t flex_uint8_t; -typedef int16_t flex_int16_t; -typedef uint16_t flex_uint16_t; -typedef int32_t flex_int32_t; -typedef uint32_t flex_uint32_t; -#else -typedef signed char flex_int8_t; -typedef short int flex_int16_t; -typedef int flex_int32_t; -typedef unsigned char flex_uint8_t; -typedef unsigned short int flex_uint16_t; -typedef unsigned int flex_uint32_t; - -/* Limits of integral types. */ -#ifndef INT8_MIN -#define INT8_MIN (-128) -#endif -#ifndef INT16_MIN -#define INT16_MIN (-32767-1) -#endif -#ifndef INT32_MIN -#define INT32_MIN (-2147483647-1) -#endif -#ifndef INT8_MAX -#define INT8_MAX (127) -#endif -#ifndef INT16_MAX -#define INT16_MAX (32767) -#endif -#ifndef INT32_MAX -#define INT32_MAX (2147483647) -#endif -#ifndef UINT8_MAX -#define UINT8_MAX (255U) -#endif -#ifndef UINT16_MAX -#define UINT16_MAX (65535U) -#endif -#ifndef UINT32_MAX -#define UINT32_MAX (4294967295U) -#endif - -#endif /* ! C99 */ - -#endif /* ! FLEXINT_H */ - -#ifdef __cplusplus - -/* The "const" storage-class-modifier is valid. */ -#define YY_USE_CONST - -#else /* ! __cplusplus */ - -/* C99 requires __STDC__ to be defined as 1. */ -#if defined (__STDC__) - -#define YY_USE_CONST - -#endif /* defined (__STDC__) */ -#endif /* ! __cplusplus */ - -#ifdef YY_USE_CONST -#define yyconst const -#else -#define yyconst -#endif - -/* An opaque pointer. 
*/ -#ifndef YY_TYPEDEF_YY_SCANNER_T -#define YY_TYPEDEF_YY_SCANNER_T -typedef void* yyscan_t; -#endif - -/* For convenience, these vars (plus the bison vars far below) - are macros in the reentrant scanner. */ -#define yyin yyg->yyin_r -#define yyout yyg->yyout_r -#define yyextra yyg->yyextra_r -#define yyleng yyg->yyleng_r -#define yytext yyg->yytext_r -#define yylineno (YY_CURRENT_BUFFER_LVALUE->yy_bs_lineno) -#define yycolumn (YY_CURRENT_BUFFER_LVALUE->yy_bs_column) -#define yy_flex_debug yyg->yy_flex_debug_r - -/* Size of default input buffer. */ -#ifndef YY_BUF_SIZE -#define YY_BUF_SIZE 16384 -#endif - -#ifndef YY_TYPEDEF_YY_BUFFER_STATE -#define YY_TYPEDEF_YY_BUFFER_STATE -typedef struct yy_buffer_state *YY_BUFFER_STATE; -#endif - -#ifndef YY_TYPEDEF_YY_SIZE_T -#define YY_TYPEDEF_YY_SIZE_T -typedef size_t yy_size_t; -#endif - -#ifndef YY_STRUCT_YY_BUFFER_STATE -#define YY_STRUCT_YY_BUFFER_STATE -struct yy_buffer_state - { - FILE *yy_input_file; - - char *yy_ch_buf; /* input buffer */ - char *yy_buf_pos; /* current position in input buffer */ - - /* Size of input buffer in bytes, not including room for EOB - * characters. - */ - yy_size_t yy_buf_size; - - /* Number of characters read into yy_ch_buf, not including EOB - * characters. - */ - yy_size_t yy_n_chars; - - /* Whether we "own" the buffer - i.e., we know we created it, - * and can realloc() it to grow it, and should free() it to - * delete it. - */ - int yy_is_our_buffer; - - /* Whether this is an "interactive" input source; if so, and - * if we're using stdio for input, then we want to use getc() - * instead of fread(), to make sure we stop fetching input after - * each newline. - */ - int yy_is_interactive; - - /* Whether we're considered to be at the beginning of a line. - * If so, '^' rules will be active on the next match, otherwise - * not. - */ - int yy_at_bol; - - int yy_bs_lineno; /**< The line count. */ - int yy_bs_column; /**< The column count. 
*/ - - /* Whether to try to fill the input buffer when we reach the - * end of it. - */ - int yy_fill_buffer; - - int yy_buffer_status; - - }; -#endif /* !YY_STRUCT_YY_BUFFER_STATE */ - -void yyrestart (FILE *input_file ,yyscan_t yyscanner ); -void yy_switch_to_buffer (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner ); -YY_BUFFER_STATE yy_create_buffer (FILE *file,int size ,yyscan_t yyscanner ); -void yy_delete_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner ); -void yy_flush_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner ); -void yypush_buffer_state (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner ); -void yypop_buffer_state (yyscan_t yyscanner ); - -YY_BUFFER_STATE yy_scan_buffer (char *base,yy_size_t size ,yyscan_t yyscanner ); -YY_BUFFER_STATE yy_scan_string (yyconst char *yy_str ,yyscan_t yyscanner ); -YY_BUFFER_STATE yy_scan_bytes (yyconst char *bytes,yy_size_t len ,yyscan_t yyscanner ); - -void *yyalloc (yy_size_t ,yyscan_t yyscanner ); -void *yyrealloc (void *,yy_size_t ,yyscan_t yyscanner ); -void yyfree (void * ,yyscan_t yyscanner ); - -/* Begin user sect3 */ - -#define yywrap(yyscanner) 1 -#define YY_SKIP_YYWRAP - -#define yytext_ptr yytext_r - -#ifdef YY_HEADER_EXPORT_START_CONDITIONS -#define INITIAL 0 -#define COMMENT 1 -#define DECL 2 -#define DECLCOMMENT 3 - -#endif - - -#ifdef HAVE_UNISTD_H -#ifndef YY_NO_UNISTD_H -/* Special case for "unistd.h", since it is non-ANSI. We include it way - * down here because we want the user's section 1 to have been scanned first. - * The user has a chance to override it with an option. - */ -#include <unistd.h> -#endif -#endif - -#ifndef YY_EXTRA_TYPE -#define YY_EXTRA_TYPE void * -#endif - -int yylex_init (yyscan_t* scanner); - -int yylex_init_extra (YY_EXTRA_TYPE user_defined,yyscan_t* scanner); - -/* Accessor methods to globals. - These are made visible to non-reentrant scanners for convenience. 
*/ - -int yylex_destroy (yyscan_t yyscanner ); - -int yyget_debug (yyscan_t yyscanner ); - -void yyset_debug (int debug_flag ,yyscan_t yyscanner ); - -YY_EXTRA_TYPE yyget_extra (yyscan_t yyscanner ); - -void yyset_extra (YY_EXTRA_TYPE user_defined ,yyscan_t yyscanner ); - -FILE *yyget_in (yyscan_t yyscanner ); - -void yyset_in (FILE * in_str ,yyscan_t yyscanner ); - -FILE *yyget_out (yyscan_t yyscanner ); - -void yyset_out (FILE * out_str ,yyscan_t yyscanner ); - -yy_size_t yyget_leng (yyscan_t yyscanner ); - -char *yyget_text (yyscan_t yyscanner ); - -int yyget_lineno (yyscan_t yyscanner ); - -void yyset_lineno (int line_number ,yyscan_t yyscanner ); - -int yyget_column (yyscan_t yyscanner ); - -void yyset_column (int column_no ,yyscan_t yyscanner ); - -YYSTYPE * yyget_lval (yyscan_t yyscanner ); - -void yyset_lval (YYSTYPE * yylval_param ,yyscan_t yyscanner ); - -/* Macros after this point can all be overridden by user definitions in - * section 1. - */ - -#ifndef YY_SKIP_YYWRAP -#ifdef __cplusplus -extern "C" int yywrap (yyscan_t yyscanner ); -#else -extern int yywrap (yyscan_t yyscanner ); -#endif -#endif - -#ifndef yytext_ptr -static void yy_flex_strncpy (char *,yyconst char *,int ,yyscan_t yyscanner); -#endif - -#ifdef YY_NEED_STRLEN -static int yy_flex_strlen (yyconst char * ,yyscan_t yyscanner); -#endif - -#ifndef YY_NO_INPUT - -#endif - -/* Amount of stuff to slurp up with each read. */ -#ifndef YY_READ_BUF_SIZE -#define YY_READ_BUF_SIZE 8192 -#endif - -/* Number of entries by which start-condition stack grows. */ -#ifndef YY_START_STACK_INCR -#define YY_START_STACK_INCR 25 -#endif - -/* Default declaration of generated scanner - a define so the user can - * easily add parameters. 
- */ -#ifndef YY_DECL -#define YY_DECL_IS_OURS 1 - -extern int yylex \ - (YYSTYPE * yylval_param ,yyscan_t yyscanner); - -#define YY_DECL int yylex \ - (YYSTYPE * yylval_param , yyscan_t yyscanner) -#endif /* !YY_DECL */ - -/* yy_get_previous_state - get the state just before the EOB char was reached */ - -#undef YY_NEW_FILE -#undef YY_FLUSH_BUFFER -#undef yy_set_bol -#undef yy_new_buffer -#undef yy_set_interactive -#undef YY_DO_BEFORE_ACTION - -#ifdef YY_DECL_IS_OURS -#undef YY_DECL_IS_OURS -#undef YY_DECL -#endif - -#line 87 "_jsgf_scanner.l" - - -#line 348 "jsgf_scanner.h" -#undef yyIN_HEADER -#endif /* yyHEADER_H */ diff --git a/media/sphinxbase/src/libsphinxbase/lm/lm3g_model.c b/media/sphinxbase/src/libsphinxbase/lm/lm3g_model.c deleted file mode 100644 index e9943001e..000000000 --- a/media/sphinxbase/src/libsphinxbase/lm/lm3g_model.c +++ /dev/null @@ -1,258 +0,0 @@ -/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ -/* ==================================================================== - * Copyright (c) 1999-2007 Carnegie Mellon University. All rights - * reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * This work was supported in part by funding from the Defense Advanced - * Research Projects Agency and the National Science Foundation of the - * United States of America, and the CMU Sphinx Speech Consortium. 
- * - * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND - * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, - * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY - * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * ==================================================================== - * - */ -/* - * \file lm3g_model.c Core Sphinx 3-gram code used in - * DMP/DMP32/ARPA (for now) model code. - * - * Author: A cast of thousands, probably. - */ -#include <string.h> -#include <assert.h> -#include <limits.h> - -#include "sphinxbase/listelem_alloc.h" -#include "sphinxbase/ckd_alloc.h" -#include "sphinxbase/err.h" - -#include "lm3g_model.h" - -void -lm3g_tginfo_free(ngram_model_t *base, lm3g_model_t *lm3g) -{ - if (lm3g->tginfo == NULL) - return; - listelem_alloc_free(lm3g->le); - ckd_free(lm3g->tginfo); -} - -void -lm3g_tginfo_reset(ngram_model_t *base, lm3g_model_t *lm3g) -{ - if (lm3g->tginfo == NULL) - return; - listelem_alloc_free(lm3g->le); - memset(lm3g->tginfo, 0, base->n_counts[0] * sizeof(tginfo_t *)); - lm3g->le = listelem_alloc_init(sizeof(tginfo_t)); -} - -void -lm3g_apply_weights(ngram_model_t *base, - lm3g_model_t *lm3g, - float32 lw, float32 wip, float32 uw) -{ - int32 log_wip, log_uw, log_uniform_weight; - int i; - - /* Precalculate some log values we will like. 
*/ - log_wip = logmath_log(base->lmath, wip); - log_uw = logmath_log(base->lmath, uw); - log_uniform_weight = logmath_log(base->lmath, 1.0 - uw); - - for (i = 0; i < base->n_counts[0]; ++i) { - int32 prob1, bo_wt, n_used; - - /* Backoff weights just get scaled by the lw. */ - bo_wt = (int32)(lm3g->unigrams[i].bo_wt1.l / base->lw); - /* Unscaling unigram probs is a bit more complicated, so punt - * it back to the general code. */ - prob1 = ngram_ng_prob(base, i, NULL, 0, &n_used); - /* Now compute the new scaled probabilities. */ - lm3g->unigrams[i].bo_wt1.l = (int32)(bo_wt * lw); - if (strcmp(base->word_str[i], "<s>") == 0) { /* FIXME: configurable start_sym */ - /* Apply language weight and WIP */ - lm3g->unigrams[i].prob1.l = (int32)(prob1 * lw) + log_wip; - } - else { - /* Interpolate unigram probability with uniform. */ - prob1 += log_uw; - prob1 = logmath_add(base->lmath, prob1, base->log_uniform + log_uniform_weight); - /* Apply language weight and WIP */ - lm3g->unigrams[i].prob1.l = (int32)(prob1 * lw) + log_wip; - } - } - - for (i = 0; i < lm3g->n_prob2; ++i) { - int32 prob2; - /* Can't just punt this back to general code since it is quantized. */ - prob2 = (int32)((lm3g->prob2[i].l - base->log_wip) / base->lw); - lm3g->prob2[i].l = (int32)(prob2 * lw) + log_wip; - } - - if (base->n > 2) { - for (i = 0; i < lm3g->n_bo_wt2; ++i) { - lm3g->bo_wt2[i].l = (int32)(lm3g->bo_wt2[i].l / base->lw * lw); - } - for (i = 0; i < lm3g->n_prob3; i++) { - int32 prob3; - /* Can't just punt this back to general code since it is quantized. */ - prob3 = (int32)((lm3g->prob3[i].l - base->log_wip) / base->lw); - lm3g->prob3[i].l = (int32)(prob3 * lw) + log_wip; - } - } - - /* Store updated values in the model. 
*/ - base->log_wip = log_wip; - base->log_uw = log_uw; - base->log_uniform_weight = log_uniform_weight; - base->lw = lw; -} - -int32 -lm3g_add_ug(ngram_model_t *base, - lm3g_model_t *lm3g, int32 wid, int32 lweight) -{ - int32 score; - - /* This would be very bad if this happened! */ - assert(!NGRAM_IS_CLASSWID(wid)); - - /* Reallocate unigram array. */ - lm3g->unigrams = ckd_realloc(lm3g->unigrams, - sizeof(*lm3g->unigrams) * base->n_1g_alloc); - memset(lm3g->unigrams + base->n_counts[0], 0, - (base->n_1g_alloc - base->n_counts[0]) * sizeof(*lm3g->unigrams)); - /* Reallocate tginfo array. */ - lm3g->tginfo = ckd_realloc(lm3g->tginfo, - sizeof(*lm3g->tginfo) * base->n_1g_alloc); - memset(lm3g->tginfo + base->n_counts[0], 0, - (base->n_1g_alloc - base->n_counts[0]) * sizeof(*lm3g->tginfo)); - /* FIXME: we really ought to update base->log_uniform *and* - * renormalize all the other unigrams. This is really slow, so I - * will probably just provide a function to renormalize after - * adding unigrams, for anyone who really cares. */ - /* This could be simplified but then we couldn't do it in logmath */ - score = lweight + base->log_uniform + base->log_uw; - score = logmath_add(base->lmath, score, - base->log_uniform + base->log_uniform_weight); - lm3g->unigrams[wid].prob1.l = score; - /* This unigram by definition doesn't participate in any bigrams, - * so its backoff weight and bigram pointer are both undefined. */ - lm3g->unigrams[wid].bo_wt1.l = 0; - lm3g->unigrams[wid].bigrams = 0; - /* Finally, increase the unigram count */ - ++base->n_counts[0]; - /* FIXME: Note that this can actually be quite bogus due to the - * presence of class words. 
If wid falls outside the unigram - * count, increase it to compensate, at the cost of no longer - * really knowing how many unigrams we have :( */ - if (wid >= base->n_counts[0]) - base->n_counts[0] = wid + 1; - - return score; -} - -#define INITIAL_SORTED_ENTRIES MAX_UINT16 - -void -init_sorted_list(sorted_list_t * l) -{ - l->list = ckd_calloc(INITIAL_SORTED_ENTRIES, sizeof(sorted_entry_t)); - l->list[0].val.l = INT_MIN; - l->list[0].lower = 0; - l->list[0].higher = 0; - l->free = 1; - l->size = INITIAL_SORTED_ENTRIES; -} - -void -free_sorted_list(sorted_list_t * l) -{ - free(l->list); -} - -lmprob_t * -vals_in_sorted_list(sorted_list_t * l) -{ - lmprob_t *vals; - int32 i; - - vals = ckd_calloc(l->free, sizeof(lmprob_t)); - for (i = 0; i < l->free; i++) - vals[i] = l->list[i].val; - return (vals); -} - -int32 -sorted_id(sorted_list_t * l, int32 *val) -{ - int32 i = 0; - - for (;;) { - if (*val == l->list[i].val.l) - return (i); - if (*val < l->list[i].val.l) { - if (l->list[i].lower == 0) { - - if (l->free >= l->size) { - int newsize = l->size + INITIAL_SORTED_ENTRIES; - l->list = ckd_realloc(l->list, sizeof(sorted_entry_t) * newsize); - memset(l->list + l->size, - 0, INITIAL_SORTED_ENTRIES * sizeof(sorted_entry_t)); - l->size = newsize; - } - - l->list[i].lower = l->free; - (l->free)++; - i = l->list[i].lower; - l->list[i].val.l = *val; - return (i); - } - else - i = l->list[i].lower; - } - else { - if (l->list[i].higher == 0) { - - if (l->free >= l->size) { - int newsize = l->size + INITIAL_SORTED_ENTRIES; - l->list = ckd_realloc(l->list, sizeof(sorted_entry_t) * newsize); - memset(l->list + l->size, - 0, INITIAL_SORTED_ENTRIES * sizeof(sorted_entry_t)); - l->size = newsize; - } - - l->list[i].higher = l->free; - (l->free)++; - i = l->list[i].higher; - l->list[i].val.l = *val; - return (i); - } - else - i = l->list[i].higher; - } - } -} diff --git a/media/sphinxbase/src/libsphinxbase/lm/lm3g_model.h b/media/sphinxbase/src/libsphinxbase/lm/lm3g_model.h deleted 
file mode 100644 index 698ed81f5..000000000 --- a/media/sphinxbase/src/libsphinxbase/lm/lm3g_model.h +++ /dev/null @@ -1,177 +0,0 @@ -/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ -/* ==================================================================== - * Copyright (c) 1999-2007 Carnegie Mellon University. All rights - * reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * This work was supported in part by funding from the Defense Advanced - * Research Projects Agency and the National Science Foundation of the - * United States of America, and the CMU Sphinx Speech Consortium. - * - * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND - * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, - * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY - * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - * ==================================================================== - * - */ -/* - * \file lm3g_model.h Core Sphinx 3-gram code used in - * DMP/DMP32/ARPA (for now) model code. - * - * Author: A cast of thousands, probably. - */ - -#ifndef __NGRAM_MODEL_LM3G_H__ -#define __NGRAM_MODEL_LM3G_H__ - -#include "sphinxbase/listelem_alloc.h" - -#include "ngram_model_internal.h" - -/** - * Type used to store language model probabilities - */ -typedef union { - float32 f; - int32 l; -} lmprob_t; - -/** - * Bigram probs and bo-wts, and trigram probs are kept in separate - * tables rather than within the bigram_t and trigram_t structures. - * These tables hold unique prob and bo-wt values. The following tree - * structure is used to construct these tables of unique values. - * Whenever a new value is read from the LM file, the sorted tree - * structure is searched to see if the value already exists, and - * inserted if not found. - */ -typedef struct sorted_entry_s { - lmprob_t val; /**< value being kept in this node */ - uint32 lower; /**< index of another entry. All descendants down - this path have their val < this node's val. - 0 => no son exists (0 is root index) */ - uint32 higher; /**< index of another entry. All descendants down - this path have their val > this node's val - 0 => no son exists (0 is root index) */ -} sorted_entry_t; - -/** - * The sorted list. list is a (64K long) array. The first entry is the - * root of the tree and is created during initialization. - */ -typedef struct { - sorted_entry_t *list; - int32 free; /**< first free element in list */ - int32 size; -} sorted_list_t; - -/** - * Unigram structure (common among all lm3g implementations) - */ -typedef struct unigram_s { - lmprob_t prob1; /**< Unigram probability. */ - lmprob_t bo_wt1; /**< Unigram backoff weight. 
*/ - int32 bigrams; /**< Index of 1st entry in lm_t.bigrams[] */ -} unigram_t; - -/** - * Bigram structure (might be implemented differently) - */ -typedef struct bigram_s bigram_t; -/** - * Trigram structure (might be implemented differently) - */ -typedef struct trigram_s trigram_t; - - -/* - * To conserve space, bigram info is kept in many tables. Since the number - * of distinct values << #bigrams, these table indices can be 16-bit values. - * prob2 and bo_wt2 are such indices, but keeping trigram index is less easy. - * It is supposed to be the index of the first trigram entry for each bigram. - * But such an index cannot be represented in 16-bits, hence the following - * segmentation scheme: Partition bigrams into segments of BG_SEG_SZ - * consecutive entries, such that #trigrams in each segment <= 2**16 (the - * corresponding trigram segment). The bigram_t.trigrams value is then a - * 16-bit relative index within the trigram segment. A separate table-- - * lm_t.tseg_base--has the index of the 1st trigram for each bigram segment. - */ -#define BG_SEG_SZ 512 /* chosen so that #trigram/segment <= 2**16 */ -#define LOG_BG_SEG_SZ 9 - -/** - * Trigram information cache. - * - * The following trigram information cache eliminates most traversals of 1g->2g->3g - * tree to locate trigrams for a given bigram (lw1,lw2). The organization is optimized - * for locality of access (to the same lw1), given lw2. - */ -typedef struct tginfo_s { - int32 w1; /**< lw1 component of bigram lw1,lw2. All bigrams with - same lw2 linked together (see lm_t.tginfo). */ - int32 n_tg; /**< number tg for parent bigram lw1,lw2 */ - int32 bowt; /**< tg bowt for lw1,lw2 */ - int32 used; /**< whether used since last lm_reset */ - trigram_t *tg; /**< Trigrams for lw1,lw2 */ - struct tginfo_s *next; /**< Next lw1 with same parent lw2; NULL if none. */ -} tginfo_t; - -/** - * Common internal structure for Sphinx 3-gram models. 
- */ -typedef struct lm3g_model_s { - unigram_t *unigrams; - bigram_t *bigrams; - trigram_t *trigrams; - lmprob_t *prob2; /**< Table of actual bigram probs */ - int32 n_prob2; /**< prob2 size */ - lmprob_t *bo_wt2; /**< Table of actual bigram backoff weights */ - int32 n_bo_wt2; /**< bo_wt2 size */ - lmprob_t *prob3; /**< Table of actual trigram probs */ - int32 n_prob3; /**< prob3 size */ - int32 *tseg_base; /**< tseg_base[i>>LOG_BG_SEG_SZ] = index of 1st - trigram for bigram segment (i>>LOG_BG_SEG_SZ) */ - tginfo_t **tginfo; /**< tginfo[lw2] is head of linked list of trigram information for - some cached subset of bigrams (*,lw2). */ - listelem_alloc_t *le; /**< List element allocator for tginfo. */ -} lm3g_model_t; - -void lm3g_tginfo_free(ngram_model_t *base, lm3g_model_t *lm3g); -void lm3g_tginfo_reset(ngram_model_t *base, lm3g_model_t *lm3g); -void lm3g_apply_weights(ngram_model_t *base, - lm3g_model_t *lm3g, - float32 lw, float32 wip, float32 uw); -int32 lm3g_add_ug(ngram_model_t *base, - lm3g_model_t *lm3g, int32 wid, int32 lweight); - - -/** - * Initialize sorted list with the 0-th entry = MIN_PROB_F, which may be needed - * to replace spurious values in the Darpa LM file. - */ -void init_sorted_list(sorted_list_t *l); -void free_sorted_list(sorted_list_t *l); -lmprob_t *vals_in_sorted_list(sorted_list_t *l); -int32 sorted_id(sorted_list_t * l, int32 *val); - -#endif /* __NGRAM_MODEL_LM3G_H__ */ diff --git a/media/sphinxbase/src/libsphinxbase/lm/lm3g_templates.c b/media/sphinxbase/src/libsphinxbase/lm/lm3g_templates.c deleted file mode 100644 index 080cfa8e6..000000000 --- a/media/sphinxbase/src/libsphinxbase/lm/lm3g_templates.c +++ /dev/null @@ -1,560 +0,0 @@ -/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ -/* ==================================================================== - * Copyright (c) 1999-2007 Carnegie Mellon University. All rights - * reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * This work was supported in part by funding from the Defense Advanced - * Research Projects Agency and the National Science Foundation of the - * United States of America, and the CMU Sphinx Speech Consortium. - * - * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND - * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, - * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY - * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * ==================================================================== - * - */ -/* - * \file lm3g_templates.c Core Sphinx 3-gram code used in - * DMP/DMP32/ARPA (for now) model code. 
- */ - -#include <assert.h> - -/* Locate a specific bigram within a bigram list */ -#define BINARY_SEARCH_THRESH 16 -static int32 -find_bg(bigram_t * bg, int32 n, int32 w) -{ - int32 i, b, e; - - /* Binary search until segment size < threshold */ - b = 0; - e = n; - while (e - b > BINARY_SEARCH_THRESH) { - i = (b + e) >> 1; - if (bg[i].wid < w) - b = i + 1; - else if (bg[i].wid > w) - e = i; - else - return i; - } - - /* Linear search within narrowed segment */ - for (i = b; (i < e) && (bg[i].wid != w); i++); - return ((i < e) ? i : -1); -} - -static int32 -lm3g_bg_score(NGRAM_MODEL_TYPE *model, - int32 lw1, int32 lw2, int32 *n_used) -{ - int32 i, n, b, score; - bigram_t *bg; - - if (lw1 < 0 || model->base.n < 2) { - *n_used = 1; - return model->lm3g.unigrams[lw2].prob1.l; - } - - b = FIRST_BG(model, lw1); - n = FIRST_BG(model, lw1 + 1) - b; - bg = model->lm3g.bigrams + b; - - if ((i = find_bg(bg, n, lw2)) >= 0) { - /* Access mode = bigram */ - *n_used = 2; - score = model->lm3g.prob2[bg[i].prob2].l; - } - else { - /* Access mode = unigram */ - *n_used = 1; - score = model->lm3g.unigrams[lw1].bo_wt1.l + model->lm3g.unigrams[lw2].prob1.l; - } - - return (score); -} - -static void -load_tginfo(NGRAM_MODEL_TYPE *model, int32 lw1, int32 lw2) -{ - int32 i, n, b, t; - bigram_t *bg; - tginfo_t *tginfo; - - /* First allocate space for tg information for bg lw1,lw2 */ - tginfo = (tginfo_t *) listelem_malloc(model->lm3g.le); - tginfo->w1 = lw1; - tginfo->tg = NULL; - tginfo->next = model->lm3g.tginfo[lw2]; - model->lm3g.tginfo[lw2] = tginfo; - - /* Locate bigram lw1,lw2 */ - b = model->lm3g.unigrams[lw1].bigrams; - n = model->lm3g.unigrams[lw1 + 1].bigrams - b; - bg = model->lm3g.bigrams + b; - - if ((n > 0) && ((i = find_bg(bg, n, lw2)) >= 0)) { - tginfo->bowt = model->lm3g.bo_wt2[bg[i].bo_wt2].l; - - /* Find t = Absolute first trigram index for bigram lw1,lw2 */ - b += i; /* b = Absolute index of bigram lw1,lw2 on disk */ - t = FIRST_TG(model, b); - - tginfo->tg = 
model->lm3g.trigrams + t; - - /* Find #tg for bigram w1,w2 */ - tginfo->n_tg = FIRST_TG(model, b + 1) - t; - } - else { /* No bigram w1,w2 */ - tginfo->bowt = 0; - tginfo->n_tg = 0; - } -} - -/* Similar to find_bg */ -static int32 -find_tg(trigram_t * tg, int32 n, uint32 w) -{ - int32 i, b, e; - - b = 0; - e = n; - while (e - b > BINARY_SEARCH_THRESH) { - i = (b + e) >> 1; - if (tg[i].wid < w) - b = i + 1; - else if (tg[i].wid > w) - e = i; - else - return i; - } - - for (i = b; (i < e) && (tg[i].wid != w); i++); - return ((i < e) ? i : -1); -} - -static int32 -lm3g_tg_score(NGRAM_MODEL_TYPE *model, int32 lw1, - int32 lw2, int32 lw3, int32 *n_used) -{ - ngram_model_t *base = &model->base; - int32 i, n, score; - trigram_t *tg; - tginfo_t *tginfo, *prev_tginfo; - - if ((base->n < 3) || (lw1 < 0) || (lw2 < 0)) - return (lm3g_bg_score(model, lw2, lw3, n_used)); - - prev_tginfo = NULL; - for (tginfo = model->lm3g.tginfo[lw2]; tginfo; tginfo = tginfo->next) { - if (tginfo->w1 == lw1) - break; - prev_tginfo = tginfo; - } - - if (!tginfo) { - load_tginfo(model, lw1, lw2); - tginfo = model->lm3g.tginfo[lw2]; - } - else if (prev_tginfo) { - prev_tginfo->next = tginfo->next; - tginfo->next = model->lm3g.tginfo[lw2]; - model->lm3g.tginfo[lw2] = tginfo; - } - - tginfo->used = 1; - - /* Trigrams for w1,w2 now pointed to by tginfo */ - n = tginfo->n_tg; - tg = tginfo->tg; - if ((i = find_tg(tg, n, lw3)) >= 0) { - /* Access mode = trigram */ - *n_used = 3; - score = model->lm3g.prob3[tg[i].prob3].l; - } - else { - score = tginfo->bowt + lm3g_bg_score(model, lw2, lw3, n_used); - } - - return (score); -} - -static int32 -lm3g_template_score(ngram_model_t *base, int32 wid, - int32 *history, int32 n_hist, - int32 *n_used) -{ - NGRAM_MODEL_TYPE *model = (NGRAM_MODEL_TYPE *)base; - switch (n_hist) { - case 0: - /* Access mode: unigram */ - *n_used = 1; - return model->lm3g.unigrams[wid].prob1.l; - case 1: - return lm3g_bg_score(model, history[0], wid, n_used); - case 2: - default: - /* 
Anything greater than 2 is the same as a trigram for now. */ - return lm3g_tg_score(model, history[1], history[0], wid, n_used); - } -} - -static int32 -lm3g_template_raw_score(ngram_model_t *base, int32 wid, - int32 *history, int32 n_hist, - int32 *n_used) -{ - NGRAM_MODEL_TYPE *model = (NGRAM_MODEL_TYPE *)base; - int32 score; - - switch (n_hist) { - case 0: - /* Access mode: unigram */ - *n_used = 1; - /* Undo insertion penalty. */ - score = model->lm3g.unigrams[wid].prob1.l - base->log_wip; - /* Undo language weight. */ - score = (int32)(score / base->lw); - /* Undo unigram interpolation */ - if (strcmp(base->word_str[wid], "<s>") != 0) { /* FIXME: configurable start_sym */ - /* This operation is numerically unstable, so try to avoid it - * as possible */ - if (base->log_uniform + base->log_uniform_weight > logmath_get_zero(base->lmath)) { - score = logmath_log(base->lmath, - logmath_exp(base->lmath, score) - - logmath_exp(base->lmath, - base->log_uniform + base->log_uniform_weight)); - } - } - return score; - case 1: - score = lm3g_bg_score(model, history[0], wid, n_used); - break; - case 2: - default: - /* Anything greater than 2 is the same as a trigram for now. */ - score = lm3g_tg_score(model, history[1], history[0], wid, n_used); - break; - } - /* FIXME (maybe): This doesn't undo unigram weighting in backoff cases. 
*/ - return (int32)((score - base->log_wip) / base->lw); -} - -static int32 -lm3g_template_add_ug(ngram_model_t *base, - int32 wid, int32 lweight) -{ - NGRAM_MODEL_TYPE *model = (NGRAM_MODEL_TYPE *)base; - return lm3g_add_ug(base, &model->lm3g, wid, lweight); -} - -static void -lm3g_template_flush(ngram_model_t *base) -{ - NGRAM_MODEL_TYPE *model = (NGRAM_MODEL_TYPE *)base; - lm3g_tginfo_reset(base, &model->lm3g); -} - -typedef struct lm3g_iter_s { - ngram_iter_t base; - unigram_t *ug; - bigram_t *bg; - trigram_t *tg; -} lm3g_iter_t; - -static ngram_iter_t * -lm3g_template_iter(ngram_model_t *base, int32 wid, - int32 *history, int32 n_hist) -{ - NGRAM_MODEL_TYPE *model = (NGRAM_MODEL_TYPE *)base; - lm3g_iter_t *itor = (lm3g_iter_t *)ckd_calloc(1, sizeof(*itor)); - - ngram_iter_init((ngram_iter_t *)itor, base, n_hist, FALSE); - - if (n_hist == 0) { - /* Unigram is the easiest. */ - itor->ug = model->lm3g.unigrams + wid; - return (ngram_iter_t *)itor; - } - else if (n_hist == 1) { - int32 i, n, b; - /* Find the bigram, as in bg_score above (duplicate code...) */ - itor->ug = model->lm3g.unigrams + history[0]; - b = FIRST_BG(model, history[0]); - n = FIRST_BG(model, history[0] + 1) - b; - itor->bg = model->lm3g.bigrams + b; - /* If no such bigram exists then fail. */ - if ((i = find_bg(itor->bg, n, wid)) < 0) { - ngram_iter_free((ngram_iter_t *)itor); - return NULL; - } - itor->bg += i; - return (ngram_iter_t *)itor; - } - else if (n_hist == 2) { - int32 i, n; - tginfo_t *tginfo, *prev_tginfo; - /* Find the trigram, as in tg_score above (duplicate code...) 
*/ - itor->ug = model->lm3g.unigrams + history[1]; - prev_tginfo = NULL; - for (tginfo = model->lm3g.tginfo[history[0]]; - tginfo; tginfo = tginfo->next) { - if (tginfo->w1 == history[1]) - break; - prev_tginfo = tginfo; - } - - if (!tginfo) { - load_tginfo(model, history[1], history[0]); - tginfo = model->lm3g.tginfo[history[0]]; - } - else if (prev_tginfo) { - prev_tginfo->next = tginfo->next; - tginfo->next = model->lm3g.tginfo[history[0]]; - model->lm3g.tginfo[history[0]] = tginfo; - } - - tginfo->used = 1; - - /* Trigrams for w1,w2 now pointed to by tginfo */ - n = tginfo->n_tg; - itor->tg = tginfo->tg; - if ((i = find_tg(itor->tg, n, wid)) >= 0) { - itor->tg += i; - /* Now advance the bigram pointer accordingly. FIXME: - * Note that we actually already found the relevant bigram - * in load_tginfo. */ - itor->bg = model->lm3g.bigrams; - while (FIRST_TG(model, (itor->bg - model->lm3g.bigrams + 1)) - <= (itor->tg - model->lm3g.trigrams)) - ++itor->bg; - return (ngram_iter_t *)itor; - } - else { - ngram_iter_free((ngram_iter_t *)itor); - return (ngram_iter_t *)NULL; - } - } - else { - /* Should not happen. */ - assert(n_hist == 0); /* Guaranteed to fail. */ - ngram_iter_free((ngram_iter_t *)itor); - return NULL; - } -} - -static ngram_iter_t * -lm3g_template_mgrams(ngram_model_t *base, int m) -{ - NGRAM_MODEL_TYPE *model = (NGRAM_MODEL_TYPE *)base; - lm3g_iter_t *itor = (lm3g_iter_t *)ckd_calloc(1, sizeof(*itor)); - ngram_iter_init((ngram_iter_t *)itor, base, m, FALSE); - - itor->ug = model->lm3g.unigrams; - itor->bg = model->lm3g.bigrams; - itor->tg = model->lm3g.trigrams; - - /* Advance bigram pointer to match first trigram. */ - if (m > 1 && base->n_counts[1] > 1) { - while (FIRST_TG(model, (itor->bg - model->lm3g.bigrams + 1)) - <= (itor->tg - model->lm3g.trigrams)) - ++itor->bg; - } - - /* Advance unigram pointer to match first bigram. 
*/ - if (m > 0 && base->n_counts[0] > 1) { - while (itor->ug[1].bigrams <= (itor->bg - model->lm3g.bigrams)) - ++itor->ug; - } - - return (ngram_iter_t *)itor; -} - -static ngram_iter_t * -lm3g_template_successors(ngram_iter_t *bitor) -{ - NGRAM_MODEL_TYPE *model = (NGRAM_MODEL_TYPE *)bitor->model; - lm3g_iter_t *from = (lm3g_iter_t *)bitor; - lm3g_iter_t *itor = (lm3g_iter_t *)ckd_calloc(1, sizeof(*itor)); - - itor->ug = from->ug; - switch (bitor->m) { - case 0: - /* Next itor bigrams is the same as this itor bigram or - itor bigrams is more than total count. This means no successors */ - if (((itor->ug + 1) - model->lm3g.unigrams < bitor->model->n_counts[0] && - itor->ug->bigrams == (itor->ug + 1)->bigrams) || - itor->ug->bigrams == bitor->model->n_counts[1]) - goto done; - - /* Start iterating from first bigram successor of from->ug. */ - itor->bg = model->lm3g.bigrams + itor->ug->bigrams; - break; - case 1: - itor->bg = from->bg; - - /* This indicates no successors */ - if (((itor->bg + 1) - model->lm3g.bigrams < bitor->model->n_counts[1] && - FIRST_TG (model, itor->bg - model->lm3g.bigrams) == - FIRST_TG (model, (itor->bg + 1) - model->lm3g.bigrams)) || - FIRST_TG (model, itor->bg - model->lm3g.bigrams) == bitor->model->n_counts[2]) - goto done; - - /* Start iterating from first trigram successor of from->bg. */ - itor->tg = (model->lm3g.trigrams - + FIRST_TG(model, (itor->bg - model->lm3g.bigrams))); -#if 0 - printf("%s %s => %d (%s)\n", - model->base.word_str[itor->ug - model->lm3g.unigrams], - model->base.word_str[itor->bg->wid], - FIRST_TG(model, (itor->bg - model->lm3g.bigrams)), - model->base.word_str[itor->tg->wid]); -#endif - break; - case 2: - default: - /* All invalid! 
*/ - goto done; - } - - ngram_iter_init((ngram_iter_t *)itor, bitor->model, bitor->m + 1, TRUE); - return (ngram_iter_t *)itor; - done: - ckd_free(itor); - return NULL; -} - -static int32 const * -lm3g_template_iter_get(ngram_iter_t *base, - int32 *out_score, int32 *out_bowt) -{ - NGRAM_MODEL_TYPE *model = (NGRAM_MODEL_TYPE *)base->model; - lm3g_iter_t *itor = (lm3g_iter_t *)base; - - base->wids[0] = itor->ug - model->lm3g.unigrams; - if (itor->bg) base->wids[1] = itor->bg->wid; - if (itor->tg) base->wids[2] = itor->tg->wid; -#if 0 - printf("itor_get: %d %d %d\n", base->wids[0], base->wids[1], base->wids[2]); -#endif - - switch (base->m) { - case 0: - *out_score = itor->ug->prob1.l; - *out_bowt = itor->ug->bo_wt1.l; - break; - case 1: - *out_score = model->lm3g.prob2[itor->bg->prob2].l; - if (model->lm3g.bo_wt2) - *out_bowt = model->lm3g.bo_wt2[itor->bg->bo_wt2].l; - else - *out_bowt = 0; - break; - case 2: - *out_score = model->lm3g.prob3[itor->tg->prob3].l; - *out_bowt = 0; - break; - default: /* Should not happen. */ - return NULL; - } - return base->wids; -} - -static ngram_iter_t * -lm3g_template_iter_next(ngram_iter_t *base) -{ - NGRAM_MODEL_TYPE *model = (NGRAM_MODEL_TYPE *)base->model; - lm3g_iter_t *itor = (lm3g_iter_t *)base; - - switch (base->m) { - case 0: - ++itor->ug; - /* Check for end condition. */ - if (itor->ug - model->lm3g.unigrams >= base->model->n_counts[0]) - goto done; - break; - case 1: - ++itor->bg; - /* Check for end condition. */ - if (itor->bg - model->lm3g.bigrams >= base->model->n_counts[1]) - goto done; - /* Advance unigram pointer if necessary in order to get one - * that points to this bigram. */ - while (itor->bg - model->lm3g.bigrams >= itor->ug[1].bigrams) { - /* Stop if this is a successor iterator, since we don't - * want a new unigram. 
*/ - if (base->successor) - goto done; - ++itor->ug; - if (itor->ug == model->lm3g.unigrams + base->model->n_counts[0]) { - E_ERROR("Bigram %d has no valid unigram parent\n", - itor->bg - model->lm3g.bigrams); - goto done; - } - } - break; - case 2: - ++itor->tg; - /* Check for end condition. */ - if (itor->tg - model->lm3g.trigrams >= base->model->n_counts[2]) - goto done; - /* Advance bigram pointer if necessary. */ - while (itor->tg - model->lm3g.trigrams >= - FIRST_TG(model, (itor->bg - model->lm3g.bigrams + 1))) { - if (base->successor) - goto done; - ++itor->bg; - if (itor->bg == model->lm3g.bigrams + base->model->n_counts[1]) { - E_ERROR("Trigram %d has no valid bigram parent\n", - itor->tg - model->lm3g.trigrams); - - goto done; - } - } - /* Advance unigram pointer if necessary. */ - while (itor->bg - model->lm3g.bigrams >= itor->ug[1].bigrams) { - ++itor->ug; - if (itor->ug == model->lm3g.unigrams + base->model->n_counts[0]) { - E_ERROR("Trigram %d has no valid unigram parent\n", - itor->tg - model->lm3g.trigrams); - goto done; - } - } - break; - default: /* Should not happen. */ - goto done; - } - - return (ngram_iter_t *)itor; -done: - ngram_iter_free(base); - return NULL; -} - -static void -lm3g_template_iter_free(ngram_iter_t *base) -{ - ckd_free(base); -} diff --git a/media/sphinxbase/src/libsphinxbase/lm/ngram_model.c b/media/sphinxbase/src/libsphinxbase/lm/ngram_model.c deleted file mode 100644 index 02af4151b..000000000 --- a/media/sphinxbase/src/libsphinxbase/lm/ngram_model.c +++ /dev/null @@ -1,1129 +0,0 @@ -/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ -/* ==================================================================== - * Copyright (c) 1999-2007 Carnegie Mellon University. All rights - * reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * This work was supported in part by funding from the Defense Advanced - * Research Projects Agency and the National Science Foundation of the - * United States of America, and the CMU Sphinx Speech Consortium. - * - * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND - * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, - * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY - * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * ==================================================================== - * - */ -/* - * \file ngram_model.c N-Gram language models. 
- * - * Author: David Huggins-Daines, much code taken from sphinx3/src/libs3decoder/liblm - */ - -#ifdef HAVE_CONFIG_H -#include <config.h> -#endif - -#include <string.h> -#include <assert.h> - -#include "sphinxbase/ngram_model.h" -#include "sphinxbase/ckd_alloc.h" -#include "sphinxbase/filename.h" -#include "sphinxbase/pio.h" -#include "sphinxbase/err.h" -#include "sphinxbase/logmath.h" -#include "sphinxbase/strfuncs.h" -#include "sphinxbase/case.h" - -#include "ngram_model_internal.h" - -ngram_file_type_t -ngram_file_name_to_type(const char *file_name) -{ - const char *ext; - - ext = strrchr(file_name, '.'); - if (ext == NULL) { - return NGRAM_INVALID; - } - if (0 == strcmp_nocase(ext, ".gz")) { - while (--ext >= file_name) { - if (*ext == '.') break; - } - if (ext < file_name) { - return NGRAM_INVALID; - } - } - else if (0 == strcmp_nocase(ext, ".bz2")) { - while (--ext >= file_name) { - if (*ext == '.') break; - } - if (ext < file_name) { - return NGRAM_INVALID; - } - } - /* We use strncmp because there might be a .gz on the end. 
*/ - if (0 == strncmp_nocase(ext, ".ARPA", 5)) - return NGRAM_ARPA; - if (0 == strncmp_nocase(ext, ".DMP", 4)) - return NGRAM_DMP; - return NGRAM_INVALID; - } - -ngram_file_type_t -ngram_str_to_type(const char *str_name) -{ - if (0 == strcmp_nocase(str_name, "arpa")) - return NGRAM_ARPA; - if (0 == strcmp_nocase(str_name, "dmp")) - return NGRAM_DMP; - return NGRAM_INVALID; -} - -char const * -ngram_type_to_str(int type) -{ - switch (type) { - case NGRAM_ARPA: - return "arpa"; - case NGRAM_DMP: - return "dmp"; - default: - return NULL; - } -} - - - ngram_model_t * - ngram_model_read(cmd_ln_t *config, - const char *file_name, - ngram_file_type_t file_type, - logmath_t *lmath) - { - ngram_model_t *model = NULL; - - switch (file_type) { - case NGRAM_AUTO: { - if ((model = ngram_model_arpa_read(config, file_name, lmath)) != NULL) - break; - if ((model = ngram_model_dmp_read(config, file_name, lmath)) != NULL) - break; - return NULL; - } - case NGRAM_ARPA: - model = ngram_model_arpa_read(config, file_name, lmath); - break; - case NGRAM_DMP: - model = ngram_model_dmp_read(config, file_name, lmath); - break; - default: - E_ERROR("language model file type not supported\n"); - return NULL; - } - - /* Now set weights based on config if present. 
*/ - if (config) { - float32 lw = 1.0; - float32 wip = 1.0; - float32 uw = 1.0; - - if (cmd_ln_exists_r(config, "-lw")) - lw = cmd_ln_float32_r(config, "-lw"); - if (cmd_ln_exists_r(config, "-wip")) - wip = cmd_ln_float32_r(config, "-wip"); - if (cmd_ln_exists_r(config, "-uw")) - uw = cmd_ln_float32_r(config, "-uw"); - - ngram_model_apply_weights(model, lw, wip, uw); - } - - return model; - } - - int - ngram_model_write(ngram_model_t *model, const char *file_name, - ngram_file_type_t file_type) - { - switch (file_type) { - case NGRAM_AUTO: { - file_type = ngram_file_name_to_type(file_name); - /* Default to ARPA (catches .lm and other things) */ - if (file_type == NGRAM_INVALID) - file_type = NGRAM_ARPA; - return ngram_model_write(model, file_name, file_type); - } - case NGRAM_ARPA: - return ngram_model_arpa_write(model, file_name); - case NGRAM_DMP: - return ngram_model_dmp_write(model, file_name); - default: - E_ERROR("language model file type not supported\n"); - return -1; - } - E_ERROR("language model file type not supported\n"); - return -1; - } - - int32 - ngram_model_init(ngram_model_t *base, - ngram_funcs_t *funcs, - logmath_t *lmath, - int32 n, int32 n_unigram) - { - base->refcount = 1; - base->funcs = funcs; - base->n = n; - /* If this was previously initialized... */ - if (base->n_counts == NULL) - base->n_counts = ckd_calloc(3, sizeof(*base->n_counts)); - /* Don't reset weights if logmath object hasn't changed. */ - if (base->lmath != lmath) { - /* Set default values for weights. */ - base->lw = 1.0; - base->log_wip = 0; /* i.e. 1.0 */ - base->log_uw = 0; /* i.e. 1.0 */ - base->log_uniform = logmath_log(lmath, 1.0 / n_unigram); - base->log_uniform_weight = logmath_get_zero(lmath); - base->log_zero = logmath_get_zero(lmath); - base->lmath = lmath; - } - /* Allocate or reallocate space for word strings. */ - if (base->word_str) { - /* Free all previous word strings if they were allocated. 
*/ - if (base->writable) { - int32 i; - for (i = 0; i < base->n_words; ++i) { - ckd_free(base->word_str[i]); - base->word_str[i] = NULL; - } - } - base->word_str = ckd_realloc(base->word_str, n_unigram * sizeof(char *)); - } - else - base->word_str = ckd_calloc(n_unigram, sizeof(char *)); - /* NOTE: They are no longer case-insensitive since we are allowing - * other encodings for word strings. Beware. */ - if (base->wid) - hash_table_empty(base->wid); - else - base->wid = hash_table_new(n_unigram, FALSE); - base->n_counts[0] = base->n_1g_alloc = base->n_words = n_unigram; - - return 0; -} - -ngram_model_t * -ngram_model_retain(ngram_model_t *model) -{ - ++model->refcount; - return model; -} - - -void -ngram_model_flush(ngram_model_t *model) -{ - if (model->funcs && model->funcs->flush) - (*model->funcs->flush)(model); -} - -int -ngram_model_free(ngram_model_t *model) -{ - int i; - - if (model == NULL) - return 0; - if (--model->refcount > 0) - return model->refcount; - if (model->funcs && model->funcs->free) - (*model->funcs->free)(model); - if (model->writable) { - /* Free all words. */ - for (i = 0; i < model->n_words; ++i) { - ckd_free(model->word_str[i]); - } - } - else { - /* Free all class words. */ - for (i = 0; i < model->n_classes; ++i) { - ngram_class_t *lmclass; - int32 j; - - lmclass = model->classes[i]; - for (j = 0; j < lmclass->n_words; ++j) { - ckd_free(model->word_str[lmclass->start_wid + j]); - } - for (j = 0; j < lmclass->n_hash; ++j) { - if (lmclass->nword_hash[j].wid != -1) { - ckd_free(model->word_str[lmclass->nword_hash[j].wid]); - } - } - } - } - for (i = 0; i < model->n_classes; ++i) { - ngram_class_free(model->classes[i]); - } - ckd_free(model->classes); - hash_table_free(model->wid); - ckd_free(model->word_str); - ckd_free(model->n_counts); - ckd_free(model); - return 0; -} - -int -ngram_model_casefold(ngram_model_t *model, int kase) -{ - int writable, i; - hash_table_t *new_wid; - - /* Were word strings already allocated? 
*/ - writable = model->writable; - /* Either way, we are going to allocate some word strings. */ - model->writable = TRUE; - - /* And, don't forget, we need to rebuild the word to unigram ID - * mapping. */ - new_wid = hash_table_new(model->n_words, FALSE); - for (i = 0; i < model->n_words; ++i) { - char *outstr; - if (writable) { - outstr = model->word_str[i]; - } - else { - outstr = ckd_salloc(model->word_str[i]); - } - /* Don't case-fold <tags> or [classes] */ - if (outstr[0] == '<' || outstr[0] == '[') { - } - else { - switch (kase) { - case NGRAM_UPPER: - ucase(outstr); - break; - case NGRAM_LOWER: - lcase(outstr); - break; - default: - ; - } - } - model->word_str[i] = outstr; - - /* Now update the hash table. We might have terrible - * collisions here, so warn about them. */ - if (hash_table_enter_int32(new_wid, model->word_str[i], i) != i) { - E_WARN("Duplicate word in dictionary after conversion: %s\n", - model->word_str[i]); - } - } - /* Swap out the hash table. */ - hash_table_free(model->wid); - model->wid = new_wid; - return 0; -} - -int -ngram_model_apply_weights(ngram_model_t *model, - float32 lw, float32 wip, float32 uw) -{ - return (*model->funcs->apply_weights)(model, lw, wip, uw); -} - -float32 -ngram_model_get_weights(ngram_model_t *model, int32 *out_log_wip, - int32 *out_log_uw) -{ - if (out_log_wip) *out_log_wip = model->log_wip; - if (out_log_uw) *out_log_uw = model->log_uw; - return model->lw; -} - - -int32 -ngram_ng_score(ngram_model_t *model, int32 wid, int32 *history, - int32 n_hist, int32 *n_used) -{ - int32 score, class_weight = 0; - int i; - - /* Closed vocabulary, OOV word probability is zero */ - if (wid == NGRAM_INVALID_WID) - return model->log_zero; - - /* "Declassify" wid and history */ - if (NGRAM_IS_CLASSWID(wid)) { - ngram_class_t *lmclass = model->classes[NGRAM_CLASSID(wid)]; - - class_weight = ngram_class_prob(lmclass, wid); - if (class_weight == 1) /* Meaning, not found in class. 
*/ - return model->log_zero; - wid = lmclass->tag_wid; - } - for (i = 0; i < n_hist; ++i) { - if (history[i] != NGRAM_INVALID_WID && NGRAM_IS_CLASSWID(history[i])) - history[i] = model->classes[NGRAM_CLASSID(history[i])]->tag_wid; - } - score = (*model->funcs->score)(model, wid, history, n_hist, n_used); - - /* Multiply by unigram in-class weight. */ - return score + class_weight; -} - -int32 -ngram_score(ngram_model_t *model, const char *word, ...) -{ - va_list history; - const char *hword; - int32 *histid; - int32 n_hist; - int32 n_used; - int32 prob; - - va_start(history, word); - n_hist = 0; - while ((hword = va_arg(history, const char *)) != NULL) - ++n_hist; - va_end(history); - - histid = ckd_calloc(n_hist, sizeof(*histid)); - va_start(history, word); - n_hist = 0; - while ((hword = va_arg(history, const char *)) != NULL) { - histid[n_hist] = ngram_wid(model, hword); - ++n_hist; - } - va_end(history); - - prob = ngram_ng_score(model, ngram_wid(model, word), - histid, n_hist, &n_used); - ckd_free(histid); - return prob; -} - -int32 -ngram_tg_score(ngram_model_t *model, int32 w3, int32 w2, int32 w1, int32 *n_used) -{ - int32 hist[2]; - hist[0] = w2; - hist[1] = w1; - return ngram_ng_score(model, w3, hist, 2, n_used); -} - -int32 -ngram_bg_score(ngram_model_t *model, int32 w2, int32 w1, int32 *n_used) -{ - return ngram_ng_score(model, w2, &w1, 1, n_used); -} - -int32 -ngram_ng_prob(ngram_model_t *model, int32 wid, int32 *history, - int32 n_hist, int32 *n_used) -{ - int32 prob, class_weight = 0; - int i; - - /* Closed vocabulary, OOV word probability is zero */ - if (wid == NGRAM_INVALID_WID) - return model->log_zero; - - /* "Declassify" wid and history */ - if (NGRAM_IS_CLASSWID(wid)) { - ngram_class_t *lmclass = model->classes[NGRAM_CLASSID(wid)]; - - class_weight = ngram_class_prob(lmclass, wid); - if (class_weight == 1) /* Meaning, not found in class. 
*/ - return class_weight; - wid = lmclass->tag_wid; - } - for (i = 0; i < n_hist; ++i) { - if (history[i] != NGRAM_INVALID_WID && NGRAM_IS_CLASSWID(history[i])) - history[i] = model->classes[NGRAM_CLASSID(history[i])]->tag_wid; - } - prob = (*model->funcs->raw_score)(model, wid, history, - n_hist, n_used); - /* Multiply by unigram in-class weight. */ - return prob + class_weight; -} - -int32 -ngram_probv(ngram_model_t *model, const char *word, ...) -{ - va_list history; - const char *hword; - int32 *histid; - int32 n_hist; - int32 n_used; - int32 prob; - - va_start(history, word); - n_hist = 0; - while ((hword = va_arg(history, const char *)) != NULL) - ++n_hist; - va_end(history); - - histid = ckd_calloc(n_hist, sizeof(*histid)); - va_start(history, word); - n_hist = 0; - while ((hword = va_arg(history, const char *)) != NULL) { - histid[n_hist] = ngram_wid(model, hword); - ++n_hist; - } - va_end(history); - - prob = ngram_ng_prob(model, ngram_wid(model, word), - histid, n_hist, &n_used); - ckd_free(histid); - return prob; -} - -int32 -ngram_prob(ngram_model_t *model, const char *const *words, int32 n) -{ - int32 *ctx_id; - int32 nused; - int32 prob; - int32 wid; - uint32 i; - - ctx_id = (int32 *)ckd_calloc(n - 1, sizeof(*ctx_id)); - for (i = 1; i < n; ++i) - ctx_id[i - 1] = ngram_wid(model, words[i]); - - wid = ngram_wid(model, *words); - prob = ngram_ng_prob(model, wid, ctx_id, n - 1, &nused); - ckd_free(ctx_id); - - return prob; -} - -int32 -ngram_score_to_prob(ngram_model_t *base, int32 score) -{ - int32 prob; - - /* Undo insertion penalty. */ - prob = score - base->log_wip; - /* Undo language weight. */ - prob = (int32)(prob / base->lw); - - return prob; -} - -int32 -ngram_unknown_wid(ngram_model_t *model) -{ - int32 val; - - /* FIXME: This could be memoized for speed if necessary. */ - /* Look up <UNK>, if not found return NGRAM_INVALID_WID. 
*/ - if (hash_table_lookup_int32(model->wid, "<UNK>", &val) == -1) - return NGRAM_INVALID_WID; - else - return val; -} - -int32 -ngram_zero(ngram_model_t *model) -{ - return model->log_zero; -} - -int32 -ngram_model_get_size(ngram_model_t *model) -{ - if (model != NULL) - return model->n; - return 0; -} - -int32 const * -ngram_model_get_counts(ngram_model_t *model) -{ - if (model != NULL) - return model->n_counts; - return NULL; -} - -void -ngram_iter_init(ngram_iter_t *itor, ngram_model_t *model, - int m, int successor) -{ - itor->model = model; - itor->wids = ckd_calloc(model->n, sizeof(*itor->wids)); - itor->m = m; - itor->successor = successor; -} - -ngram_iter_t * -ngram_model_mgrams(ngram_model_t *model, int m) -{ - ngram_iter_t *itor; - /* The fact that m=n-1 is not exactly obvious. Prevent accidents. */ - if (m >= model->n) - return NULL; - if (model->funcs->mgrams == NULL) - return NULL; - itor = (*model->funcs->mgrams)(model, m); - return itor; -} - -ngram_iter_t * -ngram_iter(ngram_model_t *model, const char *word, ...) -{ - va_list history; - const char *hword; - int32 *histid; - int32 n_hist; - ngram_iter_t *itor; - - va_start(history, word); - n_hist = 0; - while ((hword = va_arg(history, const char *)) != NULL) - ++n_hist; - va_end(history); - - histid = ckd_calloc(n_hist, sizeof(*histid)); - va_start(history, word); - n_hist = 0; - while ((hword = va_arg(history, const char *)) != NULL) { - histid[n_hist] = ngram_wid(model, hword); - ++n_hist; - } - va_end(history); - - itor = ngram_ng_iter(model, ngram_wid(model, word), histid, n_hist); - ckd_free(histid); - return itor; -} - -ngram_iter_t * -ngram_ng_iter(ngram_model_t *model, int32 wid, int32 *history, int32 n_hist) -{ - if (n_hist >= model->n) - return NULL; - if (model->funcs->iter == NULL) - return NULL; - return (*model->funcs->iter)(model, wid, history, n_hist); -} - -ngram_iter_t * -ngram_iter_successors(ngram_iter_t *itor) -{ - /* Stop when we are at the highest order N-Gram. 
*/ - if (itor->m == itor->model->n - 1) - return NULL; - return (*itor->model->funcs->successors)(itor); -} - -int32 const * -ngram_iter_get(ngram_iter_t *itor, - int32 *out_score, - int32 *out_bowt) -{ - return (*itor->model->funcs->iter_get)(itor, out_score, out_bowt); -} - -ngram_iter_t * -ngram_iter_next(ngram_iter_t *itor) -{ - return (*itor->model->funcs->iter_next)(itor); -} - -void -ngram_iter_free(ngram_iter_t *itor) -{ - ckd_free(itor->wids); - (*itor->model->funcs->iter_free)(itor); -} - -int32 -ngram_wid(ngram_model_t *model, const char *word) -{ - int32 val; - - if (hash_table_lookup_int32(model->wid, word, &val) == -1) - return ngram_unknown_wid(model); - else - return val; -} - -const char * -ngram_word(ngram_model_t *model, int32 wid) -{ - /* Remove any class tag */ - wid = NGRAM_BASEWID(wid); - if (wid >= model->n_words) - return NULL; - return model->word_str[wid]; -} - -/** - * Add a word to the word string and ID mapping. - */ -int32 -ngram_add_word_internal(ngram_model_t *model, - const char *word, - int32 classid) -{ - - /* Check for hash collisions. */ - int32 wid; - if (hash_table_lookup_int32(model->wid, word, &wid) == 0) { - E_WARN("Omit duplicate word '%s'\n", word); - return wid; - } - - /* Take the next available word ID */ - wid = model->n_words; - if (classid >= 0) { - wid = NGRAM_CLASSWID(wid, classid); - } - - /* Reallocate word_str if necessary. */ - if (model->n_words >= model->n_1g_alloc) { - model->n_1g_alloc += UG_ALLOC_STEP; - model->word_str = ckd_realloc(model->word_str, - sizeof(*model->word_str) * model->n_1g_alloc); - } - /* Add the word string in the appropriate manner. */ - /* Class words are always dynamically allocated. */ - model->word_str[model->n_words] = ckd_salloc(word); - /* Now enter it into the hash table. 
*/ - if (hash_table_enter_int32(model->wid, model->word_str[model->n_words], wid) != wid) { - E_ERROR("Hash insertion failed for word %s => %p (should not happen)\n", - model->word_str[model->n_words], (void *)(long)(wid)); - } - /* Increment number of words. */ - ++model->n_words; - return wid; -} - -int32 -ngram_model_add_word(ngram_model_t *model, - const char *word, float32 weight) -{ - int32 wid, prob = model->log_zero; - - /* If we add word to unwritable model, we need to make it writable */ - if (!model->writable) { - E_WARN("Can't add word '%s' to read-only language model. " - "Disable mmap with '-mmap no' to make it writable\n", word); - return -1; - } - - wid = ngram_add_word_internal(model, word, -1); - if (wid == NGRAM_INVALID_WID) - return wid; - - /* Do what needs to be done to add the word to the unigram. */ - if (model->funcs && model->funcs->add_ug) - prob = (*model->funcs->add_ug)(model, wid, logmath_log(model->lmath, weight)); - if (prob == 0) - return -1; - - return wid; -} - -ngram_class_t * -ngram_class_new(ngram_model_t *model, int32 tag_wid, int32 start_wid, glist_t classwords) -{ - ngram_class_t *lmclass; - gnode_t *gn; - float32 tprob; - int i; - - lmclass = ckd_calloc(1, sizeof(*lmclass)); - lmclass->tag_wid = tag_wid; - /* wid_base is the wid (minus class tag) of the first word in the list. 
*/ - lmclass->start_wid = start_wid; - lmclass->n_words = glist_count(classwords); - lmclass->prob1 = ckd_calloc(lmclass->n_words, sizeof(*lmclass->prob1)); - lmclass->nword_hash = NULL; - lmclass->n_hash = 0; - tprob = 0.0; - for (gn = classwords; gn; gn = gnode_next(gn)) { - tprob += gnode_float32(gn); - } - if (tprob > 1.1 || tprob < 0.9) { - E_INFO("Total class probability is %f, will normalize\n", tprob); - for (gn = classwords; gn; gn = gnode_next(gn)) { - gn->data.fl /= tprob; - } - } - for (i = 0, gn = classwords; gn; ++i, gn = gnode_next(gn)) { - lmclass->prob1[i] = logmath_log(model->lmath, gnode_float32(gn)); - } - - return lmclass; -} - -int32 -ngram_class_add_word(ngram_class_t *lmclass, int32 wid, int32 lweight) -{ - int32 hash; - - if (lmclass->nword_hash == NULL) { - /* Initialize everything in it to -1 */ - lmclass->nword_hash = ckd_malloc(NGRAM_HASH_SIZE * sizeof(*lmclass->nword_hash)); - memset(lmclass->nword_hash, 0xff, NGRAM_HASH_SIZE * sizeof(*lmclass->nword_hash)); - lmclass->n_hash = NGRAM_HASH_SIZE; - lmclass->n_hash_inuse = 0; - } - /* Stupidest possible hash function. This will work pretty well - * when this function is called repeatedly with contiguous word - * IDs, though... */ - hash = wid & (lmclass->n_hash - 1); - if (lmclass->nword_hash[hash].wid == -1) { - /* Good, no collision. */ - lmclass->nword_hash[hash].wid = wid; - lmclass->nword_hash[hash].prob1 = lweight; - ++lmclass->n_hash_inuse; - return hash; - } - else { - int32 next; /**< Next available bucket. */ - /* Collision... Find the end of the hash chain. */ - while (lmclass->nword_hash[hash].next != -1) - hash = lmclass->nword_hash[hash].next; - assert(hash != -1); - /* Does we has any more bukkit? */ - if (lmclass->n_hash_inuse == lmclass->n_hash) { - /* Oh noes! Ok, we makes more. 
*/ - lmclass->nword_hash = ckd_realloc(lmclass->nword_hash, - lmclass->n_hash * 2 * sizeof(*lmclass->nword_hash)); - memset(lmclass->nword_hash + lmclass->n_hash, - 0xff, lmclass->n_hash * sizeof(*lmclass->nword_hash)); - /* Just use the next allocated one (easy) */ - next = lmclass->n_hash; - lmclass->n_hash *= 2; - } - else { - /* Look for any available bucket. We hope this doesn't happen. */ - for (next = 0; next < lmclass->n_hash; ++next) - if (lmclass->nword_hash[next].wid == -1) - break; - /* This should absolutely not happen. */ - assert(next != lmclass->n_hash); - } - lmclass->nword_hash[next].wid = wid; - lmclass->nword_hash[next].prob1 = lweight; - lmclass->nword_hash[hash].next = next; - ++lmclass->n_hash_inuse; - return next; - } -} - -void -ngram_class_free(ngram_class_t *lmclass) -{ - ckd_free(lmclass->nword_hash); - ckd_free(lmclass->prob1); - ckd_free(lmclass); -} - -int32 -ngram_model_add_class_word(ngram_model_t *model, - const char *classname, - const char *word, - float32 weight) -{ - ngram_class_t *lmclass; - int32 classid, tag_wid, wid, i, scale; - float32 fprob; - - /* Find the class corresponding to classname. Linear search - * probably okay here since there won't be very many classes, and - * this doesn't have to be fast. */ - tag_wid = ngram_wid(model, classname); - if (tag_wid == NGRAM_INVALID_WID) { - E_ERROR("No such word or class tag: %s\n", classname); - return tag_wid; - } - for (classid = 0; classid < model->n_classes; ++classid) { - if (model->classes[classid]->tag_wid == tag_wid) - break; - } - /* Hmm, no such class. It's probably not a good idea to create one. */ - if (classid == model->n_classes) { - E_ERROR("Word %s is not a class tag (call ngram_model_add_class() first)\n", classname); - return NGRAM_INVALID_WID; - } - lmclass = model->classes[classid]; - - /* Add this word to the model's set of words. 
*/ - wid = ngram_add_word_internal(model, word, classid); - if (wid == NGRAM_INVALID_WID) - return wid; - - /* This is the fixed probability of the new word. */ - fprob = weight * 1.0f / (lmclass->n_words + lmclass->n_hash_inuse + 1); - /* Now normalize everything else to fit it in. This is - * accomplished by simply scaling all the other probabilities - * by (1-fprob). */ - scale = logmath_log(model->lmath, 1.0 - fprob); - for (i = 0; i < lmclass->n_words; ++i) - lmclass->prob1[i] += scale; - for (i = 0; i < lmclass->n_hash; ++i) - if (lmclass->nword_hash[i].wid != -1) - lmclass->nword_hash[i].prob1 += scale; - - /* Now add it to the class hash table. */ - return ngram_class_add_word(lmclass, wid, logmath_log(model->lmath, fprob)); -} - -int32 -ngram_model_add_class(ngram_model_t *model, - const char *classname, - float32 classweight, - char **words, - const float32 *weights, - int32 n_words) -{ - ngram_class_t *lmclass; - glist_t classwords = NULL; - int32 i, start_wid = -1; - int32 classid, tag_wid; - - /* Check if classname already exists in model. 
If not, add it.*/ - if ((tag_wid = ngram_wid(model, classname)) == ngram_unknown_wid(model)) { - tag_wid = ngram_model_add_word(model, classname, classweight); - if (tag_wid == NGRAM_INVALID_WID) - return -1; - } - - if (model->n_classes == 128) { - E_ERROR("Number of classes cannot exceed 128 (sorry)\n"); - return -1; - } - classid = model->n_classes; - for (i = 0; i < n_words; ++i) { - int32 wid; - - wid = ngram_add_word_internal(model, words[i], classid); - if (wid == NGRAM_INVALID_WID) - return -1; - if (start_wid == -1) - start_wid = NGRAM_BASEWID(wid); - classwords = glist_add_float32(classwords, weights[i]); - } - classwords = glist_reverse(classwords); - lmclass = ngram_class_new(model, tag_wid, start_wid, classwords); - glist_free(classwords); - if (lmclass == NULL) - return -1; - - ++model->n_classes; - if (model->classes == NULL) - model->classes = ckd_calloc(1, sizeof(*model->classes)); - else - model->classes = ckd_realloc(model->classes, - model->n_classes * sizeof(*model->classes)); - model->classes[classid] = lmclass; - return classid; -} - -int32 -ngram_class_prob(ngram_class_t *lmclass, int32 wid) -{ - int32 base_wid = NGRAM_BASEWID(wid); - - if (base_wid < lmclass->start_wid - || base_wid > lmclass->start_wid + lmclass->n_words) { - int32 hash; - - /* Look it up in the hash table. */ - hash = wid & (lmclass->n_hash - 1); - while (hash != -1 && lmclass->nword_hash[hash].wid != wid) - hash = lmclass->nword_hash[hash].next; - if (hash == -1) - return 1; - return lmclass->nword_hash[hash].prob1; - } - else { - return lmclass->prob1[base_wid - lmclass->start_wid]; - } -} - -int32 -read_classdef_file(hash_table_t *classes, const char *file_name) -{ - FILE *fp; - int32 is_pipe; - int inclass; /**< Are we currently reading a list of class words? 
*/ - int32 rv = -1; - gnode_t *gn; - glist_t classwords = NULL; - glist_t classprobs = NULL; - char *classname = NULL; - - if ((fp = fopen_comp(file_name, "r", &is_pipe)) == NULL) { - E_ERROR("File %s not found\n", file_name); - return -1; - } - - inclass = FALSE; - while (!feof(fp)) { - char line[512]; - char *wptr[2]; - int n_words; - - if (fgets(line, sizeof(line), fp) == NULL) - break; - - n_words = str2words(line, wptr, 2); - if (n_words <= 0) - continue; - - if (inclass) { - /* Look for an end of class marker. */ - if (n_words == 2 && 0 == strcmp(wptr[0], "END")) { - classdef_t *classdef; - gnode_t *word, *weight; - int32 i; - - if (classname == NULL || 0 != strcmp(wptr[1], classname)) - goto error_out; - inclass = FALSE; - - /* Construct a class from the list of words collected. */ - classdef = ckd_calloc(1, sizeof(*classdef)); - classwords = glist_reverse(classwords); - classprobs = glist_reverse(classprobs); - classdef->n_words = glist_count(classwords); - classdef->words = ckd_calloc(classdef->n_words, - sizeof(*classdef->words)); - classdef->weights = ckd_calloc(classdef->n_words, - sizeof(*classdef->weights)); - word = classwords; - weight = classprobs; - for (i = 0; i < classdef->n_words; ++i) { - classdef->words[i] = gnode_ptr(word); - classdef->weights[i] = gnode_float32(weight); - word = gnode_next(word); - weight = gnode_next(weight); - } - - /* Add this class to the hash table. */ - if (hash_table_enter(classes, classname, classdef) != classdef) { - classdef_free(classdef); - goto error_out; - } - - /* Reset everything. */ - glist_free(classwords); - glist_free(classprobs); - classwords = NULL; - classprobs = NULL; - classname = NULL; - } - else { - float32 fprob; - - if (n_words == 2) - fprob = (float32)atof_c(wptr[1]); - else - fprob = 1.0f; - /* Add it to the list of words for this class. 
*/ - classwords = glist_add_ptr(classwords, ckd_salloc(wptr[0])); - classprobs = glist_add_float32(classprobs, fprob); - } - } - else { - /* Start a new LM class if the LMCLASS marker is seen */ - if (n_words == 2 && 0 == strcmp(wptr[0], "LMCLASS")) { - if (inclass) - goto error_out; - inclass = TRUE; - classname = ckd_salloc(wptr[1]); - } - /* Otherwise, just ignore whatever junk we got */ - } - } - rv = 0; /* Success. */ - -error_out: - /* Free all the stuff we might have allocated. */ - fclose_comp(fp, is_pipe); - for (gn = classwords; gn; gn = gnode_next(gn)) - ckd_free(gnode_ptr(gn)); - glist_free(classwords); - glist_free(classprobs); - ckd_free(classname); - - return rv; -} - -void -classdef_free(classdef_t *classdef) -{ - int32 i; - for (i = 0; i < classdef->n_words; ++i) - ckd_free(classdef->words[i]); - ckd_free(classdef->words); - ckd_free(classdef->weights); - ckd_free(classdef); -} - - -int32 -ngram_model_read_classdef(ngram_model_t *model, - const char *file_name) -{ - hash_table_t *classes; - glist_t hl = NULL; - gnode_t *gn; - int32 rv = -1; - - classes = hash_table_new(0, FALSE); - if (read_classdef_file(classes, file_name) < 0) { - hash_table_free(classes); - return -1; - } - - /* Create a new class in the language model for each classdef. 
*/ - hl = hash_table_tolist(classes, NULL); - for (gn = hl; gn; gn = gnode_next(gn)) { - hash_entry_t *he = gnode_ptr(gn); - classdef_t *classdef = he->val; - - if (ngram_model_add_class(model, he->key, 1.0, - classdef->words, - classdef->weights, - classdef->n_words) < 0) - goto error_out; - } - rv = 0; - -error_out: - for (gn = hl; gn; gn = gnode_next(gn)) { - hash_entry_t *he = gnode_ptr(gn); - ckd_free((char *)he->key); - classdef_free(he->val); - } - glist_free(hl); - hash_table_free(classes); - return rv; -} diff --git a/media/sphinxbase/src/libsphinxbase/lm/ngram_model_arpa.c b/media/sphinxbase/src/libsphinxbase/lm/ngram_model_arpa.c deleted file mode 100644 index a4b72cb00..000000000 --- a/media/sphinxbase/src/libsphinxbase/lm/ngram_model_arpa.c +++ /dev/null @@ -1,660 +0,0 @@ -/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ -/* ==================================================================== - * Copyright (c) 1999-2007 Carnegie Mellon University. All rights - * reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * This work was supported in part by funding from the Defense Advanced - * Research Projects Agency and the National Science Foundation of the - * United States of America, and the CMU Sphinx Speech Consortium. 
- * - * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND - * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, - * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY - * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * ==================================================================== - * - */ -/* - * \file ngram_model_arpa.c ARPA format language models - * - * Author: David Huggins-Daines <dhuggins@cs.cmu.edu> - */ - -#include "sphinxbase/ckd_alloc.h" -#include <string.h> -#include <limits.h> -#include <assert.h> - -#include "sphinxbase/err.h" -#include "sphinxbase/pio.h" -#include "sphinxbase/listelem_alloc.h" -#include "sphinxbase/strfuncs.h" - -#include "ngram_model_arpa.h" - -static ngram_funcs_t ngram_model_arpa_funcs; - -#define TSEG_BASE(m,b) ((m)->lm3g.tseg_base[(b)>>LOG_BG_SEG_SZ]) -#define FIRST_BG(m,u) ((m)->lm3g.unigrams[u].bigrams) -#define FIRST_TG(m,b) (TSEG_BASE((m),(b))+((m)->lm3g.bigrams[b].trigrams)) - -/* - * Read and return #unigrams, #bigrams, #trigrams as stated in input file. 
- */ -static int -ReadNgramCounts(lineiter_t **li, int32 * n_ug, int32 * n_bg, int32 * n_tg) -{ - int32 ngram, ngram_cnt; - - /* skip file until past the '\data\' marker */ - while (*li) { - string_trim((*li)->buf, STRING_BOTH); - if (strcmp((*li)->buf, "\\data\\") == 0) - break; - *li = lineiter_next(*li); - } - if (*li == NULL || strcmp((*li)->buf, "\\data\\") != 0) { - E_INFO("No \\data\\ mark in LM file\n"); - return -1; - } - - *n_ug = *n_bg = *n_tg = 0; - while ((*li = lineiter_next(*li))) { - if (sscanf((*li)->buf, "ngram %d=%d", &ngram, &ngram_cnt) != 2) - break; - switch (ngram) { - case 1: - *n_ug = ngram_cnt; - break; - case 2: - *n_bg = ngram_cnt; - break; - case 3: - *n_tg = ngram_cnt; - break; - default: - E_ERROR("Unknown ngram (%d)\n", ngram); - return -1; - } - } - if (*li == NULL) { - E_ERROR("EOF while reading ngram counts\n"); - return -1; - } - - /* Position iterator to the unigrams header '\1-grams:\' */ - while ((*li = lineiter_next(*li))) { - string_trim((*li)->buf, STRING_BOTH); - if (strcmp((*li)->buf, "\\1-grams:") == 0) - break; - } - if (*li == NULL) { - E_ERROR_SYSTEM("Failed to read \\1-grams: mark"); - return -1; - } - - if ((*n_ug <= 0) || (*n_bg < 0) || (*n_tg < 0)) { - E_ERROR("Bad or missing ngram count\n"); - return -1; - } - return 0; -} - -/* - * Read in the unigrams from given file into the LM structure model. - * On entry to this procedure, the iterator is positioned to the - * header line '\1-grams:'. 
- */ -static int -ReadUnigrams(lineiter_t **li, ngram_model_arpa_t * model) -{ - ngram_model_t *base = &model->base; - int32 wcnt; - float p1; - - E_INFO("Reading unigrams\n"); - - wcnt = 0; - while ((*li = lineiter_next(*li))) { - char *wptr[3], *name; - float32 bo_wt = 0.0f; - int n; - - string_trim((*li)->buf, STRING_BOTH); - if (strcmp((*li)->buf, "\\2-grams:") == 0 - || strcmp((*li)->buf, "\\end\\") == 0) - break; - - if ((n = str2words((*li)->buf, wptr, 3)) < 2) { - if ((*li)->buf[0] != '\0') - E_WARN("Format error; unigram ignored: %s\n", (*li)->buf); - continue; - } - else { - p1 = (float)atof_c(wptr[0]); - name = wptr[1]; - if (n == 3) - bo_wt = (float)atof_c(wptr[2]); - } - - if (wcnt >= base->n_counts[0]) { - E_ERROR("Too many unigrams\n"); - return -1; - } - - /* Associate name with word id */ - base->word_str[wcnt] = ckd_salloc(name); - if ((hash_table_enter(base->wid, base->word_str[wcnt], (void *)(long)wcnt)) - != (void *)(long)wcnt) { - E_WARN("Duplicate word in dictionary: %s\n", base->word_str[wcnt]); - } - model->lm3g.unigrams[wcnt].prob1.l = logmath_log10_to_log(base->lmath, p1); - model->lm3g.unigrams[wcnt].bo_wt1.l = logmath_log10_to_log(base->lmath, bo_wt); - wcnt++; - } - - if (base->n_counts[0] != wcnt) { - E_WARN("lm_t.ucount(%d) != #unigrams read(%d)\n", - base->n_counts[0], wcnt); - base->n_counts[0] = wcnt; - base->n_words = wcnt; - } - return 0; -} - -/* - * Read bigrams from given file into given model structure. 
- */ -static int -ReadBigrams(lineiter_t **li, ngram_model_arpa_t * model) -{ - ngram_model_t *base = &model->base; - int32 w1, w2, prev_w1, bgcount; - bigram_t *bgptr; - - E_INFO("Reading bigrams\n"); - - bgcount = 0; - bgptr = model->lm3g.bigrams; - prev_w1 = -1; - - while ((*li = lineiter_next(*li))) { - float32 p, bo_wt = 0.0f; - int32 p2, bo_wt2; - char *wptr[4], *word1, *word2; - int n; - - string_trim((*li)->buf, STRING_BOTH); - wptr[3] = NULL; - if ((n = str2words((*li)->buf, wptr, 4)) < 3) { - if ((*li)->buf[0] != '\0') - break; - continue; - } - else { - p = (float32)atof_c(wptr[0]); - word1 = wptr[1]; - word2 = wptr[2]; - if (wptr[3]) - bo_wt = (float32)atof_c(wptr[3]); - } - - if ((w1 = ngram_wid(base, word1)) == NGRAM_INVALID_WID) { - E_ERROR("Unknown word: %s, skipping bigram (%s %s)\n", - word1, word1, word2); - continue; - } - if ((w2 = ngram_wid(base, word2)) == NGRAM_INVALID_WID) { - E_ERROR("Unknown word: %s, skipping bigram (%s %s)\n", - word2, word1, word2); - continue; - } - - /* FIXME: Should use logmath_t quantization here. */ - /* HACK!! 
to quantize probs to 4 decimal digits */ - p = (float32)((int32)(p * 10000)) / 10000; - bo_wt = (float32)((int32)(bo_wt * 10000)) / 10000; - - p2 = logmath_log10_to_log(base->lmath, p); - bo_wt2 = logmath_log10_to_log(base->lmath, bo_wt); - - if (bgcount >= base->n_counts[1]) { - E_ERROR("Too many bigrams\n"); - return -1; - } - - bgptr->wid = w2; - bgptr->prob2 = sorted_id(&model->sorted_prob2, &p2); - if (base->n_counts[2] > 0) - bgptr->bo_wt2 = sorted_id(&model->sorted_bo_wt2, &bo_wt2); - - if (w1 != prev_w1) { - if (w1 < prev_w1) { - E_ERROR("Bigram %s %s not in unigram order word id: %d prev word id: %d\n", word1, word2, w1, prev_w1); - return -1; - } - - for (prev_w1++; prev_w1 <= w1; prev_w1++) - model->lm3g.unigrams[prev_w1].bigrams = bgcount; - prev_w1 = w1; - } - bgcount++; - bgptr++; - - if ((bgcount & 0x0000ffff) == 0) { - E_INFOCONT("."); - } - } - if (*li == NULL || ((strcmp((*li)->buf, "\\end\\") != 0) - && (strcmp((*li)->buf, "\\3-grams:") != 0))) { - E_ERROR("Bad bigram: %s\n", (*li)->buf); - return -1; - } - - for (prev_w1++; prev_w1 <= base->n_counts[0]; prev_w1++) - model->lm3g.unigrams[prev_w1].bigrams = bgcount; - - return 0; -} - -/* - * Very similar to ReadBigrams. 
- */ -static int -ReadTrigrams(lineiter_t **li, ngram_model_arpa_t * model) -{ - ngram_model_t *base = &model->base; - int32 i, w1, w2, w3, prev_w1, prev_w2, tgcount, prev_bg, bg, endbg; - int32 seg, prev_seg, prev_seg_lastbg; - trigram_t *tgptr; - bigram_t *bgptr; - - E_INFO("Reading trigrams\n"); - - tgcount = 0; - tgptr = model->lm3g.trigrams; - prev_w1 = -1; - prev_w2 = -1; - prev_bg = -1; - prev_seg = -1; - - while ((*li = lineiter_next(*li))) { - float32 p; - int32 p3; - char *wptr[4], *word1, *word2, *word3; - - string_trim((*li)->buf, STRING_BOTH); - if (str2words((*li)->buf, wptr, 4) != 4) { - if ((*li)->buf[0] != '\0') - break; - continue; - } - else { - p = (float32)atof_c(wptr[0]); - word1 = wptr[1]; - word2 = wptr[2]; - word3 = wptr[3]; - } - - if ((w1 = ngram_wid(base, word1)) == NGRAM_INVALID_WID) { - E_ERROR("Unknown word: %s, skipping trigram (%s %s %s)\n", - word1, word1, word2, word3); - continue; - } - if ((w2 = ngram_wid(base, word2)) == NGRAM_INVALID_WID) { - E_ERROR("Unknown word: %s, skipping trigram (%s %s %s)\n", - word2, word1, word2, word3); - continue; - } - if ((w3 = ngram_wid(base, word3)) == NGRAM_INVALID_WID) { - E_ERROR("Unknown word: %s, skipping trigram (%s %s %s)\n", - word3, word1, word2, word3); - continue; - } - - /* FIXME: Should use logmath_t quantization here. */ - /* HACK!! to quantize probs to 4 decimal digits */ - p = (float32)((int32)(p * 10000)) / 10000; - p3 = logmath_log10_to_log(base->lmath, p); - - if (tgcount >= base->n_counts[2]) { - E_ERROR("Too many trigrams\n"); - return -1; - } - - tgptr->wid = w3; - tgptr->prob3 = sorted_id(&model->sorted_prob3, &p3); - - if ((w1 != prev_w1) || (w2 != prev_w2)) { - /* Trigram for a new bigram; update tg info for all previous bigrams */ - if ((w1 < prev_w1) || ((w1 == prev_w1) && (w2 < prev_w2))) { - E_ERROR("Trigrams not in bigram order\n"); - return -1; - } - - bg = (w1 != - prev_w1) ? 
model->lm3g.unigrams[w1].bigrams : prev_bg + 1; - endbg = model->lm3g.unigrams[w1 + 1].bigrams; - bgptr = model->lm3g.bigrams + bg; - for (; (bg < endbg) && (bgptr->wid != w2); bg++, bgptr++); - if (bg >= endbg) { - E_ERROR("Missing bigram for trigram: %s", (*li)->buf); - return -1; - } - - /* bg = bigram entry index for <w1,w2>. Update tseg_base */ - seg = bg >> LOG_BG_SEG_SZ; - for (i = prev_seg + 1; i <= seg; i++) - model->lm3g.tseg_base[i] = tgcount; - - /* Update trigrams pointers for all bigrams until bg */ - if (prev_seg < seg) { - int32 tgoff = 0; - - if (prev_seg >= 0) { - tgoff = tgcount - model->lm3g.tseg_base[prev_seg]; - if (tgoff > 65535) { - E_ERROR("Size of trigram segment is bigger than 65535, such a big language models are not supported, use smaller vocabulary\n"); - return -1; - } - } - - prev_seg_lastbg = ((prev_seg + 1) << LOG_BG_SEG_SZ) - 1; - bgptr = model->lm3g.bigrams + prev_bg; - for (++prev_bg, ++bgptr; prev_bg <= prev_seg_lastbg; - prev_bg++, bgptr++) - bgptr->trigrams = tgoff; - - for (; prev_bg <= bg; prev_bg++, bgptr++) - bgptr->trigrams = 0; - } - else { - int32 tgoff; - - tgoff = tgcount - model->lm3g.tseg_base[prev_seg]; - if (tgoff > 65535) { - E_ERROR("Size of trigram segment is bigger than 65535, such a big language models are not supported, use smaller vocabulary\n"); - return -1; - } - - bgptr = model->lm3g.bigrams + prev_bg; - for (++prev_bg, ++bgptr; prev_bg <= bg; prev_bg++, bgptr++) - bgptr->trigrams = tgoff; - } - - prev_w1 = w1; - prev_w2 = w2; - prev_bg = bg; - prev_seg = seg; - } - - tgcount++; - tgptr++; - - if ((tgcount & 0x0000ffff) == 0) { - E_INFOCONT("."); - } - } - if (*li == NULL || strcmp((*li)->buf, "\\end\\") != 0) { - E_ERROR("Bad trigram: %s\n", (*li)->buf); - return -1; - } - - for (prev_bg++; prev_bg <= base->n_counts[1]; prev_bg++) { - if ((prev_bg & (BG_SEG_SZ - 1)) == 0) - model->lm3g.tseg_base[prev_bg >> LOG_BG_SEG_SZ] = tgcount; - if ((tgcount - model->lm3g.tseg_base[prev_bg >> LOG_BG_SEG_SZ]) > 
65535) { - E_ERROR("Size of trigram segment is bigger than 65535, such a big language models are not supported, use smaller vocabulary\n"); - return -1; - } - model->lm3g.bigrams[prev_bg].trigrams = - tgcount - model->lm3g.tseg_base[prev_bg >> LOG_BG_SEG_SZ]; - } - return 0; -} - -static unigram_t * -new_unigram_table(int32 n_ug) -{ - unigram_t *table; - int32 i; - - table = ckd_calloc(n_ug, sizeof(unigram_t)); - for (i = 0; i < n_ug; i++) { - table[i].prob1.l = INT_MIN; - table[i].bo_wt1.l = INT_MIN; - } - return table; -} - -ngram_model_t * -ngram_model_arpa_read(cmd_ln_t *config, - const char *file_name, - logmath_t *lmath) -{ - lineiter_t *li; - FILE *fp; - int32 is_pipe; - int32 n_unigram; - int32 n_bigram; - int32 n_trigram; - int32 n; - ngram_model_arpa_t *model; - ngram_model_t *base; - - if ((fp = fopen_comp(file_name, "r", &is_pipe)) == NULL) { - E_ERROR("File %s not found\n", file_name); - return NULL; - } - li = lineiter_start(fp); - - /* Read #unigrams, #bigrams, #trigrams from file */ - if (ReadNgramCounts(&li, &n_unigram, &n_bigram, &n_trigram) == -1) { - lineiter_free(li); - fclose_comp(fp, is_pipe); - return NULL; - } - E_INFO("ngrams 1=%d, 2=%d, 3=%d\n", n_unigram, n_bigram, n_trigram); - - /* Allocate space for LM, including initial OOVs and placeholders; initialize it */ - model = ckd_calloc(1, sizeof(*model)); - base = &model->base; - if (n_trigram > 0) - n = 3; - else if (n_bigram > 0) - n = 2; - else - n = 1; - /* Initialize base model. */ - ngram_model_init(base, &ngram_model_arpa_funcs, lmath, n, n_unigram); - base->n_counts[0] = n_unigram; - base->n_counts[1] = n_bigram; - base->n_counts[2] = n_trigram; - base->writable = TRUE; - - /* - * Allocate one extra unigram and bigram entry: sentinels to terminate - * followers (bigrams and trigrams, respectively) of previous entry. 
- */ - model->lm3g.unigrams = new_unigram_table(n_unigram + 1); - model->lm3g.bigrams = - ckd_calloc(n_bigram + 1, sizeof(bigram_t)); - if (n_trigram > 0) - model->lm3g.trigrams = - ckd_calloc(n_trigram, sizeof(trigram_t)); - - if (n_trigram > 0) { - model->lm3g.tseg_base = - ckd_calloc((n_bigram + 1) / BG_SEG_SZ + 1, - sizeof(int32)); - } - if (ReadUnigrams(&li, model) == -1) { - fclose_comp(fp, is_pipe); - ngram_model_free(base); - return NULL; - } - E_INFO("%8d = #unigrams created\n", base->n_counts[0]); - - if (base->n_counts[2] > 0) - init_sorted_list(&model->sorted_bo_wt2); - - if (base->n_counts[1] > 0) { - init_sorted_list(&model->sorted_prob2); - - if (ReadBigrams(&li, model) == -1) { - fclose_comp(fp, is_pipe); - ngram_model_free(base); - return NULL; - } - - base->n_counts[1] = FIRST_BG(model, base->n_counts[0]); - model->lm3g.n_prob2 = model->sorted_prob2.free; - model->lm3g.prob2 = vals_in_sorted_list(&model->sorted_prob2); - free_sorted_list(&model->sorted_prob2); - E_INFO("%8d = #bigrams created\n", base->n_counts[1]); - E_INFO("%8d = #prob2 entries\n", model->lm3g.n_prob2); - } - - if (base->n_counts[2] > 0) { - /* Create trigram bo-wts array */ - model->lm3g.n_bo_wt2 = model->sorted_bo_wt2.free; - model->lm3g.bo_wt2 = vals_in_sorted_list(&model->sorted_bo_wt2); - free_sorted_list(&model->sorted_bo_wt2); - E_INFO("%8d = #bo_wt2 entries\n", model->lm3g.n_bo_wt2); - - init_sorted_list(&model->sorted_prob3); - - if (ReadTrigrams(&li, model) == -1) { - fclose_comp(fp, is_pipe); - ngram_model_free(base); - return NULL; - } - - base->n_counts[2] = FIRST_TG(model, base->n_counts[1]); - model->lm3g.n_prob3 = model->sorted_prob3.free; - model->lm3g.prob3 = vals_in_sorted_list(&model->sorted_prob3); - E_INFO("%8d = #trigrams created\n", base->n_counts[2]); - E_INFO("%8d = #prob3 entries\n", model->lm3g.n_prob3); - - free_sorted_list(&model->sorted_prob3); - - /* Initialize tginfo */ - model->lm3g.tginfo = ckd_calloc(n_unigram, sizeof(tginfo_t *)); - 
model->lm3g.le = listelem_alloc_init(sizeof(tginfo_t)); - } - - lineiter_free(li); - fclose_comp(fp, is_pipe); - return base; -} - -int -ngram_model_arpa_write(ngram_model_t *model, - const char *file_name) -{ - ngram_iter_t *itor; - FILE *fh; - int i; - - if ((fh = fopen(file_name, "w")) == NULL) { - E_ERROR_SYSTEM("Failed to open %s for writing", file_name); - return -1; - } - fprintf(fh, "This is an ARPA-format language model file, generated by CMU Sphinx\n"); - - /* The ARPA format doesn't require any extra information that - * N-Gram iterators can't give us, so this is very - * straightforward compared with DMP writing. */ - - /* Write N-gram counts. */ - fprintf(fh, "\\data\\\n"); - for (i = 0; i < model->n; ++i) { - fprintf(fh, "ngram %d=%d\n", i+1, model->n_counts[i]); - } - - /* Write N-grams */ - for (i = 0; i < model->n; ++i) { - fprintf(fh, "\n\\%d-grams:\n", i + 1); - for (itor = ngram_model_mgrams(model, i); itor; itor = ngram_iter_next(itor)) { - int32 const *wids; - int32 score, bowt; - int j; - - wids = ngram_iter_get(itor, &score, &bowt); - fprintf(fh, "%.4f ", logmath_log_to_log10(model->lmath, score)); - for (j = 0; j <= i; ++j) { - assert(wids[j] < model->n_counts[0]); - fprintf(fh, "%s ", model->word_str[wids[j]]); - } - if (i < model->n-1) - fprintf(fh, "%.4f", logmath_log_to_log10(model->lmath, bowt)); - fprintf(fh, "\n"); - } - } - fprintf(fh, "\n\\end\\\n"); - return fclose(fh); -} - -static int -ngram_model_arpa_apply_weights(ngram_model_t *base, float32 lw, - float32 wip, float32 uw) -{ - ngram_model_arpa_t *model = (ngram_model_arpa_t *)base; - lm3g_apply_weights(base, &model->lm3g, lw, wip, uw); - return 0; -} - -/* Lousy "templating" for things that are largely the same in DMP and - * ARPA models, except for the bigram and trigram types and some - * names. 
*/ -#define NGRAM_MODEL_TYPE ngram_model_arpa_t -#include "lm3g_templates.c" - -static void -ngram_model_arpa_free(ngram_model_t *base) -{ - ngram_model_arpa_t *model = (ngram_model_arpa_t *)base; - ckd_free(model->lm3g.unigrams); - ckd_free(model->lm3g.bigrams); - ckd_free(model->lm3g.trigrams); - ckd_free(model->lm3g.prob2); - ckd_free(model->lm3g.bo_wt2); - ckd_free(model->lm3g.prob3); - lm3g_tginfo_free(base, &model->lm3g); - ckd_free(model->lm3g.tseg_base); -} - -static ngram_funcs_t ngram_model_arpa_funcs = { - ngram_model_arpa_free, /* free */ - ngram_model_arpa_apply_weights, /* apply_weights */ - lm3g_template_score, /* score */ - lm3g_template_raw_score, /* raw_score */ - lm3g_template_add_ug, /* add_ug */ - lm3g_template_flush, /* flush */ - lm3g_template_iter, /* iter */ - lm3g_template_mgrams, /* mgrams */ - lm3g_template_successors, /* successors */ - lm3g_template_iter_get, /* iter_get */ - lm3g_template_iter_next, /* iter_next */ - lm3g_template_iter_free /* iter_free */ -}; diff --git a/media/sphinxbase/src/libsphinxbase/lm/ngram_model_arpa.h b/media/sphinxbase/src/libsphinxbase/lm/ngram_model_arpa.h deleted file mode 100644 index 2fd9e427d..000000000 --- a/media/sphinxbase/src/libsphinxbase/lm/ngram_model_arpa.h +++ /dev/null @@ -1,86 +0,0 @@ -/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ -/* ==================================================================== - * Copyright (c) 1999-2007 Carnegie Mellon University. All rights - * reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * This work was supported in part by funding from the Defense Advanced - * Research Projects Agency and the National Science Foundation of the - * United States of America, and the CMU Sphinx Speech Consortium. - * - * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND - * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, - * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY - * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * ==================================================================== - * - */ -/* - * \file ngram_model_arpa.h ARPABO text format for N-Gram models - * - * Author: David Huggins-Daines <dhuggins@cs.cmu.edu> - */ - -#ifndef __NGRAM_MODEL_ARPA_H__ -#define __NGRAM_MODEL_ARPA_H__ - -#include "ngram_model_internal.h" -#include "lm3g_model.h" - -/** - * Bigram structure. - */ -struct bigram_s { - uint32 wid; /**< Index of unigram entry for this. (NOT dictionary id.) */ - uint16 prob2; /**< Index into array of actual bigram probs */ - uint16 bo_wt2; /**< Index into array of actual bigram backoff wts */ - uint16 trigrams; /**< Index of 1st entry in lm_t.trigrams[], - RELATIVE TO its segment base (see above) */ -}; - -/** - * Trigram structure. 
- * - * As with bigrams, trigram prob info kept in a separate table for conserving - * memory space. - */ -struct trigram_s { - uint32 wid; /**< Index of unigram entry for this. (NOT dictionary id.) */ - uint16 prob3; /**< Index into array of actual trigram probs */ -}; - - -/** - * Subclass of ngram_model for ARPA file reading. - */ -typedef struct ngram_model_arpa_s { - ngram_model_t base; /**< Base ngram_model_t structure */ - lm3g_model_t lm3g; /**< Shared lm3g structure */ - - /* Arrays of unique bigram probs and bo-wts, and trigram probs - * (these are temporary, actually) */ - sorted_list_t sorted_prob2; - sorted_list_t sorted_bo_wt2; - sorted_list_t sorted_prob3; -} ngram_model_arpa_t; - -#endif /* __NGRAM_MODEL_ARPA_H__ */ diff --git a/media/sphinxbase/src/libsphinxbase/lm/ngram_model_dmp.c b/media/sphinxbase/src/libsphinxbase/lm/ngram_model_dmp.c deleted file mode 100644 index c6a2d8b85..000000000 --- a/media/sphinxbase/src/libsphinxbase/lm/ngram_model_dmp.c +++ /dev/null @@ -1,969 +0,0 @@ -/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ -/* ==================================================================== - * Copyright (c) 1999-2007 Carnegie Mellon University. All rights - * reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * This work was supported in part by funding from the Defense Advanced - * Research Projects Agency and the National Science Foundation of the - * United States of America, and the CMU Sphinx Speech Consortium. 
- * - * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND - * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, - * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY - * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * ==================================================================== - * - */ -/* - * \file ngram_model_dmp.c DMP format language models - * - * Author: David Huggins-Daines <dhuggins@cs.cmu.edu> - */ - -#include <assert.h> -#include <stdio.h> -#include <string.h> -#include <stdlib.h> -#include <limits.h> - -#include "sphinxbase/ckd_alloc.h" -#include "sphinxbase/pio.h" -#include "sphinxbase/err.h" -#include "sphinxbase/byteorder.h" -#include "sphinxbase/listelem_alloc.h" - -#include "ngram_model_dmp.h" - -static const char darpa_hdr[] = "Darpa Trigram LM"; -static ngram_funcs_t ngram_model_dmp_funcs; - -#define TSEG_BASE(m,b) ((m)->lm3g.tseg_base[(b)>>LOG_BG_SEG_SZ]) -#define FIRST_BG(m,u) ((m)->lm3g.unigrams[u].bigrams) -#define FIRST_TG(m,b) (TSEG_BASE((m),(b))+((m)->lm3g.bigrams[b].trigrams)) - -static unigram_t * -new_unigram_table(int32 n_ug) -{ - unigram_t *table; - int32 i; - - table = ckd_calloc(n_ug, sizeof(unigram_t)); - for (i = 0; i < n_ug; i++) { - table[i].prob1.f = -99.0; - table[i].bo_wt1.f = -99.0; - } - return table; -} - -ngram_model_t * -ngram_model_dmp_read(cmd_ln_t *config, - const char *file_name, - logmath_t *lmath) -{ - ngram_model_t *base; - 
ngram_model_dmp_t *model; - FILE *fp; - int do_mmap, do_swap; - int32 is_pipe; - int32 i, j, k, vn, n, ts; - int32 n_unigram; - int32 n_bigram; - int32 n_trigram; - char str[1024]; - unigram_t *ugptr; - bigram_t *bgptr; - trigram_t *tgptr; - char *tmp_word_str; - char *map_base = NULL; - size_t offset = 0; - - base = NULL; - do_mmap = FALSE; - if (config) - do_mmap = cmd_ln_boolean_r(config, "-mmap"); - - if ((fp = fopen_comp(file_name, "rb", &is_pipe)) == NULL) { - E_ERROR("Dump file %s not found\n", file_name); - goto error_out; - } - - if (is_pipe && do_mmap) { - E_WARN("Dump file is compressed, will not use memory-mapped I/O\n"); - do_mmap = 0; - } - - do_swap = FALSE; - if (fread(&k, sizeof(k), 1, fp) != 1) - goto error_out; - if (k != strlen(darpa_hdr)+1) { - SWAP_INT32(&k); - if (k != strlen(darpa_hdr)+1) { - E_ERROR("Wrong magic header size number %x: %s is not a dump file\n", k, file_name); - goto error_out; - } - do_swap = 1; - } - if (fread(str, 1, k, fp) != (size_t) k) { - E_ERROR("Cannot read header\n"); - goto error_out; - } - if (strncmp(str, darpa_hdr, k) != 0) { - E_ERROR("Wrong header %s: %s is not a dump file\n", darpa_hdr); - goto error_out; - } - - if (do_mmap) { - if (do_swap) { - E_INFO - ("Byteswapping required, will not use memory-mapped I/O for LM file\n"); - do_mmap = 0; - } - else { - E_INFO("Will use memory-mapped I/O for LM file\n"); -#ifdef __ADSPBLACKFIN__ /* This is true for both VisualDSP++ and uClinux. 
*/ - E_FATAL("memory mapping is not supported at the moment."); -#else -#endif - } - } - - if (fread(&k, sizeof(k), 1, fp) != 1) - goto error_out; - if (do_swap) SWAP_INT32(&k); - if (fread(str, 1, k, fp) != (size_t) k) { - E_ERROR("Cannot read LM filename in header\n"); - goto error_out; - } - - /* read version#, if present (must be <= 0) */ - if (fread(&vn, sizeof(vn), 1, fp) != 1) - goto error_out; - if (do_swap) SWAP_INT32(&vn); - if (vn <= 0) { - /* read and don't compare timestamps (we don't care) */ - if (fread(&ts, sizeof(ts), 1, fp) != 1) - goto error_out; - if (do_swap) SWAP_INT32(&ts); - - /* read and skip format description */ - for (;;) { - if (fread(&k, sizeof(k), 1, fp) != 1) - goto error_out; - if (do_swap) SWAP_INT32(&k); - if (k == 0) - break; - if (fread(str, 1, k, fp) != (size_t) k) { - E_ERROR("Failed to read word\n"); - goto error_out; - } - } - /* read model->ucount */ - if (fread(&n_unigram, sizeof(n_unigram), 1, fp) != 1) - goto error_out; - if (do_swap) SWAP_INT32(&n_unigram); - } - else { - n_unigram = vn; - } - - /* read model->bcount, tcount */ - if (fread(&n_bigram, sizeof(n_bigram), 1, fp) != 1) - goto error_out; - if (do_swap) SWAP_INT32(&n_bigram); - if (fread(&n_trigram, sizeof(n_trigram), 1, fp) != 1) - goto error_out; - if (do_swap) SWAP_INT32(&n_trigram); - E_INFO("ngrams 1=%d, 2=%d, 3=%d\n", n_unigram, n_bigram, n_trigram); - - /* Allocate space for LM, including initial OOVs and placeholders; initialize it */ - model = ckd_calloc(1, sizeof(*model)); - base = &model->base; - if (n_trigram > 0) - n = 3; - else if (n_bigram > 0) - n = 2; - else - n = 1; - ngram_model_init(base, &ngram_model_dmp_funcs, lmath, n, n_unigram); - base->n_counts[0] = n_unigram; - base->n_counts[1] = n_bigram; - base->n_counts[2] = n_trigram; - - /* read unigrams (always in memory, as they contain dictionary - * mappings that can't be precomputed, and also could have OOVs added) */ - model->lm3g.unigrams = new_unigram_table(n_unigram + 1); - ugptr = 
model->lm3g.unigrams; - for (i = 0; i <= n_unigram; ++i) { - /* Skip over the mapping ID, we don't care about it. */ - if (fread(ugptr, sizeof(int32), 1, fp) != 1) { - E_ERROR("Failed to read maping id %d\n", i); - goto error_out; - } - /* Read the actual unigram structure. */ - if (fread(ugptr, sizeof(unigram_t), 1, fp) != 1) { - E_ERROR("Failed to read unigrams data\n"); - ngram_model_free(base); - fclose_comp(fp, is_pipe); - return NULL; - } - /* Byte swap if necessary. */ - if (do_swap) { - SWAP_INT32(&ugptr->prob1.l); - SWAP_INT32(&ugptr->bo_wt1.l); - SWAP_INT32(&ugptr->bigrams); - } - /* Convert values to log. */ - ugptr->prob1.l = logmath_log10_to_log(lmath, ugptr->prob1.f); - ugptr->bo_wt1.l = logmath_log10_to_log(lmath, ugptr->bo_wt1.f); - E_DEBUG(2, ("ug %d: prob %d bo %d bigrams %d\n", - i, ugptr->prob1.l, ugptr->bo_wt1.l, ugptr->bigrams)); - ++ugptr; - } - E_INFO("%8d = LM.unigrams(+trailer) read\n", n_unigram); - - /* Now mmap() the file and read in the rest of the (read-only) stuff. */ - if (do_mmap) { - offset = ftell(fp); - - /* Check for improper word alignment. */ - if (offset & 0x3) { - E_WARN("-mmap specified, but trigram index is not word-aligned. 
Will not memory-map.\n"); - do_mmap = FALSE; - } - else { - model->dump_mmap = mmio_file_read(file_name); - if (model->dump_mmap == NULL) { - do_mmap = FALSE; - } - else { - map_base = mmio_file_ptr(model->dump_mmap); - } - } - } - - if (n_bigram > 0) { - /* read bigrams */ - if (do_mmap) { - model->lm3g.bigrams = (bigram_t *) (map_base + offset); - offset += (n_bigram + 1) * sizeof(bigram_t); - } - else { - model->lm3g.bigrams = - ckd_calloc(n_bigram + 1, sizeof(bigram_t)); - if (fread(model->lm3g.bigrams, sizeof(bigram_t), n_bigram + 1, fp) - != (size_t) n_bigram + 1) { - E_ERROR("Failed to read bigrams data\n"); - goto error_out; - } - if (do_swap) { - for (i = 0, bgptr = model->lm3g.bigrams; i <= n_bigram; - i++, bgptr++) { - SWAP_INT16(&bgptr->wid); - SWAP_INT16(&bgptr->prob2); - SWAP_INT16(&bgptr->bo_wt2); - SWAP_INT16(&bgptr->trigrams); - } - } - } - E_INFO("%8d = LM.bigrams(+trailer) read\n", n_bigram); - } - - /* read trigrams */ - if (n_trigram > 0) { - if (do_mmap) { - model->lm3g.trigrams = (trigram_t *) (map_base + offset); - offset += n_trigram * sizeof(trigram_t); - } - else { - model->lm3g.trigrams = - ckd_calloc(n_trigram, sizeof(trigram_t)); - if (fread - (model->lm3g.trigrams, sizeof(trigram_t), n_trigram, fp) - != (size_t) n_trigram) { - E_ERROR("Failed to read trigrams data\n"); - goto error_out; - } - if (do_swap) { - for (i = 0, tgptr = model->lm3g.trigrams; i < n_trigram; - i++, tgptr++) { - SWAP_INT16(&tgptr->wid); - SWAP_INT16(&tgptr->prob3); - } - } - } - E_INFO("%8d = LM.trigrams read\n", n_trigram); - /* Initialize tginfo */ - model->lm3g.tginfo = ckd_calloc(n_unigram, sizeof(tginfo_t *)); - model->lm3g.le = listelem_alloc_init(sizeof(tginfo_t)); - } - - if (n_bigram > 0) { - /* read n_prob2 and prob2 array (in memory) */ - if (do_mmap) - fseek(fp, offset, SEEK_SET); - if (fread(&k, sizeof(k), 1, fp) != 1) - goto error_out; - if (do_swap) SWAP_INT32(&k); - model->lm3g.n_prob2 = k; - model->lm3g.prob2 = ckd_calloc(k, 
sizeof(*model->lm3g.prob2)); - if (fread(model->lm3g.prob2, sizeof(*model->lm3g.prob2), k, fp) != (size_t) k) { - E_ERROR("fread(prob2) failed\n"); - goto error_out; - } - for (i = 0; i < k; i++) { - if (do_swap) - SWAP_INT32(&model->lm3g.prob2[i].l); - /* Convert values to log. */ - model->lm3g.prob2[i].l = logmath_log10_to_log(lmath, model->lm3g.prob2[i].f); - } - E_INFO("%8d = LM.prob2 entries read\n", k); - } - - /* read n_bo_wt2 and bo_wt2 array (in memory) */ - if (base->n > 2) { - if (fread(&k, sizeof(k), 1, fp) != 1) - goto error_out; - if (do_swap) SWAP_INT32(&k); - model->lm3g.n_bo_wt2 = k; - model->lm3g.bo_wt2 = ckd_calloc(k, sizeof(*model->lm3g.bo_wt2)); - if (fread(model->lm3g.bo_wt2, sizeof(*model->lm3g.bo_wt2), k, fp) != (size_t) k) { - E_ERROR("Failed to read backoff weights\n"); - goto error_out; - } - for (i = 0; i < k; i++) { - if (do_swap) - SWAP_INT32(&model->lm3g.bo_wt2[i].l); - /* Convert values to log. */ - model->lm3g.bo_wt2[i].l = logmath_log10_to_log(lmath, model->lm3g.bo_wt2[i].f); - } - E_INFO("%8d = LM.bo_wt2 entries read\n", k); - } - - /* read n_prob3 and prob3 array (in memory) */ - if (base->n > 2) { - if (fread(&k, sizeof(k), 1, fp) != 1) - goto error_out; - if (do_swap) SWAP_INT32(&k); - model->lm3g.n_prob3 = k; - model->lm3g.prob3 = ckd_calloc(k, sizeof(*model->lm3g.prob3)); - if (fread(model->lm3g.prob3, sizeof(*model->lm3g.prob3), k, fp) != (size_t) k) { - E_ERROR("Failed to read trigram probability\n"); - goto error_out; - } - for (i = 0; i < k; i++) { - if (do_swap) - SWAP_INT32(&model->lm3g.prob3[i].l); - /* Convert values to log. 
*/ - model->lm3g.prob3[i].l = logmath_log10_to_log(lmath, model->lm3g.prob3[i].f); - } - E_INFO("%8d = LM.prob3 entries read\n", k); - } - - /* read tseg_base size and tseg_base */ - if (do_mmap) - offset = ftell(fp); - if (n_trigram > 0) { - if (do_mmap) { - memcpy(&k, map_base + offset, sizeof(k)); - offset += sizeof(int32); - model->lm3g.tseg_base = (int32 *) (map_base + offset); - offset += k * sizeof(int32); - } - else { - k = (n_bigram + 1) / BG_SEG_SZ + 1; - if (fread(&k, sizeof(k), 1, fp) != 1) - goto error_out; - if (do_swap) SWAP_INT32(&k); - model->lm3g.tseg_base = ckd_calloc(k, sizeof(int32)); - if (fread(model->lm3g.tseg_base, sizeof(int32), k, fp) != - (size_t) k) { - E_ERROR("Failed to read trigram index\n"); - goto error_out; - } - if (do_swap) - for (i = 0; i < k; i++) - SWAP_INT32(&model->lm3g.tseg_base[i]); - } - E_INFO("%8d = LM.tseg_base entries read\n", k); - } - - /* read ascii word strings */ - if (do_mmap) { - memcpy(&k, map_base + offset, sizeof(k)); - offset += sizeof(int32); - tmp_word_str = (char *) (map_base + offset); - offset += k; - } - else { - base->writable = TRUE; - if (fread(&k, sizeof(k), 1, fp) != 1) - goto error_out; - if (do_swap) SWAP_INT32(&k); - tmp_word_str = ckd_calloc(k, 1); - if (fread(tmp_word_str, 1, k, fp) != (size_t) k) { - E_ERROR("Failed to read words\n"); - goto error_out; - } - } - - /* First make sure string just read contains n_counts[0] words (PARANOIA!!) 
*/ - for (i = 0, j = 0; i < k; i++) - if (tmp_word_str[i] == '\0') - j++; - if (j != n_unigram) { - E_ERROR("Error reading word strings (%d doesn't match n_unigrams %d)\n", - j, n_unigram); - goto error_out; - } - - /* Break up string just read into words */ - if (do_mmap) { - j = 0; - for (i = 0; i < n_unigram; i++) { - base->word_str[i] = tmp_word_str + j; - if (hash_table_enter(base->wid, base->word_str[i], - (void *)(long)i) != (void *)(long)i) { - E_WARN("Duplicate word in dictionary: %s\n", base->word_str[i]); - } - j += strlen(base->word_str[i]) + 1; - } - } - else { - j = 0; - for (i = 0; i < n_unigram; i++) { - base->word_str[i] = ckd_salloc(tmp_word_str + j); - if (hash_table_enter(base->wid, base->word_str[i], - (void *)(long)i) != (void *)(long)i) { - E_WARN("Duplicate word in dictionary: %s\n", base->word_str[i]); - } - j += strlen(base->word_str[i]) + 1; - } - free(tmp_word_str); - } - E_INFO("%8d = ascii word strings read\n", i); - - fclose_comp(fp, is_pipe); - return base; - -error_out: - if (fp) - fclose_comp(fp, is_pipe); - ngram_model_free(base); - return NULL; -} - -ngram_model_dmp_t * -ngram_model_dmp_build(ngram_model_t *base) -{ - ngram_model_dmp_t *model; - ngram_model_t *newbase; - ngram_iter_t *itor; - sorted_list_t sorted_prob2; - sorted_list_t sorted_bo_wt2; - sorted_list_t sorted_prob3; - bigram_t *bgptr; - trigram_t *tgptr; - int i, bgcount, tgcount, seg; - - if (base->funcs == &ngram_model_dmp_funcs) { - E_INFO("Using existing DMP model.\n"); - return (ngram_model_dmp_t *)ngram_model_retain(base); - } - - /* Initialize new base model structure with params from base. */ - E_INFO("Building DMP model...\n"); - model = ckd_calloc(1, sizeof(*model)); - newbase = &model->base; - ngram_model_init(newbase, &ngram_model_dmp_funcs, - logmath_retain(base->lmath), - base->n, base->n_counts[0]); - /* Copy N-gram counts over. */ - memcpy(newbase->n_counts, base->n_counts, - base->n * sizeof(*base->n_counts)); - /* Make sure word strings are freed. 
*/ - newbase->writable = TRUE; - /* Initialize unigram table and string table. */ - model->lm3g.unigrams = new_unigram_table(newbase->n_counts[0] + 1); - for (itor = ngram_model_mgrams(base, 0); itor; - itor = ngram_iter_next(itor)) { - int32 prob1, bo_wt1; - int32 const *wids; - - /* Can't guarantee they will go in unigram order, so just to - * be correct, we do this... */ - wids = ngram_iter_get(itor, &prob1, &bo_wt1); - model->lm3g.unigrams[wids[0]].prob1.l = prob1; - model->lm3g.unigrams[wids[0]].bo_wt1.l = bo_wt1; - newbase->word_str[wids[0]] = ckd_salloc(ngram_word(base, wids[0])); - if ((hash_table_enter_int32(newbase->wid, - newbase->word_str[wids[0]], wids[0])) - != wids[0]) { - E_WARN("Duplicate word in dictionary: %s\n", newbase->word_str[wids[0]]); - } - } - E_INFO("%8d = #unigrams created\n", newbase->n_counts[0]); - - if (newbase->n < 2) - return model; - - /* Construct quantized probability table for bigrams and - * (optionally) trigrams. Hesitate to use the "sorted list" thing - * since it isn't so useful, but it's there already. */ - init_sorted_list(&sorted_prob2); - if (newbase->n > 2) { - init_sorted_list(&sorted_bo_wt2); - init_sorted_list(&sorted_prob3); - } - /* Construct bigram and trigram arrays. */ - bgptr = model->lm3g.bigrams = ckd_calloc(newbase->n_counts[1] + 1, sizeof(bigram_t)); - if (newbase->n > 2) { - tgptr = model->lm3g.trigrams = ckd_calloc(newbase->n_counts[2], sizeof(trigram_t)); - model->lm3g.tseg_base = - ckd_calloc((newbase->n_counts[1] + 1) / BG_SEG_SZ + 1, sizeof(int32)); - } - else - tgptr = NULL; - /* Since bigrams and trigrams have to be contiguous with others - * with the same N-1-gram, we traverse them in depth-first order - * to build the bigram and trigram arrays. */ - for (i = 0; i < newbase->n_counts[0]; ++i) { - ngram_iter_t *uitor; - bgcount = bgptr - model->lm3g.bigrams; - /* First bigram index (same as next if no bigrams...) 
*/ - model->lm3g.unigrams[i].bigrams = bgcount; - E_DEBUG(2, ("unigram %d: %s => bigram %d\n", i, newbase->word_str[i], bgcount)); - /* All bigrams corresponding to unigram i */ - uitor = ngram_ng_iter(base, i, NULL, 0); - for (itor = ngram_iter_successors(uitor); - itor; ++bgptr, itor = ngram_iter_next(itor)) { - int32 prob2, bo_wt2; - int32 const *wids; - ngram_iter_t *titor; - - wids = ngram_iter_get(itor, &prob2, &bo_wt2); - - assert (bgptr - model->lm3g.bigrams < newbase->n_counts[1]); - - bgptr->wid = wids[1]; - bgptr->prob2 = sorted_id(&sorted_prob2, &prob2); - if (newbase->n > 2) { - tgcount = (tgptr - model->lm3g.trigrams); - bgcount = (bgptr - model->lm3g.bigrams); - - /* Backoff weight (only if there are trigrams...) */ - bgptr->bo_wt2 = sorted_id(&sorted_bo_wt2, &bo_wt2); - - /* Find bigram segment for this bigram (this isn't - * used unless there are trigrams) */ - seg = bgcount >> LOG_BG_SEG_SZ; - /* If we just crossed a bigram segment boundary, then - * point tseg_base for the new segment to the current - * trigram pointer. */ - if (seg != (bgcount - 1) >> LOG_BG_SEG_SZ) - model->lm3g.tseg_base[seg] = tgcount; - /* Now calculate the trigram offset. */ - bgptr->trigrams = tgcount - model->lm3g.tseg_base[seg]; - E_DEBUG(2, ("bigram %d %s %s => trigram %d:%d\n", - bgcount, - newbase->word_str[wids[0]], - newbase->word_str[wids[1]], - seg, bgptr->trigrams)); - - /* And fill in successors' trigram info. 
*/ - for (titor = ngram_iter_successors(itor); - titor; ++tgptr, titor = ngram_iter_next(titor)) { - int32 prob3, dummy; - - assert(tgptr - model->lm3g.trigrams < newbase->n_counts[2]); - wids = ngram_iter_get(titor, &prob3, &dummy); - tgptr->wid = wids[2]; - tgptr->prob3 = sorted_id(&sorted_prob3, &prob3); - E_DEBUG(2, ("trigram %d %s %s %s => prob %d\n", - tgcount, - newbase->word_str[wids[0]], - newbase->word_str[wids[1]], - newbase->word_str[wids[2]], - tgptr->prob3)); - } - } - } - ngram_iter_free(uitor); - } - /* Add sentinal unigram and bigram records. */ - bgcount = bgptr - model->lm3g.bigrams; - tgcount = tgptr - model->lm3g.trigrams; - seg = bgcount >> LOG_BG_SEG_SZ; - if (seg != (bgcount - 1) >> LOG_BG_SEG_SZ) - model->lm3g.tseg_base[seg] = tgcount; - model->lm3g.unigrams[i].bigrams = bgcount; - if (newbase->n > 2) - bgptr->trigrams = tgcount - model->lm3g.tseg_base[seg]; - - /* Now create probability tables. */ - model->lm3g.n_prob2 = sorted_prob2.free; - model->lm3g.prob2 = vals_in_sorted_list(&sorted_prob2); - E_INFO("%8d = #bigrams created\n", newbase->n_counts[1]); - E_INFO("%8d = #prob2 entries\n", model->lm3g.n_prob2); - free_sorted_list(&sorted_prob2); - if (newbase->n > 2) { - /* Create trigram bo-wts array. */ - model->lm3g.n_bo_wt2 = sorted_bo_wt2.free; - model->lm3g.bo_wt2 = vals_in_sorted_list(&sorted_bo_wt2); - free_sorted_list(&sorted_bo_wt2); - E_INFO("%8d = #bo_wt2 entries\n", model->lm3g.n_bo_wt2); - /* Create trigram probability table. 
*/ - model->lm3g.n_prob3 = sorted_prob3.free; - model->lm3g.prob3 = vals_in_sorted_list(&sorted_prob3); - E_INFO("%8d = #trigrams created\n", newbase->n_counts[2]); - E_INFO("%8d = #prob3 entries\n", model->lm3g.n_prob3); - free_sorted_list(&sorted_prob3); - /* Initialize tginfo */ - model->lm3g.tginfo = ckd_calloc(newbase->n_counts[0], sizeof(tginfo_t *)); - model->lm3g.le = listelem_alloc_init(sizeof(tginfo_t)); - } - - return model; -} - -static void -fwrite_int32(FILE *fh, int32 val) -{ - fwrite(&val, 4, 1, fh); -} - -static void -fwrite_ug(FILE *fh, unigram_t *ug, logmath_t *lmath) -{ - int32 bogus = -1; - float32 log10val; - - /* Bogus dictionary mapping field. */ - fwrite(&bogus, 4, 1, fh); - /* Convert values to log10. */ - log10val = logmath_log_to_log10(lmath, ug->prob1.l); - fwrite(&log10val, 4, 1, fh); - log10val = logmath_log_to_log10(lmath, ug->bo_wt1.l); - fwrite(&log10val, 4, 1, fh); - fwrite_int32(fh, ug->bigrams); -} - -static void -fwrite_bg(FILE *fh, bigram_t *bg) -{ - fwrite(bg, sizeof(*bg), 1, fh); -} - -static void -fwrite_tg(FILE *fh, trigram_t *tg) -{ - fwrite(tg, sizeof(*tg), 1, fh); -} - -/** Please look at the definition of - */ -static char const *fmtdesc[] = { - "BEGIN FILE FORMAT DESCRIPTION", - "Header string length (int32) and string (including trailing 0)", - "Original LM filename string-length (int32) and filename (including trailing 0)", - "(int32) version number (present iff value <= 0)", - "(int32) original LM file modification timestamp (iff version# present)", - "(int32) string-length and string (including trailing 0) (iff version# present)", - "... 
previous entry continued any number of times (iff version# present)", - "(int32) 0 (terminating sequence of strings) (iff version# present)", - "(int32) log_bg_seg_sz (present iff different from default value of LOG2_BG_SEG_SZ)", - "(int32) lm_t.ucount (must be > 0)", - "(int32) lm_t.bcount", - "(int32) lm_t.tcount", - "lm_t.ucount+1 unigrams (including sentinel)", - "lm_t.bcount+1 bigrams (including sentinel 64 bits (bg_t) each if version=-1/-2, 128 bits (bg32_t) each if version=-3", - "lm_t.tcount trigrams (present iff lm_t.tcount > 0 32 bits (tg_t) each if version=-1/-2, 64 bits (tg32_t) each if version=-3)", - "(int32) lm_t.n_prob2", - "(int32) lm_t.prob2[]", - "(int32) lm_t.n_bo_wt2 (present iff lm_t.tcount > 0)", - "(int32) lm_t.bo_wt2[] (present iff lm_t.tcount > 0)", - "(int32) lm_t.n_prob3 (present iff lm_t.tcount > 0)", - "(int32) lm_t.prob3[] (present iff lm_t.tcount > 0)", - "(int32) (lm_t.bcount+1)/BG_SEG_SZ+1 (present iff lm_t.tcount > 0)", - "(int32) lm_t.tseg_base[] (present iff lm_t.tcount > 0)", - "(int32) Sum(all word string-lengths, including trailing 0 for each)", - "All word strings (including trailing 0 for each)", - "END FILE FORMAT DESCRIPTION", - NULL, -}; - -static void -ngram_model_dmp_write_header(FILE * fh) -{ - int32 k; - k = strlen(darpa_hdr) + 1; - fwrite_int32(fh, k); - fwrite(darpa_hdr, 1, k, fh); -} - -static void -ngram_model_dmp_write_lm_filename(FILE * fh, const char *lmfile) -{ - int32 k; - - k = strlen(lmfile) + 1; - fwrite_int32(fh, k); - fwrite(lmfile, 1, k, fh); -} - -#define LMDMP_VERSION_TG_16BIT -1 /**< VERSION 1 is the simplest DMP file which - is trigram or lower which used 16 bits in - bigram and trigram.*/ - -static void -ngram_model_dmp_write_version(FILE * fh, int32 mtime) -{ - fwrite_int32(fh, LMDMP_VERSION_TG_16BIT); /* version # */ - fwrite_int32(fh, mtime); -} - -static void -ngram_model_dmp_write_ngram_counts(FILE * fh, ngram_model_t *model) -{ - fwrite_int32(fh, model->n_counts[0]); - fwrite_int32(fh, 
model->n_counts[1]); - fwrite_int32(fh, model->n_counts[2]); -} - -static void -ngram_model_dmp_write_fmtdesc(FILE * fh) -{ - int32 i, k; - long pos; - - /* Write file format description into header */ - for (i = 0; fmtdesc[i] != NULL; i++) { - k = strlen(fmtdesc[i]) + 1; - fwrite_int32(fh, k); - fwrite(fmtdesc[i], 1, k, fh); - } - /* Pad it out in order to achieve 32-bit alignment */ - pos = ftell(fh); - k = pos & 3; - if (k) { - fwrite_int32(fh, 4-k); - fwrite("!!!!", 1, 4-k, fh); - } - fwrite_int32(fh, 0); -} - -static void -ngram_model_dmp_write_unigram(FILE *fh, ngram_model_t *model) -{ - ngram_model_dmp_t *lm = (ngram_model_dmp_t *)model; - int32 i; - - for (i = 0; i <= model->n_counts[0]; i++) { - fwrite_ug(fh, &(lm->lm3g.unigrams[i]), model->lmath); - } -} - - -static void -ngram_model_dmp_write_bigram(FILE *fh, ngram_model_t *model) -{ - ngram_model_dmp_t *lm = (ngram_model_dmp_t *)model; - int32 i; - - for (i = 0; i <= model->n_counts[1]; i++) { - fwrite_bg(fh, &(lm->lm3g.bigrams[i])); - } - -} - -static void -ngram_model_dmp_write_trigram(FILE *fh, ngram_model_t *model) -{ - ngram_model_dmp_t *lm = (ngram_model_dmp_t *)model; - int32 i; - - for (i = 0; i < model->n_counts[2]; i++) { - fwrite_tg(fh, &(lm->lm3g.trigrams[i])); - } -} - -static void -ngram_model_dmp_write_bgprob(FILE *fh, ngram_model_t *model) -{ - ngram_model_dmp_t *lm = (ngram_model_dmp_t *)model; - int32 i; - - fwrite_int32(fh, lm->lm3g.n_prob2); - for (i = 0; i < lm->lm3g.n_prob2; i++) { - float32 log10val = logmath_log_to_log10(model->lmath, lm->lm3g.prob2[i].l); - fwrite(&log10val, 4, 1, fh); - } -} - -static void -ngram_model_dmp_write_tgbowt(FILE *fh, ngram_model_t *model) -{ - ngram_model_dmp_t *lm = (ngram_model_dmp_t *)model; - int32 i; - - fwrite_int32(fh, lm->lm3g.n_bo_wt2); - for (i = 0; i < lm->lm3g.n_bo_wt2; i++) { - float32 log10val = logmath_log_to_log10(model->lmath, lm->lm3g.bo_wt2[i].l); - fwrite(&log10val, 4, 1, fh); - } -} - -static void 
-ngram_model_dmp_write_tgprob(FILE *fh, ngram_model_t *model) -{ - ngram_model_dmp_t *lm = (ngram_model_dmp_t *)model; - int32 i; - - fwrite_int32(fh, lm->lm3g.n_prob3); - for (i = 0; i < lm->lm3g.n_prob3; i++) { - float32 log10val = logmath_log_to_log10(model->lmath, lm->lm3g.prob3[i].l); - fwrite(&log10val, 4, 1, fh); - } -} - -static void -ngram_model_dmp_write_tg_segbase(FILE *fh, ngram_model_t *model) -{ - ngram_model_dmp_t *lm = (ngram_model_dmp_t *)model; - int32 i, k; - - k = (model->n_counts[1] + 1) / BG_SEG_SZ + 1; - fwrite_int32(fh, k); - for (i = 0; i < k; i++) - fwrite_int32(fh, lm->lm3g.tseg_base[i]); -} - -static void -ngram_model_dmp_write_wordstr(FILE *fh, ngram_model_t *model) -{ - int32 i, k; - - k = 0; - for (i = 0; i < model->n_counts[0]; i++) - k += strlen(model->word_str[i]) + 1; - fwrite_int32(fh, k); - for (i = 0; i < model->n_counts[0]; i++) - fwrite(model->word_str[i], 1, - strlen(model->word_str[i]) + 1, fh); -} - -int -ngram_model_dmp_write(ngram_model_t *base, - const char *file_name) -{ - ngram_model_dmp_t *model; - ngram_model_t *newbase; - FILE *fh; - - /* First, construct a DMP model from the base model. */ - model = ngram_model_dmp_build(base); - newbase = &model->base; - - /* Now write it, confident in the knowledge that it's the right - * kind of language model internally. 
*/ - if ((fh = fopen(file_name, "wb")) == NULL) { - E_ERROR("Cannot create file %s\n", file_name); - return -1; - } - ngram_model_dmp_write_header(fh); - ngram_model_dmp_write_lm_filename(fh, file_name); - ngram_model_dmp_write_version(fh, 0); - ngram_model_dmp_write_fmtdesc(fh); - ngram_model_dmp_write_ngram_counts(fh, newbase); - ngram_model_dmp_write_unigram(fh, newbase); - if (newbase->n > 1) { - ngram_model_dmp_write_bigram(fh, newbase); - if (newbase->n > 2) { - ngram_model_dmp_write_trigram(fh, newbase); - } - ngram_model_dmp_write_bgprob(fh, newbase); - if (newbase->n > 2) { - ngram_model_dmp_write_tgbowt(fh, newbase); - ngram_model_dmp_write_tgprob(fh, newbase); - ngram_model_dmp_write_tg_segbase(fh, newbase); - } - } - ngram_model_dmp_write_wordstr(fh, newbase); - ngram_model_free(newbase); - - return fclose(fh); -} - -static int -ngram_model_dmp_apply_weights(ngram_model_t *base, float32 lw, - float32 wip, float32 uw) -{ - ngram_model_dmp_t *model = (ngram_model_dmp_t *)base; - lm3g_apply_weights(base, &model->lm3g, lw, wip, uw); - return 0; -} - -/* Lousy "templating" for things that are largely the same in DMP and - * ARPA models, except for the bigram and trigram types and some - * names. 
*/ -#define NGRAM_MODEL_TYPE ngram_model_dmp_t -#include "lm3g_templates.c" - -static void -ngram_model_dmp_free(ngram_model_t *base) -{ - ngram_model_dmp_t *model = (ngram_model_dmp_t *)base; - - ckd_free(model->lm3g.unigrams); - ckd_free(model->lm3g.prob2); - if (model->dump_mmap) { - mmio_file_unmap(model->dump_mmap); - } - else { - ckd_free(model->lm3g.bigrams); - if (base->n > 2) { - ckd_free(model->lm3g.trigrams); - ckd_free(model->lm3g.tseg_base); - } - } - if (base->n > 2) { - ckd_free(model->lm3g.bo_wt2); - ckd_free(model->lm3g.prob3); - } - - lm3g_tginfo_free(base, &model->lm3g); -} - -static ngram_funcs_t ngram_model_dmp_funcs = { - ngram_model_dmp_free, /* free */ - ngram_model_dmp_apply_weights, /* apply_weights */ - lm3g_template_score, /* score */ - lm3g_template_raw_score, /* raw_score */ - lm3g_template_add_ug, /* add_ug */ - lm3g_template_flush, /* flush */ - lm3g_template_iter, /* iter */ - lm3g_template_mgrams, /* mgrams */ - lm3g_template_successors, /* successors */ - lm3g_template_iter_get, /* iter_get */ - lm3g_template_iter_next, /* iter_next */ - lm3g_template_iter_free /* iter_free */ -}; diff --git a/media/sphinxbase/src/libsphinxbase/lm/ngram_model_dmp.h b/media/sphinxbase/src/libsphinxbase/lm/ngram_model_dmp.h deleted file mode 100644 index a3b141ad1..000000000 --- a/media/sphinxbase/src/libsphinxbase/lm/ngram_model_dmp.h +++ /dev/null @@ -1,92 +0,0 @@ -/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ -/* ==================================================================== - * Copyright (c) 1999-2007 Carnegie Mellon University. All rights - * reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * This work was supported in part by funding from the Defense Advanced - * Research Projects Agency and the National Science Foundation of the - * United States of America, and the CMU Sphinx Speech Consortium. - * - * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND - * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, - * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY - * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * ==================================================================== - * - */ -/* - * \file ngram_model_dmp.h DMP format for N-Gram models - * - * Author: David Huggins-Daines <dhuggins@cs.cmu.edu> - */ - -#ifndef __NGRAM_MODEL_DMP_H__ -#define __NGRAM_MODEL_DMP_H__ - -#include "sphinxbase/mmio.h" - -#include "ngram_model_internal.h" -#include "lm3g_model.h" - -/** - * On-disk representation of bigrams. - */ -struct bigram_s { - uint16 wid; /**< Index of unigram entry for this. (NOT dictionary id.) 
*/ - uint16 prob2; /**< Index into array of actual bigram probs */ - uint16 bo_wt2; /**< Index into array of actual bigram backoff wts */ - uint16 trigrams; /**< Index of 1st entry in lm_t.trigrams[], - RELATIVE TO its segment base (see lm3g_model.h) */ -}; - -/** - * On-disk representation of trigrams. - * - * As with bigrams, trigram prob info kept in a separate table for conserving - * memory space. - */ -struct trigram_s { - uint16 wid; /**< Index of unigram entry for this. (NOT dictionary id.) */ - uint16 prob3; /**< Index into array of actual trigram probs */ -}; - -/** - * Subclass of ngram_model for DMP file reading. - */ -typedef struct ngram_model_dmp_s { - ngram_model_t base; /**< Base ngram_model_t structure */ - lm3g_model_t lm3g; /**< Common lm3g_model_t structure */ - mmio_file_t *dump_mmap; /**< mmap() of dump file (or NULL if none) */ -} ngram_model_dmp_t; - -/** - * Construct a DMP format model from a generic base model. - * - * Note: If base is already a DMP format model, this just calls - * ngram_model_retain(), and any changes will also be made in the base - * model. - */ -ngram_model_dmp_t *ngram_model_dmp_build(ngram_model_t *base); - - -#endif /* __NGRAM_MODEL_DMP_H__ */ diff --git a/media/sphinxbase/src/libsphinxbase/lm/ngram_model_internal.h b/media/sphinxbase/src/libsphinxbase/lm/ngram_model_internal.h deleted file mode 100644 index dcc7b5ae3..000000000 --- a/media/sphinxbase/src/libsphinxbase/lm/ngram_model_internal.h +++ /dev/null @@ -1,282 +0,0 @@ -/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ -/* ==================================================================== - * Copyright (c) 1999-2007 Carnegie Mellon University. All rights - * reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * This work was supported in part by funding from the Defense Advanced - * Research Projects Agency and the National Science Foundation of the - * United States of America, and the CMU Sphinx Speech Consortium. - * - * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND - * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, - * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY - * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * ==================================================================== - * - */ -/* - * \file ngram_model_internal.h Internal structures for N-Gram models - * - * Author: David Huggins-Daines <dhuggins@cs.cmu.edu> - */ - -#ifndef __NGRAM_MODEL_INTERNAL_H__ -#define __NGRAM_MODEL_INTERNAL_H__ - -#include "sphinxbase/ngram_model.h" -#include "sphinxbase/hash_table.h" - -/** - * Common implementation of ngram_model_t. - * - * The details of bigram, trigram, and higher-order N-gram storage, if any, can - * vary somewhat depending on the file format in use. 
- */ -struct ngram_model_s { - int refcount; /**< Reference count */ - int32 *n_counts; /**< Counts for 1, 2, 3, ... grams */ - int32 n_1g_alloc; /**< Number of allocated word strings (for new word addition) */ - int32 n_words; /**< Number of actual word strings (NOT the same as the - number of unigrams, due to class words). */ - uint8 n; /**< This is an n-gram model (1, 2, 3, ...). */ - uint8 n_classes; /**< Number of classes (maximum 128) */ - uint8 writable; /**< Are word strings writable? */ - uint8 flags; /**< Any other flags we might care about - (FIXME: Merge this and writable) */ - logmath_t *lmath; /**< Log-math object */ - float32 lw; /**< Language model scaling factor */ - int32 log_wip; /**< Log of word insertion penalty */ - int32 log_uw; /**< Log of unigram weight */ - int32 log_uniform; /**< Log of uniform (0-gram) probability */ - int32 log_uniform_weight; /**< Log of uniform weight (i.e. 1 - unigram weight) */ - int32 log_zero; /**< Zero probability, cached here for quick lookup */ - char **word_str; /**< Unigram names */ - hash_table_t *wid; /**< Mapping of unigram names to word IDs. */ - int32 *tmp_wids; /**< Temporary array of word IDs for ngram_model_get_ngram() */ - struct ngram_class_s **classes; /**< Word class definitions. */ - struct ngram_funcs_s *funcs; /**< Implementation-specific methods. */ -}; - -/** - * Implementation of ngram_class_t. - */ -struct ngram_class_s { - int32 tag_wid; /**< Base word ID for this class tag */ - int32 start_wid; /**< Starting base word ID for this class' words */ - int32 n_words; /**< Number of base words for this class */ - int32 *prob1; /**< Probability table for base words */ - /** - * Custom hash table for additional words. 
- */ - struct ngram_hash_s { - int32 wid; /**< Word ID of this bucket */ - int32 prob1; /**< Probability for this word */ - int32 next; /**< Index of next bucket (or -1 for no collision) */ - } *nword_hash; - int32 n_hash; /**< Number of buckets in nword_hash (power of 2) */ - int32 n_hash_inuse; /**< Number of words in nword_hash */ -}; - -#define NGRAM_HASH_SIZE 128 - -#define NGRAM_BASEWID(wid) ((wid)&0xffffff) -#define NGRAM_CLASSID(wid) (((wid)>>24) & 0x7f) -#define NGRAM_CLASSWID(wid,classid) (((classid)<<24) | 0x80000000 | (wid)) -#define NGRAM_IS_CLASSWID(wid) ((wid)&0x80000000) - -#define UG_ALLOC_STEP 10 - -/** Implementation-specific functions for operating on ngram_model_t objects */ -typedef struct ngram_funcs_s { - /** - * Implementation-specific function for freeing an ngram_model_t. - */ - void (*free)(ngram_model_t *model); - /** - * Implementation-specific function for applying language model weights. - */ - int (*apply_weights)(ngram_model_t *model, - float32 lw, - float32 wip, - float32 uw); - /** - * Implementation-specific function for querying language model score. - */ - int32 (*score)(ngram_model_t *model, - int32 wid, - int32 *history, - int32 n_hist, - int32 *n_used); - /** - * Implementation-specific function for querying raw language - * model probability. - */ - int32 (*raw_score)(ngram_model_t *model, - int32 wid, - int32 *history, - int32 n_hist, - int32 *n_used); - /** - * Implementation-specific function for adding unigrams. - * - * This function updates the internal structures of a language - * model to add the given unigram with the given weight (defined - * as a log-factor applied to the uniform distribution). This - * includes reallocating or otherwise resizing the set of unigrams. - * - * @return The language model score (not raw log-probability) of - * the new word, or 0 for failure. 
- */ - int32 (*add_ug)(ngram_model_t *model, - int32 wid, int32 lweight); - /** - * Implementation-specific function for purging N-Gram cache - */ - void (*flush)(ngram_model_t *model); - - /** - * Implementation-specific function for iterating. - */ - ngram_iter_t * (*iter)(ngram_model_t *model, int32 wid, int32 *history, int32 n_hist); - - /** - * Implementation-specific function for iterating. - */ - ngram_iter_t * (*mgrams)(ngram_model_t *model, int32 m); - - /** - * Implementation-specific function for iterating. - */ - ngram_iter_t * (*successors)(ngram_iter_t *itor); - - /** - * Implementation-specific function for iterating. - */ - int32 const * (*iter_get)(ngram_iter_t *itor, - int32 *out_score, - int32 *out_bowt); - - /** - * Implementation-specific function for iterating. - */ - ngram_iter_t * (*iter_next)(ngram_iter_t *itor); - - /** - * Implementation-specific function for iterating. - */ - void (*iter_free)(ngram_iter_t *itor); -} ngram_funcs_t; - -/** - * Base iterator structure for N-grams. - */ -struct ngram_iter_s { - ngram_model_t *model; - int32 *wids; /**< Scratch space for word IDs. */ - int16 m; /**< Order of history. */ - int16 successor; /**< Is this a successor iterator? */ -}; - -/** - * One class definition from a classdef file. - */ -typedef struct classdef_s { - char **words; - float32 *weights; - int32 n_words; -} classdef_t; - -/** - * Initialize the base ngram_model_t structure. - */ -int32 -ngram_model_init(ngram_model_t *model, - ngram_funcs_t *funcs, - logmath_t *lmath, - int32 n, int32 n_unigram); - -/** - * Read an N-Gram model from an ARPABO text file. - */ -ngram_model_t *ngram_model_arpa_read(cmd_ln_t *config, - const char *file_name, - logmath_t *lmath); -/** - * Read an N-Gram model from a Sphinx .DMP binary file. - */ -ngram_model_t *ngram_model_dmp_read(cmd_ln_t *config, - const char *file_name, - logmath_t *lmath); -/** - * Read an N-Gram model from a Sphinx .DMP32 binary file. 
- */ -ngram_model_t *ngram_model_dmp32_read(cmd_ln_t *config, - const char *file_name, - logmath_t *lmath); - -/** - * Write an N-Gram model to an ARPABO text file. - */ -int ngram_model_arpa_write(ngram_model_t *model, - const char *file_name); -/** - * Write an N-Gram model to a Sphinx .DMP binary file. - */ -int ngram_model_dmp_write(ngram_model_t *model, - const char *file_name); - -/** - * Read a probdef file. - */ -int32 read_classdef_file(hash_table_t *classes, const char *classdef_file); - -/** - * Free a class definition. - */ -void classdef_free(classdef_t *classdef); - -/** - * Allocate and initialize an N-Gram class. - */ -ngram_class_t *ngram_class_new(ngram_model_t *model, int32 tag_wid, - int32 start_wid, glist_t classwords); - -/** - * Deallocate an N-Gram class. - */ -void ngram_class_free(ngram_class_t *lmclass); - -/** - * Get the in-class log probability for a word in an N-Gram class. - * - * @return This probability, or 1 if word not found. - */ -int32 ngram_class_prob(ngram_class_t *lmclass, int32 wid); - -/** - * Initialize base M-Gram iterator structure. - */ -void ngram_iter_init(ngram_iter_t *itor, ngram_model_t *model, - int m, int successor); - -#endif /* __NGRAM_MODEL_INTERNAL_H__ */ diff --git a/media/sphinxbase/src/libsphinxbase/lm/ngram_model_set.c b/media/sphinxbase/src/libsphinxbase/lm/ngram_model_set.c deleted file mode 100644 index 50b7557ae..000000000 --- a/media/sphinxbase/src/libsphinxbase/lm/ngram_model_set.c +++ /dev/null @@ -1,870 +0,0 @@ -/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ -/* ==================================================================== - * Copyright (c) 2008 Carnegie Mellon University. All rights - * reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * This work was supported in part by funding from the Defense Advanced - * Research Projects Agency and the National Science Foundation of the - * United States of America, and the CMU Sphinx Speech Consortium. - * - * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND - * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, - * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY - * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * ==================================================================== - * - */ -/** - * @file ngram_model_set.c Set of language models. - * @author David Huggins-Daines <dhuggins@cs.cmu.edu> - */ - -#include <string.h> -#include <stdlib.h> - -#include "sphinxbase/err.h" -#include "sphinxbase/ckd_alloc.h" -#include "sphinxbase/strfuncs.h" -#include "sphinxbase/filename.h" - -#include "ngram_model_set.h" - -static ngram_funcs_t ngram_model_set_funcs; - -static int -my_compare(const void *a, const void *b) -{ - /* Make sure <UNK> floats to the beginning. 
*/ - if (strcmp(*(char * const *)a, "<UNK>") == 0) - return -1; - else if (strcmp(*(char * const *)b, "<UNK>") == 0) - return 1; - else - return strcmp(*(char * const *)a, *(char * const *)b); -} - -static void -build_widmap(ngram_model_t *base, logmath_t *lmath, int32 n) -{ - ngram_model_set_t *set = (ngram_model_set_t *)base; - ngram_model_t **models = set->lms; - hash_table_t *vocab; - glist_t hlist; - gnode_t *gn; - int32 i; - - /* Construct a merged vocabulary and a set of word-ID mappings. */ - vocab = hash_table_new(models[0]->n_words, FALSE); - /* Create the set of merged words. */ - for (i = 0; i < set->n_models; ++i) { - int32 j; - for (j = 0; j < models[i]->n_words; ++j) { - /* Ignore collisions. */ - (void)hash_table_enter_int32(vocab, models[i]->word_str[j], j); - } - } - /* Create the array of words, then sort it. */ - if (hash_table_lookup(vocab, "<UNK>", NULL) != 0) - (void)hash_table_enter_int32(vocab, "<UNK>", 0); - /* Now we know the number of unigrams, initialize the base model. */ - ngram_model_init(base, &ngram_model_set_funcs, lmath, n, hash_table_inuse(vocab)); - base->writable = FALSE; /* We will reuse the pointers from the submodels. */ - i = 0; - hlist = hash_table_tolist(vocab, NULL); - for (gn = hlist; gn; gn = gnode_next(gn)) { - hash_entry_t *ent = gnode_ptr(gn); - base->word_str[i++] = (char *)ent->key; - } - glist_free(hlist); - qsort(base->word_str, base->n_words, sizeof(*base->word_str), my_compare); - - /* Now create the word ID mappings. */ - if (set->widmap) - ckd_free_2d((void **)set->widmap); - set->widmap = (int32 **) ckd_calloc_2d(base->n_words, set->n_models, - sizeof(**set->widmap)); - for (i = 0; i < base->n_words; ++i) { - int32 j; - /* Also create the master wid mapping. 
*/ - (void)hash_table_enter_int32(base->wid, base->word_str[i], i); - /* printf("%s: %d => ", base->word_str[i], i); */ - for (j = 0; j < set->n_models; ++j) { - set->widmap[i][j] = ngram_wid(models[j], base->word_str[i]); - /* printf("%d ", set->widmap[i][j]); */ - } - /* printf("\n"); */ - } - hash_table_free(vocab); -} - -ngram_model_t * -ngram_model_set_init(cmd_ln_t *config, - ngram_model_t **models, - char **names, - const float32 *weights, - int32 n_models) -{ - ngram_model_set_t *model; - ngram_model_t *base; - logmath_t *lmath; - int32 i, n; - - if (n_models == 0) /* WTF */ - return NULL; - - /* Do consistency checking on the models. They must all use the - * same logbase and shift. */ - lmath = models[0]->lmath; - for (i = 1; i < n_models; ++i) { - if (logmath_get_base(models[i]->lmath) != logmath_get_base(lmath) - || logmath_get_shift(models[i]->lmath) != logmath_get_shift(lmath)) { - E_ERROR("Log-math parameters don't match, will not create LM set\n"); - return NULL; - } - } - - /* Allocate the combined model, initialize it. */ - model = ckd_calloc(1, sizeof(*model)); - base = &model->base; - model->n_models = n_models; - model->lms = ckd_calloc(n_models, sizeof(*model->lms)); - model->names = ckd_calloc(n_models, sizeof(*model->names)); - /* Initialize weights to a uniform distribution */ - model->lweights = ckd_calloc(n_models, sizeof(*model->lweights)); - { - int32 uniform = logmath_log(lmath, 1.0/n_models); - for (i = 0; i < n_models; ++i) - model->lweights[i] = uniform; - } - /* Default to interpolate if weights were given. */ - if (weights) - model->cur = -1; - - n = 0; - for (i = 0; i < n_models; ++i) { - model->lms[i] = ngram_model_retain(models[i]); - model->names[i] = ckd_salloc(names[i]); - if (weights) - model->lweights[i] = logmath_log(lmath, weights[i]); - /* N is the maximum of all merged models. */ - if (models[i]->n > n) - n = models[i]->n; - } - /* Allocate the history mapping table. 
*/ - model->maphist = ckd_calloc(n - 1, sizeof(*model->maphist)); - - /* Now build the word-ID mapping and merged vocabulary. */ - build_widmap(base, lmath, n); - return base; -} - -ngram_model_t * -ngram_model_set_read(cmd_ln_t *config, - const char *lmctlfile, - logmath_t *lmath) -{ - FILE *ctlfp; - glist_t lms = NULL; - glist_t lmnames = NULL; - __BIGSTACKVARIABLE__ char str[1024]; - ngram_model_t *set = NULL; - hash_table_t *classes; - char *basedir, *c; - - /* Read all the class definition files to accumulate a mapping of - * classnames to definitions. */ - classes = hash_table_new(0, FALSE); - if ((ctlfp = fopen(lmctlfile, "r")) == NULL) { - E_ERROR_SYSTEM("Failed to open %s", lmctlfile); - return NULL; - } - - /* Try to find the base directory to append to relative paths in - * the lmctl file. */ - if ((c = strrchr(lmctlfile, '/')) || (c = strrchr(lmctlfile, '\\'))) { - /* Include the trailing slash. */ - basedir = ckd_calloc(c - lmctlfile + 2, 1); - memcpy(basedir, lmctlfile, c - lmctlfile + 1); - } - else { - basedir = NULL; - } - E_INFO("Reading LM control file '%s'\n", lmctlfile); - if (basedir) - E_INFO("Will prepend '%s' to unqualified paths\n", basedir); - - if (fscanf(ctlfp, "%1023s", str) == 1) { - if (strcmp(str, "{") == 0) { - /* Load LMclass files */ - while ((fscanf(ctlfp, "%1023s", str) == 1) - && (strcmp(str, "}") != 0)) { - char *deffile; - if (basedir && !path_is_absolute(str)) - deffile = string_join(basedir, str, NULL); - else - deffile = ckd_salloc(str); - E_INFO("Reading classdef from '%s'\n", deffile); - if (read_classdef_file(classes, deffile) < 0) { - ckd_free(deffile); - goto error_out; - } - ckd_free(deffile); - } - - if (strcmp(str, "}") != 0) { - E_ERROR("Unexpected EOF in %s\n", lmctlfile); - goto error_out; - } - - /* This might be the first LM name. */ - if (fscanf(ctlfp, "%1023s", str) != 1) - str[0] = '\0'; - } - } - else - str[0] = '\0'; - - /* Read in one LM at a time and add classes to them as necessary. 
*/ - while (str[0] != '\0') { - char *lmfile; - ngram_model_t *lm; - - if (basedir && str[0] != '/' && str[0] != '\\') - lmfile = string_join(basedir, str, NULL); - else - lmfile = ckd_salloc(str); - E_INFO("Reading lm from '%s'\n", lmfile); - lm = ngram_model_read(config, lmfile, NGRAM_AUTO, lmath); - if (lm == NULL) { - ckd_free(lmfile); - goto error_out; - } - if (fscanf(ctlfp, "%1023s", str) != 1) { - E_ERROR("LMname missing after LMFileName '%s'\n", lmfile); - ckd_free(lmfile); - goto error_out; - } - ckd_free(lmfile); - lms = glist_add_ptr(lms, lm); - lmnames = glist_add_ptr(lmnames, ckd_salloc(str)); - - if (fscanf(ctlfp, "%1023s", str) == 1) { - if (strcmp(str, "{") == 0) { - /* LM uses classes; read their names */ - while ((fscanf(ctlfp, "%1023s", str) == 1) && - (strcmp(str, "}") != 0)) { - void *val; - classdef_t *classdef; - - if (hash_table_lookup(classes, str, &val) == -1) { - E_ERROR("Unknown class %s in control file\n", str); - goto error_out; - } - classdef = val; - if (ngram_model_add_class(lm, str, 1.0, - classdef->words, classdef->weights, - classdef->n_words) < 0) { - goto error_out; - } - E_INFO("Added class %s containing %d words\n", - str, classdef->n_words); - } - if (strcmp(str, "}") != 0) { - E_ERROR("Unexpected EOF in %s\n", lmctlfile); - goto error_out; - } - if (fscanf(ctlfp, "%1023s", str) != 1) - str[0] = '\0'; - } - } - else - str[0] = '\0'; - } - fclose(ctlfp); - - /* Now construct arrays out of lms and lmnames, and build an - * ngram_model_set. 
*/ - lms = glist_reverse(lms); - lmnames = glist_reverse(lmnames); - { - int32 n_models; - ngram_model_t **lm_array; - char **name_array; - gnode_t *lm_node, *name_node; - int32 i; - - n_models = glist_count(lms); - lm_array = ckd_calloc(n_models, sizeof(*lm_array)); - name_array = ckd_calloc(n_models, sizeof(*name_array)); - lm_node = lms; - name_node = lmnames; - for (i = 0; i < n_models; ++i) { - lm_array[i] = gnode_ptr(lm_node); - name_array[i] = gnode_ptr(name_node); - lm_node = gnode_next(lm_node); - name_node = gnode_next(name_node); - } - set = ngram_model_set_init(config, lm_array, name_array, - NULL, n_models); - ckd_free(lm_array); - ckd_free(name_array); - } -error_out: - { - gnode_t *gn; - glist_t hlist; - - if (set == NULL) { - for (gn = lms; gn; gn = gnode_next(gn)) { - ngram_model_free(gnode_ptr(gn)); - } - } - glist_free(lms); - for (gn = lmnames; gn; gn = gnode_next(gn)) { - ckd_free(gnode_ptr(gn)); - } - glist_free(lmnames); - hlist = hash_table_tolist(classes, NULL); - for (gn = hlist; gn; gn = gnode_next(gn)) { - hash_entry_t *he = gnode_ptr(gn); - ckd_free((char *)he->key); - classdef_free(he->val); - } - glist_free(hlist); - hash_table_free(classes); - ckd_free(basedir); - } - return set; -} - -int32 -ngram_model_set_count(ngram_model_t *base) -{ - ngram_model_set_t *set = (ngram_model_set_t *)base; - return set->n_models; -} - -ngram_model_set_iter_t * -ngram_model_set_iter(ngram_model_t *base) -{ - ngram_model_set_t *set = (ngram_model_set_t *)base; - ngram_model_set_iter_t *itor; - - if (set == NULL || set->n_models == 0) - return NULL; - itor = ckd_calloc(1, sizeof(*itor)); - itor->set = set; - return itor; -} - -ngram_model_set_iter_t * -ngram_model_set_iter_next(ngram_model_set_iter_t *itor) -{ - if (++itor->cur == itor->set->n_models) { - ngram_model_set_iter_free(itor); - return NULL; - } - return itor; -} - -void -ngram_model_set_iter_free(ngram_model_set_iter_t *itor) -{ - ckd_free(itor); -} - -ngram_model_t * 
-ngram_model_set_iter_model(ngram_model_set_iter_t *itor, - char const **lmname) -{ - if (lmname) *lmname = itor->set->names[itor->cur]; - return itor->set->lms[itor->cur]; -} - -ngram_model_t * -ngram_model_set_lookup(ngram_model_t *base, - const char *name) -{ - ngram_model_set_t *set = (ngram_model_set_t *)base; - int32 i; - - if (name == NULL) { - if (set->cur == -1) - return NULL; - else - return set->lms[set->cur]; - } - - /* There probably won't be very many submodels. */ - for (i = 0; i < set->n_models; ++i) - if (0 == strcmp(set->names[i], name)) - break; - if (i == set->n_models) - return NULL; - return set->lms[i]; -} - -ngram_model_t * -ngram_model_set_select(ngram_model_t *base, - const char *name) -{ - ngram_model_set_t *set = (ngram_model_set_t *)base; - int32 i; - - /* There probably won't be very many submodels. */ - for (i = 0; i < set->n_models; ++i) - if (0 == strcmp(set->names[i], name)) - break; - if (i == set->n_models) - return NULL; - set->cur = i; - return set->lms[set->cur]; -} - -const char * -ngram_model_set_current(ngram_model_t *base) -{ - ngram_model_set_t *set = (ngram_model_set_t *)base; - - if (set->cur == -1) - return NULL; - else - return set->names[set->cur]; -} - -int32 -ngram_model_set_current_wid(ngram_model_t *base, - int32 set_wid) -{ - ngram_model_set_t *set = (ngram_model_set_t *)base; - - if (set->cur == -1 || set_wid >= base->n_words) - return NGRAM_INVALID_WID; - else - return set->widmap[set_wid][set->cur]; -} - -int32 -ngram_model_set_known_wid(ngram_model_t *base, - int32 set_wid) -{ - ngram_model_set_t *set = (ngram_model_set_t *)base; - - if (set_wid >= base->n_words) - return FALSE; - else if (set->cur == -1) { - int32 i; - for (i = 0; i < set->n_models; ++i) { - if (set->widmap[set_wid][i] != ngram_unknown_wid(set->lms[i])) - return TRUE; - } - return FALSE; - } - else - return (set->widmap[set_wid][set->cur] - != ngram_unknown_wid(set->lms[set->cur])); -} - -ngram_model_t * 
-ngram_model_set_interp(ngram_model_t *base, - const char **names, - const float32 *weights) -{ - ngram_model_set_t *set = (ngram_model_set_t *)base; - - /* If we have a set of weights here, then set them. */ - if (names && weights) { - int32 i, j; - - /* We hope there aren't many models. */ - for (i = 0; i < set->n_models; ++i) { - for (j = 0; j < set->n_models; ++j) - if (0 == strcmp(names[i], set->names[j])) - break; - if (j == set->n_models) { - E_ERROR("Unknown LM name %s\n", names[i]); - return NULL; - } - set->lweights[j] = logmath_log(base->lmath, weights[i]); - } - } - else if (weights) { - memcpy(set->lweights, weights, set->n_models * sizeof(*set->lweights)); - } - /* Otherwise just enable existing weights. */ - set->cur = -1; - return base; -} - -ngram_model_t * -ngram_model_set_add(ngram_model_t *base, - ngram_model_t *model, - const char *name, - float32 weight, - int reuse_widmap) - -{ - ngram_model_set_t *set = (ngram_model_set_t *)base; - float32 fprob; - int32 scale, i; - - /* Add it to the array of lms. */ - ++set->n_models; - set->lms = ckd_realloc(set->lms, set->n_models * sizeof(*set->lms)); - set->lms[set->n_models - 1] = model; - set->names = ckd_realloc(set->names, set->n_models * sizeof(*set->names)); - set->names[set->n_models - 1] = ckd_salloc(name); - /* Expand the history mapping table if necessary. */ - if (model->n > base->n) { - base->n = model->n; - set->maphist = ckd_realloc(set->maphist, - (model->n - 1) * sizeof(*set->maphist)); - } - - /* Renormalize the interpolation weights. */ - fprob = weight * 1.0 / set->n_models; - set->lweights = ckd_realloc(set->lweights, - set->n_models * sizeof(*set->lweights)); - set->lweights[set->n_models - 1] = logmath_log(base->lmath, fprob); - /* Now normalize everything else to fit it in. This is - * accomplished by simply scaling all the other probabilities - * by (1-fprob). 
*/ - scale = logmath_log(base->lmath, 1.0 - fprob); - for (i = 0; i < set->n_models - 1; ++i) - set->lweights[i] += scale; - - /* Reuse the old word ID mapping if requested. */ - if (reuse_widmap) { - int32 **new_widmap; - - /* Tack another column onto the widmap array. */ - new_widmap = (int32 **)ckd_calloc_2d(base->n_words, set->n_models, - sizeof (**new_widmap)); - for (i = 0; i < base->n_words; ++i) { - /* Copy all the existing mappings. */ - memcpy(new_widmap[i], set->widmap[i], - (set->n_models - 1) * sizeof(**new_widmap)); - /* Create the new mapping. */ - new_widmap[i][set->n_models-1] = ngram_wid(model, base->word_str[i]); - } - ckd_free_2d((void **)set->widmap); - set->widmap = new_widmap; - } - else { - build_widmap(base, base->lmath, base->n); - } - return model; -} - -ngram_model_t * -ngram_model_set_remove(ngram_model_t *base, - const char *name, - int reuse_widmap) -{ - ngram_model_set_t *set = (ngram_model_set_t *)base; - ngram_model_t *submodel; - int32 lmidx, scale, n, i; - float32 fprob; - - for (lmidx = 0; lmidx < set->n_models; ++lmidx) - if (0 == strcmp(name, set->names[lmidx])) - break; - if (lmidx == set->n_models) - return NULL; - submodel = set->lms[lmidx]; - - /* Renormalize the interpolation weights by scaling them by - * 1/(1-fprob) */ - fprob = logmath_exp(base->lmath, set->lweights[lmidx]); - scale = logmath_log(base->lmath, 1.0 - fprob); - - /* Remove it from the array of lms, renormalize remaining weights, - * and recalcluate n. */ - --set->n_models; - n = 0; - ckd_free(set->names[lmidx]); - set->names[lmidx] = NULL; - for (i = 0; i < set->n_models; ++i) { - if (i >= lmidx) { - set->lms[i] = set->lms[i+1]; - set->names[i] = set->names[i+1]; - set->lweights[i] = set->lweights[i+1]; - } - set->lweights[i] -= scale; - if (set->lms[i]->n > n) - n = set->lms[i]->n; - } - /* There's no need to shrink these arrays. */ - set->lms[set->n_models] = NULL; - set->lweights[set->n_models] = base->log_zero; - /* No need to shrink maphist either. 
*/ - - /* Reuse the existing word ID mapping if requested. */ - if (reuse_widmap) { - /* Just go through and shrink each row. */ - for (i = 0; i < base->n_words; ++i) { - memmove(set->widmap[i] + lmidx, set->widmap[i] + lmidx + 1, - (set->n_models - lmidx) * sizeof(**set->widmap)); - } - } - else { - build_widmap(base, base->lmath, n); - } - return submodel; -} - -void -ngram_model_set_map_words(ngram_model_t *base, - const char **words, - int32 n_words) -{ - ngram_model_set_t *set = (ngram_model_set_t *)base; - int32 i; - - /* Recreate the word mapping. */ - if (base->writable) { - for (i = 0; i < base->n_words; ++i) { - ckd_free(base->word_str[i]); - } - } - ckd_free(base->word_str); - ckd_free_2d((void **)set->widmap); - base->writable = TRUE; - base->n_words = base->n_1g_alloc = n_words; - base->word_str = ckd_calloc(n_words, sizeof(*base->word_str)); - set->widmap = (int32 **)ckd_calloc_2d(n_words, set->n_models, sizeof(**set->widmap)); - hash_table_empty(base->wid); - for (i = 0; i < n_words; ++i) { - int32 j; - base->word_str[i] = ckd_salloc(words[i]); - (void)hash_table_enter_int32(base->wid, base->word_str[i], i); - for (j = 0; j < set->n_models; ++j) { - set->widmap[i][j] = ngram_wid(set->lms[j], base->word_str[i]); - } - } -} - -static int -ngram_model_set_apply_weights(ngram_model_t *base, float32 lw, - float32 wip, float32 uw) -{ - ngram_model_set_t *set = (ngram_model_set_t *)base; - int32 i; - - /* Apply weights to each sub-model. */ - for (i = 0; i < set->n_models; ++i) - ngram_model_apply_weights(set->lms[i], lw, wip, uw); - return 0; -} - -static int32 -ngram_model_set_score(ngram_model_t *base, int32 wid, - int32 *history, int32 n_hist, - int32 *n_used) -{ - ngram_model_set_t *set = (ngram_model_set_t *)base; - int32 mapwid; - int32 score; - int32 i; - - /* Truncate the history. */ - if (n_hist > base->n - 1) - n_hist = base->n - 1; - - /* Interpolate if there is no current. 
*/ - if (set->cur == -1) { - score = base->log_zero; - for (i = 0; i < set->n_models; ++i) { - int32 j; - /* Map word and history IDs for each model. */ - mapwid = set->widmap[wid][i]; - for (j = 0; j < n_hist; ++j) { - if (history[j] == NGRAM_INVALID_WID) - set->maphist[j] = NGRAM_INVALID_WID; - else - set->maphist[j] = set->widmap[history[j]][i]; - } - score = logmath_add(base->lmath, score, - set->lweights[i] + - ngram_ng_score(set->lms[i], - mapwid, set->maphist, n_hist, n_used)); - } - } - else { - int32 j; - /* Map word and history IDs (FIXME: do this in a function?) */ - mapwid = set->widmap[wid][set->cur]; - for (j = 0; j < n_hist; ++j) { - if (history[j] == NGRAM_INVALID_WID) - set->maphist[j] = NGRAM_INVALID_WID; - else - set->maphist[j] = set->widmap[history[j]][set->cur]; - } - score = ngram_ng_score(set->lms[set->cur], - mapwid, set->maphist, n_hist, n_used); - } - - return score; -} - -static int32 -ngram_model_set_raw_score(ngram_model_t *base, int32 wid, - int32 *history, int32 n_hist, - int32 *n_used) -{ - ngram_model_set_t *set = (ngram_model_set_t *)base; - int32 mapwid; - int32 score; - int32 i; - - /* Truncate the history. */ - if (n_hist > base->n - 1) - n_hist = base->n - 1; - - /* Interpolate if there is no current. */ - if (set->cur == -1) { - score = base->log_zero; - for (i = 0; i < set->n_models; ++i) { - int32 j; - /* Map word and history IDs for each model. */ - mapwid = set->widmap[wid][i]; - for (j = 0; j < n_hist; ++j) { - if (history[j] == NGRAM_INVALID_WID) - set->maphist[j] = NGRAM_INVALID_WID; - else - set->maphist[j] = set->widmap[history[j]][i]; - } - score = logmath_add(base->lmath, score, - set->lweights[i] + - ngram_ng_prob(set->lms[i], - mapwid, set->maphist, n_hist, n_used)); - } - } - else { - int32 j; - /* Map word and history IDs (FIXME: do this in a function?) 
*/ - mapwid = set->widmap[wid][set->cur]; - for (j = 0; j < n_hist; ++j) { - if (history[j] == NGRAM_INVALID_WID) - set->maphist[j] = NGRAM_INVALID_WID; - else - set->maphist[j] = set->widmap[history[j]][set->cur]; - } - score = ngram_ng_prob(set->lms[set->cur], - mapwid, set->maphist, n_hist, n_used); - } - - return score; -} - -static int32 -ngram_model_set_add_ug(ngram_model_t *base, - int32 wid, int32 lweight) -{ - ngram_model_set_t *set = (ngram_model_set_t *)base; - int32 *newwid; - int32 i, prob; - - /* At this point the word has already been added to the master - model and we have a new word ID for it. Add it to active - submodels and track the word IDs. */ - newwid = ckd_calloc(set->n_models, sizeof(*newwid)); - prob = base->log_zero; - for (i = 0; i < set->n_models; ++i) { - int32 wprob, n_hist; - - /* Only add to active models. */ - if (set->cur == -1 || set->cur == i) { - /* Did this word already exist? */ - newwid[i] = ngram_wid(set->lms[i], base->word_str[wid]); - if (newwid[i] == NGRAM_INVALID_WID) { - /* Add it to the submodel. */ - newwid[i] = ngram_model_add_word(set->lms[i], base->word_str[wid], - logmath_exp(base->lmath, lweight)); - if (newwid[i] == NGRAM_INVALID_WID) { - ckd_free(newwid); - return base->log_zero; - } - } - /* Now get the unigram probability for the new word and either - * interpolate it or use it (if this is the current model). */ - wprob = ngram_ng_prob(set->lms[i], newwid[i], NULL, 0, &n_hist); - if (set->cur == i) - prob = wprob; - else if (set->cur == -1) - prob = logmath_add(base->lmath, prob, set->lweights[i] + wprob); - } - else { - newwid[i] = NGRAM_INVALID_WID; - } - } - /* Okay we have the word IDs for this in all the submodels. Now - do some complicated memory mangling to add this to the - widmap. 
*/ - set->widmap = ckd_realloc(set->widmap, base->n_words * sizeof(*set->widmap)); - set->widmap[0] = ckd_realloc(set->widmap[0], - base->n_words - * set->n_models - * sizeof(**set->widmap)); - for (i = 0; i < base->n_words; ++i) - set->widmap[i] = set->widmap[0] + i * set->n_models; - memcpy(set->widmap[wid], newwid, set->n_models * sizeof(*newwid)); - ckd_free(newwid); - return prob; -} - -static void -ngram_model_set_free(ngram_model_t *base) -{ - ngram_model_set_t *set = (ngram_model_set_t *)base; - int32 i; - - for (i = 0; i < set->n_models; ++i) - ngram_model_free(set->lms[i]); - ckd_free(set->lms); - for (i = 0; i < set->n_models; ++i) - ckd_free(set->names[i]); - ckd_free(set->names); - ckd_free(set->lweights); - ckd_free(set->maphist); - ckd_free_2d((void **)set->widmap); -} - -static void -ngram_model_set_flush(ngram_model_t *base) -{ - ngram_model_set_t *set = (ngram_model_set_t *)base; - int32 i; - - for (i = 0; i < set->n_models; ++i) - ngram_model_flush(set->lms[i]); -} - -static ngram_funcs_t ngram_model_set_funcs = { - ngram_model_set_free, /* free */ - ngram_model_set_apply_weights, /* apply_weights */ - ngram_model_set_score, /* score */ - ngram_model_set_raw_score, /* raw_score */ - ngram_model_set_add_ug, /* add_ug */ - ngram_model_set_flush /* flush */ -}; diff --git a/media/sphinxbase/src/libsphinxbase/lm/ngram_model_set.h b/media/sphinxbase/src/libsphinxbase/lm/ngram_model_set.h deleted file mode 100644 index 5fbc7e5a4..000000000 --- a/media/sphinxbase/src/libsphinxbase/lm/ngram_model_set.h +++ /dev/null @@ -1,71 +0,0 @@ -/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ -/* ==================================================================== - * Copyright (c) 1999-2007 Carnegie Mellon University. All rights - * reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * This work was supported in part by funding from the Defense Advanced - * Research Projects Agency and the National Science Foundation of the - * United States of America, and the CMU Sphinx Speech Consortium. - * - * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND - * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, - * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY - * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * ==================================================================== - * - */ -/** - * @file ngram_model_set.h Set of language models. - * @author David Huggins-Daines <dhuggins@cs.cmu.edu> - */ - -#ifndef __NGRAM_MODEL_SET_H__ -#define __NGRAM_MODEL_SET_H__ - -#include "ngram_model_internal.h" -#include "lm3g_model.h" - -/** - * Subclass of ngram_model for grouping language models. - */ -typedef struct ngram_model_set_s { - ngram_model_t base; /**< Base ngram_model_t structure. */ - - int32 n_models; /**< Number of models in this set. */ - int32 cur; /**< Currently selected model, or -1 for none. 
*/ - ngram_model_t **lms; /**< Language models in this set. */ - char **names; /**< Names for language models. */ - int32 *lweights; /**< Log interpolation weights. */ - int32 **widmap; /**< Word ID mapping for submodels. */ - int32 *maphist; /**< Word ID mapping for N-Gram history. */ -} ngram_model_set_t; - -/** - * Iterator over a model set. - */ -struct ngram_model_set_iter_s { - ngram_model_set_t *set; - int32 cur; -}; - -#endif /* __NGRAM_MODEL_SET_H__ */ |