summaryrefslogtreecommitdiffstats
path: root/media/sphinxbase/src/libsphinxbase/lm
diff options
context:
space:
mode:
Diffstat (limited to 'media/sphinxbase/src/libsphinxbase/lm')
-rw-r--r--media/sphinxbase/src/libsphinxbase/lm/fsg_model.c944
-rw-r--r--media/sphinxbase/src/libsphinxbase/lm/jsgf.c943
-rw-r--r--media/sphinxbase/src/libsphinxbase/lm/jsgf_internal.h140
-rw-r--r--media/sphinxbase/src/libsphinxbase/lm/jsgf_parser.c1799
-rw-r--r--media/sphinxbase/src/libsphinxbase/lm/jsgf_parser.h90
-rw-r--r--media/sphinxbase/src/libsphinxbase/lm/jsgf_scanner.c2199
-rw-r--r--media/sphinxbase/src/libsphinxbase/lm/jsgf_scanner.h352
-rw-r--r--media/sphinxbase/src/libsphinxbase/lm/lm3g_model.c258
-rw-r--r--media/sphinxbase/src/libsphinxbase/lm/lm3g_model.h177
-rw-r--r--media/sphinxbase/src/libsphinxbase/lm/lm3g_templates.c560
-rw-r--r--media/sphinxbase/src/libsphinxbase/lm/ngram_model.c1129
-rw-r--r--media/sphinxbase/src/libsphinxbase/lm/ngram_model_arpa.c660
-rw-r--r--media/sphinxbase/src/libsphinxbase/lm/ngram_model_arpa.h86
-rw-r--r--media/sphinxbase/src/libsphinxbase/lm/ngram_model_dmp.c969
-rw-r--r--media/sphinxbase/src/libsphinxbase/lm/ngram_model_dmp.h92
-rw-r--r--media/sphinxbase/src/libsphinxbase/lm/ngram_model_internal.h282
-rw-r--r--media/sphinxbase/src/libsphinxbase/lm/ngram_model_set.c870
-rw-r--r--media/sphinxbase/src/libsphinxbase/lm/ngram_model_set.h71
18 files changed, 11621 insertions, 0 deletions
diff --git a/media/sphinxbase/src/libsphinxbase/lm/fsg_model.c b/media/sphinxbase/src/libsphinxbase/lm/fsg_model.c
new file mode 100644
index 000000000..374897754
--- /dev/null
+++ b/media/sphinxbase/src/libsphinxbase/lm/fsg_model.c
@@ -0,0 +1,944 @@
+/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
+/* ====================================================================
+ * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ *
+ * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
+ * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
+ * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ====================================================================
+ *
+ */
+
+/* System headers. */
+#ifdef _WIN32_WCE
+/*MC in a debug build it's implicitly included by assert.h
+ but you need this in a release build */
+#include <windows.h>
+#else
+#include <time.h>
+#endif /* _WIN32_WCE */
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+/* SphinxBase headers. */
+#include "sphinxbase/err.h"
+#include "sphinxbase/pio.h"
+#include "sphinxbase/ckd_alloc.h"
+#include "sphinxbase/prim_type.h"
+#include "sphinxbase/strfuncs.h"
+#include "sphinxbase/hash_table.h"
+#include "sphinxbase/fsg_model.h"
+
+/**
+ * Adjacency list (opaque) for a state in an FSG.
+ *
+ * Actually we use hash tables so that random access is a bit faster.
+ * Plus it allows us to make the lookup code a bit less ugly.
+ */
+
+struct trans_list_s {
+ hash_table_t *null_trans; /* Null transitions (fsg_link_t *) keyed by destination state. */
+ hash_table_t *trans; /* Lists (glist_t) of non-null transitions keyed by destination state. */
+};
+
+/**
+ * Implementation of arc iterator.
+ */
+struct fsg_arciter_s {
+ hash_iter_t *itor, *null_itor; /* Cursors over a state's `trans` and `null_trans` tables. */
+ gnode_t *gn; /* Current node within the transition list held by itor's current entry. */
+};
+
+#define FSG_MODEL_BEGIN_DECL "FSG_BEGIN" /* Header keyword; may be followed by the grammar name. */
+#define FSG_MODEL_END_DECL "FSG_END" /* Ends the transition list. */
+#define FSG_MODEL_N_DECL "N" /* Short spelling of NUM_STATES. */
+#define FSG_MODEL_NUM_STATES_DECL "NUM_STATES" /* Followed by the number of states (> 0). */
+#define FSG_MODEL_S_DECL "S" /* Short spelling of START_STATE. */
+#define FSG_MODEL_START_STATE_DECL "START_STATE" /* Followed by the start state index. */
+#define FSG_MODEL_F_DECL "F" /* Short spelling of FINAL_STATE. */
+#define FSG_MODEL_FINAL_STATE_DECL "FINAL_STATE" /* Followed by the final state index. */
+#define FSG_MODEL_T_DECL "T" /* Short spelling of TRANSITION. */
+#define FSG_MODEL_TRANSITION_DECL "TRANSITION" /* Followed by: from-state to-state prob [word]. */
+#define FSG_MODEL_COMMENT_CHAR '#' /* A line whose first character is this is skipped. */
+
+
+/**
+ * Read the next meaningful line of an FSG file and split it into words.
+ *
+ * Lines starting with FSG_MODEL_COMMENT_CHAR and lines containing no
+ * words are skipped.  Every line read, skipped or not, bumps *lineno.
+ *
+ * @param fp       Stream to read from.
+ * @param lineno   In/out line counter.
+ * @param lineptr  In/out line buffer; the previous buffer is released
+ *                 with ckd_free() before each read.
+ * @param wordptr  In/out word-pointer array, (re)sized to fit the line.
+ *                 The pointers are filled in by str2words() from *lineptr.
+ * @return value of str2words() for the line, or -1 when fread_line()
+ *         returns NULL.
+ */
+static int32
+nextline_str2words(FILE * fp, int32 * lineno,
+                   char **lineptr, char ***wordptr)
+{
+    size_t linelen;
+    int32 n_words = 0;
+
+    /* Keep reading until a line with at least one word shows up. */
+    while (n_words == 0) {
+        ckd_free(*lineptr);
+        *lineptr = fread_line(fp, &linelen);
+        if (*lineptr == NULL)
+            return -1;
+
+        ++*lineno;
+
+        /* Comment lines leave n_words at zero and are skipped. */
+        if (**lineptr != FSG_MODEL_COMMENT_CHAR)
+            n_words = str2words(*lineptr, NULL, 0);
+    }
+
+    /* Abuse of realloc(), but this doesn't have to be fast. */
+    if (*wordptr != NULL)
+        *wordptr = ckd_realloc(*wordptr, n_words * sizeof(**wordptr));
+    else
+        *wordptr = ckd_calloc(n_words, sizeof(**wordptr));
+
+    return str2words(*lineptr, *wordptr, n_words);
+}
+
+/**
+ * Add a word (non-null) transition from state `from` to state `to`.
+ *
+ * If a transition with the same word ID already links the two states,
+ * no new link is created; the existing link keeps the larger of its
+ * current score and `logp`.
+ *
+ * @param fsg   Grammar to modify.
+ * @param from  Source state index.
+ * @param to    Destination state index.
+ * @param logp  Score stored in the link's logs2prob field.
+ * @param wid   Word ID labelling the transition.
+ */
+void
+fsg_model_trans_add(fsg_model_t * fsg,
+                    int32 from, int32 to, int32 logp, int32 wid)
+{
+    glist_t arcs;
+    gnode_t *node;
+    fsg_link_t *arc;
+
+    /* The per-state table is created lazily on first use. */
+    if (!fsg->trans[from].trans)
+        fsg->trans[from].trans = hash_table_new(5, HASH_CASE_YES);
+
+    /* An existing link with this label is updated in place. */
+    arcs = fsg_model_trans(fsg, from, to);
+    node = arcs;
+    while (node != NULL) {
+        arc = (fsg_link_t *) gnode_ptr(node);
+        if (arc->wid == wid) {
+            if (logp > arc->logs2prob)
+                arc->logs2prob = logp;
+            return;
+        }
+        node = gnode_next(node);
+    }
+
+    /* No such link yet: build one. */
+    arc = listelem_malloc(fsg->link_alloc);
+    arc->wid = wid;
+    arc->logs2prob = logp;
+    arc->to_state = to;
+    arc->from_state = from;
+
+    /* Store the updated list back under the destination-state key. */
+    arcs = glist_add_ptr(arcs, (void *) arc);
+    hash_table_replace_bkey(fsg->trans[from].trans,
+                            (char const *) &arc->to_state,
+                            sizeof(arc->to_state), arcs);
+}
+
+/**
+ * Add a null (epsilon) transition from state `from` to state `to`.
+ *
+ * A positive `logp` is a fatal error.  Self-loops are rejected.  If the
+ * two states are already linked by a null transition, the existing link
+ * keeps the larger score.
+ *
+ * @param fsg   Grammar to modify.
+ * @param from  Source state index.
+ * @param to    Destination state index.
+ * @param logp  Score of the transition; must be <= 0.
+ * @param wid   Not used by this implementation: the link is always
+ *              stored with wid == -1.
+ * @return 1 if a new link was created, 0 if an existing link's score was
+ *         raised, -1 if nothing changed (self-loop, or the existing link
+ *         already scores at least `logp`).
+ */
+int32
+fsg_model_tag_trans_add(fsg_model_t * fsg, int32 from, int32 to,
+                        int32 logp, int32 wid)
+{
+    fsg_link_t *existing, *fresh, *stored;
+
+    /* Check for transition probability */
+    if (logp > 0) {
+        E_FATAL("Null transition prob must be <= 1.0 (state %d -> %d)\n",
+                from, to);
+    }
+
+    /* A null self-loop with prob <= 1.0 adds nothing. */
+    if (from == to)
+        return -1;
+
+    /* The per-state table is created lazily on first use. */
+    if (!fsg->trans[from].null_trans)
+        fsg->trans[from].null_trans = hash_table_new(5, HASH_CASE_YES);
+
+    /* Duplicate link: keep whichever score is higher. */
+    existing = fsg_model_null_trans(fsg, from, to);
+    if (existing != NULL) {
+        if (existing->logs2prob >= logp)
+            return -1;
+        existing->logs2prob = logp;
+        return 0;
+    }
+
+    /* Create null transition object */
+    fresh = listelem_malloc(fsg->link_alloc);
+    fresh->wid = -1;
+    fresh->logs2prob = logp;
+    fresh->to_state = to;
+    fresh->from_state = from;
+
+    stored = (fsg_link_t *)
+        hash_table_enter_bkey(fsg->trans[from].null_trans,
+                              (char const *) &fresh->to_state,
+                              sizeof(fresh->to_state), fresh);
+    assert(fresh == stored);
+
+    return 1;
+}
+
+/**
+ * Add a null transition from `from` to `to` with score `logp`.
+ *
+ * Thin wrapper around fsg_model_tag_trans_add() that passes -1 as the
+ * word ID.
+ *
+ * @return whatever fsg_model_tag_trans_add() returns (1, 0 or -1).
+ */
+int32
+fsg_model_null_trans_add(fsg_model_t * fsg, int32 from, int32 to,
+                         int32 logp)
+{
+    int32 status;
+
+    status = fsg_model_tag_trans_add(fsg, from, to, logp, -1);
+    return status;
+}
+
+/**
+ * Compute the transitive closure of the null transitions of an FSG.
+ *
+ * For every pair of null links A->B and B->C, a null link A->C scored
+ * with the sum of the two scores is added (or an existing A->C link has
+ * its score raised), and this repeats until a full pass changes nothing.
+ *
+ * @param fsg    Grammar to modify.
+ * @param nulls  List of the grammar's null links (fsg_link_t *), or NULL
+ *               to have the list built here from the per-state tables.
+ * @return the list of null links including the ones created here.
+ *         fsg_model_read() releases it with glist_free().
+ */
+glist_t
+fsg_model_null_trans_closure(fsg_model_t * fsg, glist_t nulls)
+{
+ gnode_t *gn1;
+ int updated;
+ fsg_link_t *tl1, *tl2;
+ int32 k, n;
+
+ E_INFO("Computing transitive closure for null transitions\n");
+
+ /* If our caller didn't give us a list of null-transitions,
+ make such a list. Just loop through all the FSG states,
+ and all the null-transitions in that state (which are kept in
+ their own hash table). */
+ if (nulls == NULL) {
+ int i;
+ for (i = 0; i < fsg->n_state; ++i) {
+ hash_iter_t *itor;
+ hash_table_t *null_trans = fsg->trans[i].null_trans;
+ if (null_trans == NULL)
+ continue;
+ for (itor = hash_table_iter(null_trans);
+ itor != NULL;
+ itor = hash_table_iter_next(itor)) {
+ nulls = glist_add_ptr(nulls, hash_entry_val(itor->ent));
+ }
+ }
+ }
+
+ /*
+ * Probably not the most efficient closure implementation, in general, but
+ * probably reasonably efficient for a sparse null transition matrix.
+ *
+ * n counts the links created; `updated` records whether the current
+ * pass created a link or raised a score, in which case another pass
+ * is made.
+ */
+ n = 0;
+ do {
+ updated = FALSE;
+
+ for (gn1 = nulls; gn1; gn1 = gnode_next(gn1)) {
+ hash_iter_t *itor;
+
+ tl1 = (fsg_link_t *) gnode_ptr(gn1);
+ assert(tl1->wid < 0);
+
+ if (fsg->trans[tl1->to_state].null_trans == NULL)
+ continue;
+
+ /* The table iterated here belongs to tl1->to_state, while new
+ * links are entered into the table of tl1->from_state.
+ * NOTE(review): these are presumably always different tables,
+ * since fsg_model_tag_trans_add() rejects null self-loops --
+ * confirm for caller-supplied lists. */
+ for (itor = hash_table_iter(fsg->trans[tl1->to_state].null_trans);
+ itor; itor = hash_table_iter_next(itor)) {
+
+ tl2 = (fsg_link_t *) hash_entry_val(itor->ent);
+
+ /* k is 1 for a new link, 0 when an existing link's score
+ * was raised, -1 when nothing changed. */
+ k = fsg_model_null_trans_add(fsg,
+ tl1->from_state,
+ tl2->to_state,
+ tl1->logs2prob +
+ tl2->logs2prob);
+ if (k >= 0) {
+ updated = TRUE;
+ if (k > 0) {
+ /* Track the new link so later passes expand it too. */
+ nulls = glist_add_ptr(nulls, (void *)
+ fsg_model_null_trans
+ (fsg, tl1->from_state,
+ tl2->to_state));
+ n++;
+ }
+ }
+ }
+ }
+ } while (updated);
+
+ E_INFO("%d null transitions added\n", n);
+
+ return nulls;
+}
+
+/**
+ * Look up the non-null transitions from state `i` to state `j`.
+ *
+ * @return the list of fsg_link_t pointers stored for that pair, or NULL
+ *         if state `i` has no transition table or no entry for `j`.
+ */
+glist_t
+fsg_model_trans(fsg_model_t * fsg, int32 i, int32 j)
+{
+    hash_table_t *table = fsg->trans[i].trans;
+    void *found;
+
+    if (table != NULL
+        && hash_table_lookup_bkey(table, (char const *) &j,
+                                  sizeof(j), &found) >= 0)
+        return (glist_t) found;
+
+    return NULL;
+}
+
+/**
+ * Look up the null transition from state `i` to state `j`.
+ *
+ * @return the link, or NULL if state `i` has no null-transition table or
+ *         no entry for `j`.
+ */
+fsg_link_t *
+fsg_model_null_trans(fsg_model_t * fsg, int32 i, int32 j)
+{
+    hash_table_t *table = fsg->trans[i].null_trans;
+    void *found;
+
+    if (table != NULL
+        && hash_table_lookup_bkey(table, (char const *) &j,
+                                  sizeof(j), &found) >= 0)
+        return (fsg_link_t *) found;
+
+    return NULL;
+}
+
+/**
+ * Start iterating over all arcs (non-null first, then null) leaving
+ * state `i`.
+ *
+ * @param fsg  Grammar to inspect.
+ * @param i    Source state index.
+ * @return a new iterator positioned on the first arc, or NULL if the
+ *         state has no arcs.  A non-NULL iterator is released either by
+ *         fsg_arciter_next() when it runs off the end or by
+ *         fsg_arciter_free().
+ */
+fsg_arciter_t *
+fsg_model_arcs(fsg_model_t * fsg, int32 i)
+{
+    fsg_arciter_t *itor;
+
+    if (fsg->trans[i].trans == NULL && fsg->trans[i].null_trans == NULL)
+        return NULL;
+    itor = ckd_calloc(1, sizeof(*itor));
+    if (fsg->trans[i].null_trans)
+        itor->null_itor = hash_table_iter(fsg->trans[i].null_trans);
+    if (fsg->trans[i].trans)
+        itor->itor = hash_table_iter(fsg->trans[i].trans);
+    if (itor->itor != NULL)
+        itor->gn = hash_entry_val(itor->itor->ent);
+
+    /* With neither a current list node nor a null-transition cursor,
+     * fsg_arciter_get() would return NULL.  Callers dereference that
+     * result without checking, so report "no arcs" here instead. */
+    if (itor->gn == NULL && itor->null_itor == NULL) {
+        fsg_arciter_free(itor);
+        return NULL;
+    }
+    return itor;
+}
+
+/**
+ * Return the arc the iterator currently points at.
+ *
+ * Non-null arcs take precedence; once they are exhausted the current
+ * null arc is returned.
+ *
+ * @return the current link, or NULL if the iterator has neither a
+ *         current list node nor a null-transition cursor.
+ */
+fsg_link_t *
+fsg_arciter_get(fsg_arciter_t * itor)
+{
+    if (itor->gn != NULL)
+        return (fsg_link_t *) gnode_ptr(itor->gn);
+
+    if (itor->null_itor != NULL)
+        return (fsg_link_t *) hash_entry_val(itor->null_itor->ent);
+
+    return NULL;
+}
+
+/**
+ * Advance an arc iterator.
+ *
+ * Walks the current destination's list of non-null arcs, then the next
+ * destination's list, and once all non-null arcs are consumed, the null
+ * arcs.
+ *
+ * @return `itor` if it now points at another arc; otherwise the iterator
+ *         is freed and NULL is returned.
+ */
+fsg_arciter_t *
+fsg_arciter_next(fsg_arciter_t * itor)
+{
+    if (itor->gn != NULL) {
+        /* Still inside the non-null arcs. */
+        itor->gn = gnode_next(itor->gn);
+        if (itor->gn != NULL)
+            return itor;
+
+        /* This destination's list is done; move to the next one. */
+        itor->itor = hash_table_iter_next(itor->itor);
+        if (itor->itor != NULL) {
+            itor->gn = hash_entry_val(itor->itor->ent);
+            return itor;
+        }
+
+        /* Non-null arcs are exhausted; the null cursor, if there is one,
+         * already points at the first null arc. */
+        if (itor->null_itor != NULL)
+            return itor;
+    }
+    else if (itor->null_itor != NULL) {
+        itor->null_itor = hash_table_iter_next(itor->null_itor);
+        if (itor->null_itor != NULL)
+            return itor;
+    }
+
+    /* Nothing left to visit. */
+    fsg_arciter_free(itor);
+    return NULL;
+}
+
+/**
+ * Release an arc iterator and both hash-table cursors it holds.
+ * Passing NULL is a no-op.
+ */
+void
+fsg_arciter_free(fsg_arciter_t * itor)
+{
+    if (itor != NULL) {
+        hash_table_iter_free(itor->null_itor);
+        hash_table_iter_free(itor->itor);
+        ckd_free(itor);
+    }
+}
+
+/**
+ * Find the ID of a word in the grammar's vocabulary.
+ *
+ * This is a linear search and never modifies the vocabulary.
+ *
+ * @param fsg   Grammar to search.
+ * @param word  Word to look for (compared with strcmp()).
+ * @return index of the word in fsg->vocab, or -1 if it is not present.
+ */
+int
+fsg_model_word_id(fsg_model_t * fsg, char const *word)
+{
+    int wid = 0;
+
+    while (wid < fsg->n_word) {
+        if (strcmp(fsg->vocab[wid], word) == 0)
+            return wid;
+        ++wid;
+    }
+
+    /* Not in the vocabulary. */
+    return -1;
+}
+
+/**
+ * Return the ID of `word`, adding it to the vocabulary if necessary.
+ *
+ * When the vocabulary array is full it grows by 10 slots, and the
+ * silwords/altwords bit vectors (if they exist) grow with it.
+ *
+ * @param fsg   Grammar to modify.
+ * @param word  Word to find or add; a copy is stored.
+ * @return the word's ID.
+ */
+int
+fsg_model_word_add(fsg_model_t * fsg, char const *word)
+{
+    int wid = fsg_model_word_id(fsg, word);
+
+    /* Already known: nothing to do. */
+    if (wid != -1)
+        return wid;
+
+    /* New words take the next free slot. */
+    wid = fsg->n_word;
+    if (fsg->n_word == fsg->n_word_alloc) {
+        int old_size = fsg->n_word_alloc;
+
+        fsg->n_word_alloc += 10;
+        fsg->vocab = ckd_realloc(fsg->vocab,
+                                 fsg->n_word_alloc *
+                                 sizeof(*fsg->vocab));
+        if (fsg->silwords)
+            fsg->silwords =
+                bitvec_realloc(fsg->silwords, old_size, fsg->n_word_alloc);
+        if (fsg->altwords)
+            fsg->altwords =
+                bitvec_realloc(fsg->altwords, old_size, fsg->n_word_alloc);
+    }
+    fsg->vocab[wid] = ckd_salloc(word);
+    ++fsg->n_word;
+
+    return wid;
+}
+
+/**
+ * Add self-loop transitions for a silence word.
+ *
+ * The word is added to the vocabulary if needed and flagged in
+ * fsg->silwords.
+ *
+ * @param fsg      Grammar to modify.
+ * @param silword  Silence word.
+ * @param state    State that gets the self-loop, or -1 for every state.
+ * @param silprob  Probability of the silence transition; its log is
+ *                 scaled by fsg->lw.
+ * @return number of fsg_model_trans_add() calls made.
+ */
+int
+fsg_model_add_silence(fsg_model_t * fsg, char const *silword,
+                      int state, float32 silprob)
+{
+    int32 logsilp;
+    int silwid;
+    int n_added = 0;
+
+    E_INFO("Adding silence transitions for %s to FSG\n", silword);
+
+    silwid = fsg_model_word_add(fsg, silword);
+    logsilp = (int32) (logmath_log(fsg->lmath, silprob) * fsg->lw);
+    if (!fsg->silwords)
+        fsg->silwords = bitvec_alloc(fsg->n_word_alloc);
+    bitvec_set(fsg->silwords, silwid);
+
+    if (state != -1) {
+        /* Only the requested state. */
+        fsg_model_trans_add(fsg, state, state, logsilp, silwid);
+        ++n_added;
+    }
+    else {
+        int src;
+
+        /* Every state gets a silence self-loop. */
+        for (src = 0; src < fsg->n_state; src++) {
+            fsg_model_trans_add(fsg, src, src, logsilp, silwid);
+            ++n_added;
+        }
+    }
+
+    E_INFO("Added %d silence word transitions\n", n_added);
+    return n_added;
+}
+
+/**
+ * Add an alternate word for an existing vocabulary word.
+ *
+ * Every transition labelled with `baseword` is duplicated with
+ * `altword` as its label and the same score.  The alternate word is
+ * flagged in fsg->altwords, and also in fsg->silwords when
+ * fsg_model_is_filler() is true for the base word.
+ *
+ * @param fsg       Grammar to modify.
+ * @param baseword  Word already present in the vocabulary.
+ * @param altword   Alternate word; added to the vocabulary if needed.
+ * @return number of transitions added, or -1 if `baseword` is not in the
+ *         vocabulary.
+ */
+int
+fsg_model_add_alt(fsg_model_t * fsg, char const *baseword,
+                  char const *altword)
+{
+    int i, basewid, altwid;
+    int ntrans;
+
+    /* Use the shared vocabulary lookup rather than a private copy of it.
+     * FIXME: This will get slow, eventually... */
+    basewid = fsg_model_word_id(fsg, baseword);
+    if (basewid == -1) {
+        E_ERROR("Base word %s not present in FSG vocabulary!\n", baseword);
+        return -1;
+    }
+    altwid = fsg_model_word_add(fsg, altword);
+    if (fsg->altwords == NULL)
+        fsg->altwords = bitvec_alloc(fsg->n_word_alloc);
+    bitvec_set(fsg->altwords, altwid);
+    if (fsg_model_is_filler(fsg, basewid)) {
+        if (fsg->silwords == NULL)
+            fsg->silwords = bitvec_alloc(fsg->n_word_alloc);
+        bitvec_set(fsg->silwords, altwid);
+    }
+
+    E_DEBUG(2, ("Adding alternate word transitions (%s,%s) to FSG\n",
+                baseword, altword));
+
+    /* Look for all transitions involving baseword and duplicate them. */
+    /* FIXME: This will also get slow, eventually... */
+    ntrans = 0;
+    for (i = 0; i < fsg->n_state; ++i) {
+        hash_iter_t *itor;
+        if (fsg->trans[i].trans == NULL)
+            continue;
+        for (itor = hash_table_iter(fsg->trans[i].trans); itor;
+             itor = hash_table_iter_next(itor)) {
+            glist_t trans;
+            gnode_t *gn;
+
+            trans = hash_entry_val(itor->ent);
+            for (gn = trans; gn; gn = gnode_next(gn)) {
+                fsg_link_t *fl = gnode_ptr(gn);
+                if (fl->wid == basewid) {
+                    fsg_link_t *link;
+
+                    /* Create transition object */
+                    link = listelem_malloc(fsg->link_alloc);
+                    link->from_state = fl->from_state;
+                    link->to_state = fl->to_state;
+                    link->logs2prob = fl->logs2prob;    /* FIXME!!!??? */
+                    link->wid = altwid;
+
+                    trans = glist_add_ptr(trans, (void *) link);
+                    ++ntrans;
+                }
+            }
+            /* glist_add_ptr() may have returned a new list handle, so
+             * write it back into the table entry. */
+            hash_entry_val(itor->ent) = trans;
+        }
+    }
+
+    E_DEBUG(2, ("Added %d alternate word transitions\n", ntrans));
+    return ntrans;
+}
+
+
+/**
+ * Create an empty FSG with `n_state` states and no transitions.
+ *
+ * @param name     Grammar name, or NULL; a copy is stored.
+ * @param lmath    Log-math object; the pointer is stored as given.
+ * @param lw       Language weight applied to transition scores.
+ * @param n_state  Number of states.
+ * @return the new grammar, with a reference count of 1.
+ */
+fsg_model_t *
+fsg_model_init(char const *name, logmath_t * lmath, float32 lw,
+               int32 n_state)
+{
+    fsg_model_t *model = ckd_calloc(1, sizeof(*model));
+
+    model->refcount = 1;
+    model->lw = lw;
+    model->lmath = lmath;
+    model->n_state = n_state;
+    model->link_alloc = listelem_alloc_init(sizeof(fsg_link_t));
+
+    if (name)
+        model->name = ckd_salloc(name);
+    else
+        model->name = NULL;
+
+    /* One adjacency entry per state; its tables are created on demand. */
+    model->trans = ckd_calloc(model->n_state, sizeof(*model->trans));
+
+    return model;
+}
+
+/**
+ * Parse an FSG in text form from an open stream.
+ *
+ * Expected layout (comment and blank lines are skipped):
+ *   FSG_BEGIN [name]
+ *   N|NUM_STATES <n>
+ *   S|START_STATE <s>
+ *   F|FINAL_STATE <f>
+ *   T|TRANSITION <from> <to> <prob> [word]   (any number of these)
+ *   FSG_END
+ *
+ * A transition without a word is a null transition.  After parsing, the
+ * transitive closure of the null transitions is computed.
+ *
+ * @param fp     Stream to read from; it is not closed here.
+ * @param lmath  Log-math object used to convert probabilities.
+ * @param lw     Language weight applied to transition scores.
+ * @return the new grammar, or NULL on a parse error (everything
+ *         allocated along the way is released).
+ */
+fsg_model_t *
+fsg_model_read(FILE * fp, logmath_t * lmath, float32 lw)
+{
+ fsg_model_t *fsg;
+ hash_table_t *vocab;
+ hash_iter_t *itor;
+ int32 lastwid;
+ char **wordptr;
+ char *lineptr;
+ char *fsgname;
+ int32 lineno;
+ int32 n, i, j;
+ int n_state, n_trans, n_null_trans;
+ glist_t nulls;
+ float32 p;
+
+ /* Everything the parse_error path releases starts out NULL. */
+ lineno = 0;
+ vocab = hash_table_new(32, FALSE);
+ wordptr = NULL;
+ lineptr = NULL;
+ nulls = NULL;
+ fsgname = NULL;
+ fsg = NULL;
+
+ /* Scan up to the FSG_BEGIN header */
+ for (;;) {
+ n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
+ if (n < 0) {
+ E_ERROR("%s declaration missing\n", FSG_MODEL_BEGIN_DECL);
+ goto parse_error;
+ }
+
+ if ((strcmp(wordptr[0], FSG_MODEL_BEGIN_DECL) == 0)) {
+ if (n > 2) {
+ E_ERROR("Line[%d]: malformed FSG_BEGIN declaration\n",
+ lineno);
+ goto parse_error;
+ }
+ break;
+ }
+ }
+ /* Save FSG name, or it will get clobbered below :(.
+ * If name is missing, try the default.
+ */
+ if (n == 2) {
+ fsgname = ckd_salloc(wordptr[1]);
+ }
+ else {
+ E_WARN("FSG name is missing\n");
+ fsgname = ckd_salloc("unknown");
+ }
+
+ /* Read #states */
+ n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
+ if ((n != 2)
+ || ((strcmp(wordptr[0], FSG_MODEL_N_DECL) != 0)
+ && (strcmp(wordptr[0], FSG_MODEL_NUM_STATES_DECL) != 0))
+ || (sscanf(wordptr[1], "%d", &n_state) != 1)
+ || (n_state <= 0)) {
+ E_ERROR
+ ("Line[%d]: #states declaration line missing or malformed\n",
+ lineno);
+ goto parse_error;
+ }
+
+ /* Now create the FSG. */
+ fsg = fsg_model_init(fsgname, lmath, lw, n_state);
+ ckd_free(fsgname);
+ fsgname = NULL;
+
+ /* Read start state */
+ n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
+ if ((n != 2)
+ || ((strcmp(wordptr[0], FSG_MODEL_S_DECL) != 0)
+ && (strcmp(wordptr[0], FSG_MODEL_START_STATE_DECL) != 0))
+ || (sscanf(wordptr[1], "%d", &(fsg->start_state)) != 1)
+ || (fsg->start_state < 0)
+ || (fsg->start_state >= fsg->n_state)) {
+ E_ERROR
+ ("Line[%d]: start state declaration line missing or malformed\n",
+ lineno);
+ goto parse_error;
+ }
+
+ /* Read final state */
+ n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
+ if ((n != 2)
+ || ((strcmp(wordptr[0], FSG_MODEL_F_DECL) != 0)
+ && (strcmp(wordptr[0], FSG_MODEL_FINAL_STATE_DECL) != 0))
+ || (sscanf(wordptr[1], "%d", &(fsg->final_state)) != 1)
+ || (fsg->final_state < 0)
+ || (fsg->final_state >= fsg->n_state)) {
+ E_ERROR
+ ("Line[%d]: final state declaration line missing or malformed\n",
+ lineno);
+ goto parse_error;
+ }
+
+ /* Read transitions; word IDs are handed out in order of first
+ * appearance, starting at 0. */
+ lastwid = 0;
+ n_trans = n_null_trans = 0;
+ for (;;) {
+ int32 wid, tprob;
+
+ n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
+ if (n <= 0) {
+ E_ERROR("Line[%d]: transition or FSG_END statement expected\n",
+ lineno);
+ goto parse_error;
+ }
+
+ if ((strcmp(wordptr[0], FSG_MODEL_END_DECL) == 0)) {
+ break;
+ }
+
+ if ((strcmp(wordptr[0], FSG_MODEL_T_DECL) == 0)
+ || (strcmp(wordptr[0], FSG_MODEL_TRANSITION_DECL) == 0)) {
+
+
+ /* 4 words: null transition; 5 words: word transition. */
+ if (((n != 4) && (n != 5))
+ || (sscanf(wordptr[1], "%d", &i) != 1)
+ || (sscanf(wordptr[2], "%d", &j) != 1)
+ || (i < 0) || (i >= fsg->n_state)
+ || (j < 0) || (j >= fsg->n_state)) {
+ E_ERROR
+ ("Line[%d]: transition spec malformed; Expecting: from-state to-state trans-prob [word]\n",
+ lineno);
+ goto parse_error;
+ }
+
+ /* The probability must lie in (0, 1]. */
+ p = atof_c(wordptr[3]);
+ if ((p <= 0.0) || (p > 1.0)) {
+ E_ERROR
+ ("Line[%d]: transition spec malformed; Expecting float as transition probability\n",
+ lineno);
+ goto parse_error;
+ }
+ }
+ else {
+ E_ERROR("Line[%d]: transition or FSG_END statement expected\n",
+ lineno);
+ goto parse_error;
+ }
+
+ tprob = (int32) (logmath_log(lmath, p) * fsg->lw);
+ /* Add word to "dictionary". */
+ if (n > 4) {
+ if (hash_table_lookup_int32(vocab, wordptr[4], &wid) < 0) {
+ (void) hash_table_enter_int32(vocab,
+ ckd_salloc(wordptr[4]),
+ lastwid);
+ wid = lastwid;
+ ++lastwid;
+ }
+ fsg_model_trans_add(fsg, i, j, tprob, wid);
+ ++n_trans;
+ }
+ else {
+ /* Only newly created null links are collected for the
+ * closure step below. */
+ if (fsg_model_null_trans_add(fsg, i, j, tprob) == 1) {
+ ++n_null_trans;
+ nulls =
+ glist_add_ptr(nulls, fsg_model_null_trans(fsg, i, j));
+ }
+ }
+ }
+
+ E_INFO("FSG: %d states, %d unique words, %d transitions (%d null)\n",
+ fsg->n_state, hash_table_inuse(vocab), n_trans, n_null_trans);
+
+
+ /* Now create a string table from the "dictionary".  The key strings
+ * allocated above are handed over to fsg->vocab rather than copied;
+ * fsg_model_free() releases them later. */
+ fsg->n_word = hash_table_inuse(vocab);
+ fsg->n_word_alloc = fsg->n_word + 10; /* Pad it a bit. */
+ fsg->vocab = ckd_calloc(fsg->n_word_alloc, sizeof(*fsg->vocab));
+ for (itor = hash_table_iter(vocab); itor;
+ itor = hash_table_iter_next(itor)) {
+ char const *word = hash_entry_key(itor->ent);
+ int32 wid = (int32) (long) hash_entry_val(itor->ent);
+ fsg->vocab[wid] = (char *) word;
+ }
+ hash_table_free(vocab);
+
+ /* Do transitive closure on null transitions */
+ nulls = fsg_model_null_trans_closure(fsg, nulls);
+ glist_free(nulls);
+
+ ckd_free(lineptr);
+ ckd_free(wordptr);
+
+ return fsg;
+
+ parse_error:
+ /* The key strings still belong to the temporary table at this point
+ * (fsg->vocab has not been filled in), so free them here. */
+ for (itor = hash_table_iter(vocab); itor;
+ itor = hash_table_iter_next(itor))
+ ckd_free((char *) hash_entry_key(itor->ent));
+ glist_free(nulls);
+ hash_table_free(vocab);
+ ckd_free(fsgname);
+ ckd_free(lineptr);
+ ckd_free(wordptr);
+ fsg_model_free(fsg);
+ return NULL;
+}
+
+
+/**
+ * Read an FSG from the named text file.
+ *
+ * @param file   Path of the file to open for reading.
+ * @param lmath  Log-math object passed on to fsg_model_read().
+ * @param lw     Language weight passed on to fsg_model_read().
+ * @return the grammar, or NULL if the file cannot be opened or parsed.
+ */
+fsg_model_t *
+fsg_model_readfile(const char *file, logmath_t * lmath, float32 lw)
+{
+    fsg_model_t *fsg = NULL;
+    FILE *fp = fopen(file, "r");
+
+    if (fp != NULL) {
+        fsg = fsg_model_read(fp, lmath, lw);
+        fclose(fp);
+    }
+    else {
+        E_ERROR_SYSTEM("Failed to open FSG file '%s' for reading", file);
+    }
+
+    return fsg;
+}
+
+/**
+ * Take an additional reference to a grammar.
+ *
+ * @return `fsg`, with its reference count increased by one.
+ */
+fsg_model_t *
+fsg_model_retain(fsg_model_t * fsg)
+{
+    fsg->refcount += 1;
+    return fsg;
+}
+
+/**
+ * Release the adjacency tables of state `i`.
+ *
+ * Frees every transition list held in the non-null table, then both
+ * tables.  The links themselves are not freed here.
+ */
+static void
+trans_list_free(fsg_model_t * fsg, int32 i)
+{
+    hash_table_t *arcs = fsg->trans[i].trans;
+    hash_iter_t *itor;
+
+    /* FIXME (maybe): FSG links will all get freed when we call
+     * listelem_alloc_free() so don't bother freeing them explicitly
+     * here. */
+    if (arcs != NULL) {
+        itor = hash_table_iter(arcs);
+        while (itor != NULL) {
+            glist_free((glist_t) hash_entry_val(itor->ent));
+            itor = hash_table_iter_next(itor);
+        }
+    }
+    hash_table_free(arcs);
+    hash_table_free(fsg->trans[i].null_trans);
+}
+
+/**
+ * Drop one reference to a grammar, destroying it when none remain.
+ *
+ * @param fsg  Grammar to release; NULL is accepted and ignored.
+ * @return the remaining reference count, or 0 if `fsg` was NULL or has
+ *         just been destroyed.
+ */
+int
+fsg_model_free(fsg_model_t * fsg)
+{
+    int idx;
+
+    if (fsg == NULL)
+        return 0;
+
+    fsg->refcount -= 1;
+    if (fsg->refcount > 0)
+        return fsg->refcount;
+
+    /* Last reference gone: tear everything down. */
+    for (idx = 0; idx < fsg->n_word; ++idx)
+        ckd_free(fsg->vocab[idx]);
+    for (idx = 0; idx < fsg->n_state; ++idx)
+        trans_list_free(fsg, idx);
+
+    ckd_free(fsg->trans);
+    ckd_free(fsg->vocab);
+    listelem_alloc_free(fsg->link_alloc);
+    bitvec_free(fsg->silwords);
+    bitvec_free(fsg->altwords);
+    ckd_free(fsg->name);
+    ckd_free(fsg);
+
+    return 0;
+}
+
+
+/**
+ * Write a grammar in FSG text form to an open stream.
+ *
+ * Emits the header declarations, one TRANSITION line per arc of every
+ * state (null arcs get an empty word field), and FSG_END, then flushes
+ * the stream.
+ *
+ * @param fsg  Grammar to write.
+ * @param fp   Destination stream; it is not closed here.
+ */
+void
+fsg_model_write(fsg_model_t * fsg, FILE * fp)
+{
+    int32 state;
+    fsg_arciter_t *arc;
+
+    fprintf(fp, "%s %s\n", FSG_MODEL_BEGIN_DECL,
+            fsg->name ? fsg->name : "");
+    fprintf(fp, "%s %d\n", FSG_MODEL_NUM_STATES_DECL, fsg->n_state);
+    fprintf(fp, "%s %d\n", FSG_MODEL_START_STATE_DECL, fsg->start_state);
+    fprintf(fp, "%s %d\n", FSG_MODEL_FINAL_STATE_DECL, fsg->final_state);
+
+    for (state = 0; state < fsg->n_state; state++) {
+        arc = fsg_model_arcs(fsg, state);
+        while (arc != NULL) {
+            fsg_link_t *tl = fsg_arciter_get(arc);
+
+            /* The stored score includes the language weight; divide it
+             * out before converting back to a probability. */
+            fprintf(fp, "%s %d %d %f %s\n", FSG_MODEL_TRANSITION_DECL,
+                    tl->from_state, tl->to_state,
+                    logmath_exp(fsg->lmath,
+                                (int32) (tl->logs2prob / fsg->lw)),
+                    (tl->wid < 0) ? "" : fsg_model_word_str(fsg, tl->wid));
+
+            arc = fsg_arciter_next(arc);
+        }
+    }
+
+    fprintf(fp, "%s\n", FSG_MODEL_END_DECL);
+
+    fflush(fp);
+}
+
+/**
+ * Write a grammar in FSG text form to the named file.
+ *
+ * @param fsg   Grammar to write; must not be NULL.
+ * @param file  Path of the file to create or truncate.
+ *
+ * If the file cannot be opened, the error is logged and nothing is
+ * written.
+ */
+void
+fsg_model_writefile(fsg_model_t * fsg, char const *file)
+{
+    FILE *fp;
+
+    assert(fsg);
+
+    E_INFO("Writing FSG file '%s'\n", file);
+
+    if ((fp = fopen(file, "w")) == NULL) {
+        /* The file is opened for writing; the message used to claim
+         * "for reading". */
+        E_ERROR_SYSTEM("Failed to open FSG file '%s' for writing", file);
+        return;
+    }
+
+    fsg_model_write(fsg, fp);
+
+    fclose(fp);
+}
+
+/**
+ * Write the arcs leaving state `i` in FSM text form, one per line:
+ * "<from> <to> <label> <weight>".
+ *
+ * Null arcs are labelled "<eps>".  The weight is the negated
+ * natural-log value of the stored score with the language weight
+ * divided out.
+ */
+static void
+fsg_model_write_fsm_trans(fsg_model_t * fsg, int i, FILE * fp)
+{
+    fsg_arciter_t *arc = fsg_model_arcs(fsg, i);
+
+    while (arc != NULL) {
+        fsg_link_t *tl = fsg_arciter_get(arc);
+
+        fprintf(fp, "%d %d %s %f\n",
+                tl->from_state, tl->to_state,
+                (tl->wid < 0) ? "<eps>" : fsg_model_word_str(fsg, tl->wid),
+                -logmath_log_to_ln(fsg->lmath, tl->logs2prob / fsg->lw));
+
+        arc = fsg_arciter_next(arc);
+    }
+}
+
+/**
+ * Write a grammar in FSM text form to an open stream.
+ *
+ * The start state's arcs are written first, then those of every other
+ * state in index order, then a final-state line "<final> 0".  The stream
+ * is flushed but not closed.
+ */
+void
+fsg_model_write_fsm(fsg_model_t * fsg, FILE * fp)
+{
+    int state;
+
+    /* The start state's arcs go first. */
+    fsg_model_write_fsm_trans(fsg, fsg_model_start_state(fsg), fp);
+
+    /* Then every remaining state, in index order. */
+    for (state = 0; state < fsg->n_state; state++) {
+        if (state != fsg_model_start_state(fsg))
+            fsg_model_write_fsm_trans(fsg, state, fp);
+    }
+
+    /* Final state. */
+    fprintf(fp, "%d 0\n", fsg_model_final_state(fsg));
+
+    fflush(fp);
+}
+
+/**
+ * Write a grammar in FSM text form to the named file.
+ *
+ * @param fsg   Grammar to write; must not be NULL.
+ * @param file  Path of the file to create or truncate.
+ *
+ * If the file cannot be opened, the error is logged and nothing is
+ * written.
+ */
+void
+fsg_model_writefile_fsm(fsg_model_t * fsg, char const *file)
+{
+    FILE *out;
+
+    assert(fsg);
+
+    E_INFO("Writing FSM file '%s'\n", file);
+
+    out = fopen(file, "w");
+    if (out == NULL) {
+        E_ERROR_SYSTEM("Failed to open fsm file '%s' for writing", file);
+        return;
+    }
+
+    fsg_model_write_fsm(fsg, out);
+    fclose(out);
+}
+
+/**
+ * Write the FSM symbol table for a grammar to an open stream.
+ *
+ * Symbol 0 is "<eps>"; vocabulary word i is written with symbol number
+ * i + 1.  The stream is flushed but not closed.
+ */
+void
+fsg_model_write_symtab(fsg_model_t * fsg, FILE * file)
+{
+    int wid = 0;
+
+    fprintf(file, "<eps> 0\n");
+    while (wid < fsg_model_n_word(fsg)) {
+        fprintf(file, "%s %d\n", fsg_model_word_str(fsg, wid), wid + 1);
+        ++wid;
+    }
+    fflush(file);
+}
+
+/**
+ * Write the FSM symbol table for a grammar to the named file.
+ *
+ * @param fsg   Grammar whose vocabulary is written; must not be NULL.
+ * @param file  Path of the file to create or truncate.
+ *
+ * If the file cannot be opened, the error is logged and nothing is
+ * written.
+ */
+void
+fsg_model_writefile_symtab(fsg_model_t * fsg, char const *file)
+{
+    FILE *fp;
+
+    assert(fsg);
+
+    E_INFO("Writing FSM symbol table '%s'\n", file);
+
+    if ((fp = fopen(file, "w")) == NULL) {
+        /* Report fopen() failures the way the other writefile functions
+         * do.  The message has no trailing newline, which is the form
+         * they pass to E_ERROR_SYSTEM. */
+        E_ERROR_SYSTEM("Failed to open symbol table '%s' for writing", file);
+        return;
+    }
+
+    fsg_model_write_symtab(fsg, fp);
+
+    fclose(fp);
+}
diff --git a/media/sphinxbase/src/libsphinxbase/lm/jsgf.c b/media/sphinxbase/src/libsphinxbase/lm/jsgf.c
new file mode 100644
index 000000000..90e161c62
--- /dev/null
+++ b/media/sphinxbase/src/libsphinxbase/lm/jsgf.c
@@ -0,0 +1,943 @@
+/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
+/* ====================================================================
+ * Copyright (c) 2007 Carnegie Mellon University. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * This work was supported in part by funding from the Defense Advanced
+ * Research Projects Agency and the National Science Foundation of the
+ * United States of America, and the CMU Sphinx Speech Consortium.
+ *
+ * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
+ * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
+ * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ====================================================================
+ *
+ */
+
+#include <string.h>
+#include <assert.h>
+
+#include "sphinxbase/ckd_alloc.h"
+#include "sphinxbase/strfuncs.h"
+#include "sphinxbase/hash_table.h"
+#include "sphinxbase/filename.h"
+#include "sphinxbase/err.h"
+#include "sphinxbase/jsgf.h"
+
+#include "jsgf_internal.h"
+#include "jsgf_parser.h"
+#include "jsgf_scanner.h"
+
+extern int yyparse (void* scanner, jsgf_t* jsgf);
+
+/**
+ * \file jsgf.c
+ *
+ * This file implements the data structures for parsing JSGF grammars
+ * into Sphinx finite-state grammars.
+ **/
+
+static int expand_rule(jsgf_t *grammar, jsgf_rule_t *rule, int rule_entry, int rule_exit);
+
+/**
+ * Allocate an atom with the given name and weight.
+ *
+ * @param name    Atom name; a copy is stored.
+ * @param weight  Weight stored in the atom.
+ * @return the new atom; release it with jsgf_atom_free().
+ */
+jsgf_atom_t *
+jsgf_atom_new(char *name, float weight)
+{
+    jsgf_atom_t *result = ckd_calloc(1, sizeof(*result));
+
+    result->weight = weight;
+    result->name = ckd_salloc(name);
+
+    return result;
+}
+
+/**
+ * Release an atom and its name.  NULL is accepted and ignored.
+ *
+ * @return always 0.
+ */
+int
+jsgf_atom_free(jsgf_atom_t *atom)
+{
+    if (atom != NULL) {
+        ckd_free(atom->name);
+        ckd_free(atom);
+    }
+    return 0;
+}
+
+/**
+ * Allocate a grammar object.
+ *
+ * @param parent  Parent grammar, or NULL for a top-level grammar.
+ * @return the new grammar.  With a parent, the rule table, import table
+ *         and search path are the parent's own (shared, not copied);
+ *         without one, fresh rule and import tables are created.
+ */
+jsgf_t *
+jsgf_grammar_new(jsgf_t *parent)
+{
+    jsgf_t *grammar = ckd_calloc(1, sizeof(*grammar));
+
+    if (parent == NULL) {
+        /* Top-level grammar: it owns its namespace. */
+        grammar->rules = hash_table_new(64, 0);
+        grammar->imports = hash_table_new(16, 0);
+        return grammar;
+    }
+
+    /* An imported/subgrammar shares a global namespace with its
+     * parent. */
+    grammar->parent = parent;
+    grammar->searchpath = parent->searchpath;
+    grammar->imports = parent->imports;
+    grammar->rules = parent->rules;
+
+    return grammar;
+}
+
+/**
+ * Release a grammar.
+ *
+ * A top-level grammar (no parent) also releases the shared rule table,
+ * the imported grammars, the search path and the links.  A grammar that
+ * has a parent releases only its own strings and itself.
+ *
+ * @param jsgf  Grammar to release; NULL is accepted and ignored, as in
+ *              the other free functions here.
+ */
+void
+jsgf_grammar_free(jsgf_t *jsgf)
+{
+    if (jsgf == NULL)
+        return;
+
+    /* FIXME: Probably should just use refcounting instead. */
+    if (jsgf->parent == NULL) {
+        hash_iter_t *itor;
+        gnode_t *gn;
+
+        for (itor = hash_table_iter(jsgf->rules); itor;
+             itor = hash_table_iter_next(itor)) {
+            ckd_free((char *)itor->ent->key);
+            jsgf_rule_free((jsgf_rule_t *)itor->ent->val);
+        }
+        hash_table_free(jsgf->rules);
+        for (itor = hash_table_iter(jsgf->imports); itor;
+             itor = hash_table_iter_next(itor)) {
+            ckd_free((char *)itor->ent->key);
+            jsgf_grammar_free((jsgf_t *)itor->ent->val);
+        }
+        hash_table_free(jsgf->imports);
+        for (gn = jsgf->searchpath; gn; gn = gnode_next(gn))
+            ckd_free(gnode_ptr(gn));
+        glist_free(jsgf->searchpath);
+        for (gn = jsgf->links; gn; gn = gnode_next(gn))
+            ckd_free(gnode_ptr(gn));
+        glist_free(jsgf->links);
+    }
+    ckd_free(jsgf->name);
+    ckd_free(jsgf->version);
+    ckd_free(jsgf->charset);
+    ckd_free(jsgf->locale);
+    ckd_free(jsgf);
+}
+
+/**
+ * Release a right-hand side together with its whole chain of
+ * alternatives, including every atom on each of them.  NULL is accepted
+ * and ignored.
+ */
+static void
+jsgf_rhs_free(jsgf_rhs_t *rhs)
+{
+    while (rhs != NULL) {
+        jsgf_rhs_t *next_alt = rhs->alt;
+        gnode_t *node;
+
+        for (node = rhs->atoms; node != NULL; node = gnode_next(node))
+            jsgf_atom_free(gnode_ptr(node));
+        glist_free(rhs->atoms);
+        ckd_free(rhs);
+
+        rhs = next_alt;
+    }
+}
+
+/**
+ * Build the internal rule that implements a Kleene closure of `atom`.
+ *
+ * The generated rule has two alternatives: a base case, which is <NULL>
+ * for "*" or a copy of the atom for "+", and a second alternative made
+ * of `atom` and a reference to the generated rule itself.
+ *
+ * @param jsgf  Grammar in which the rule is defined.
+ * @param atom  Atom being repeated; it is placed on the second
+ *              alternative's atom list as given, not copied.
+ * @param plus  Non-zero for "+", zero for "*".
+ * @return a new atom that refers to the generated rule.
+ */
+jsgf_atom_t *
+jsgf_kleene_new(jsgf_t *jsgf, jsgf_atom_t *atom, int plus)
+{
+    jsgf_rhs_t *base_case, *repeat_case;
+    jsgf_rule_t *rule;
+    jsgf_atom_t *self_ref;
+
+    /* Generate an "internal" rule of the form (<NULL> | <name> <g0006>) */
+    /* Or if plus is true, (<name> | <name> <g0006>) */
+    base_case = ckd_calloc(1, sizeof(*base_case));
+    base_case->atoms =
+        glist_add_ptr(NULL,
+                      jsgf_atom_new(plus ? atom->name : "<NULL>", 1.0));
+    rule = jsgf_define_rule(jsgf, NULL, base_case, 0);
+
+    /* Second alternative: the repeated atom plus a reference back to
+     * this rule. */
+    self_ref = jsgf_atom_new(rule->name, 1.0);
+    repeat_case = ckd_calloc(1, sizeof(*repeat_case));
+    repeat_case->atoms = glist_add_ptr(NULL, self_ref);
+    repeat_case->atoms = glist_add_ptr(repeat_case->atoms, atom);
+    rule->rhs->alt = repeat_case;
+
+    return jsgf_atom_new(rule->name, 1.0);
+}
+
+/**
+ * Build the internal rule that implements an optional expression:
+ * a <NULL> alternative with `exp` chained on as its alternative.
+ *
+ * @param jsgf  Grammar in which the rule is defined.
+ * @param exp   Expression being made optional; linked in as given.
+ * @return the rule returned by jsgf_define_rule().
+ */
+jsgf_rule_t *
+jsgf_optional_new(jsgf_t *jsgf, jsgf_rhs_t *exp)
+{
+    jsgf_rhs_t *optional;
+    jsgf_atom_t *null_atom;
+
+    optional = ckd_calloc(1, sizeof(*optional));
+    null_atom = jsgf_atom_new("<NULL>", 1.0);
+    optional->atoms = glist_add_ptr(NULL, null_atom);
+    optional->alt = exp;
+
+    return jsgf_define_rule(jsgf, NULL, optional, 0);
+}
+
+/**
+ * Record a link between two nodes on the grammar's link list.
+ *
+ * @param grammar  Grammar whose `links` list receives the link.
+ * @param atom     Atom associated with the link; the pointer is stored
+ *                 as given.
+ * @param from     Source node.
+ * @param to       Destination node.
+ */
+void
+jsgf_add_link(jsgf_t *grammar, jsgf_atom_t *atom, int from, int to)
+{
+    jsgf_link_t *entry = ckd_calloc(1, sizeof(*entry));
+
+    entry->atom = atom;
+    entry->to = to;
+    entry->from = from;
+
+    grammar->links = glist_add_ptr(grammar->links, entry);
+}
+
+/**
+ * Extract the grammar part of a qualified rule name.
+ *
+ * The first character of `rule_name` is skipped and the remainder is
+ * cut at its last '.' (searched for from the second character of that
+ * remainder onwards).
+ *
+ * @return a newly allocated string the caller must ckd_free(), or NULL
+ *         if no '.' is found.
+ */
+static char *
+extract_grammar_name(char *rule_name)
+{
+    char *grammar_name = ckd_salloc(rule_name + 1);
+    char *dot_pos = strrchr(grammar_name + 1, '.');
+
+    if (dot_pos != NULL) {
+        *dot_pos = '\0';
+        return grammar_name;
+    }
+
+    /* No dot: there is no grammar part. */
+    ckd_free(grammar_name);
+    return NULL;
+}
+
+/**
+ * Return the grammar's name.
+ *
+ * @return the `name` field of `jsgf`; the string is not copied.
+ */
+char const *
+jsgf_grammar_name(jsgf_t *jsgf)
+{
+    char const *grammar_name = jsgf->name;
+
+    return grammar_name;
+}
+
+/**
+ * Build the fully qualified form of a rule name for this grammar.
+ *
+ * A name that already contains a '.' after its first character is
+ * returned as a plain copy.  Otherwise the result is "<" followed by
+ * the grammar name, ".", and `name` without its first character.
+ *
+ * @return a newly allocated string.
+ */
+static char *
+jsgf_fullname(jsgf_t *jsgf, const char *name)
+{
+    char *fullname;
+    size_t size;
+
+    /* Already qualified: just copy it. */
+    if (strchr(name + 1, '.') != NULL)
+        return ckd_salloc(name);
+
+    /* Skip leading < in name */
+    size = strlen(jsgf->name) + strlen(name) + 4;
+    fullname = ckd_malloc(size);
+    sprintf(fullname, "<%s.%s", jsgf->name, name + 1);
+
+    return fullname;
+}
+
+/**
+ * Build the fully qualified form of a rule name, taking the grammar
+ * part from another rule's name.
+ *
+ * A name that already contains a '.' after its first character, or any
+ * name when `rule->name` has no grammar part, is returned as a plain
+ * copy.
+ *
+ * @return a newly allocated string.
+ */
+static char *
+jsgf_fullname_from_rule(jsgf_rule_t *rule, const char *name)
+{
+    char *grammar_name;
+    char *fullname;
+
+    /* Already qualified: just copy it. */
+    if (strchr(name + 1, '.') != NULL)
+        return ckd_salloc(name);
+
+    grammar_name = extract_grammar_name(rule->name);
+    if (grammar_name == NULL)
+        return ckd_salloc(name);
+
+    /* Skip leading < in name */
+    fullname = ckd_malloc(strlen(grammar_name) + strlen(name) + 4);
+    sprintf(fullname, "<%s.%s", grammar_name, name + 1);
+    ckd_free(grammar_name);
+
+    return fullname;
+}
+
+/* Reduce an import name to its last two dot-separated components.
+ * When the name contains at least two dots, everything before the
+ * second-to-last dot is treated as a path specification and replaced by a
+ * single '<'.  Names with fewer dots are returned as an unmodified copy.
+ * The result is newly allocated. */
+static char *
+importname2rulename(char *importname)
+{
+    char *copy = ckd_salloc(importname);
+    char *last_dot;
+    char *prev_dot;
+    char *result;
+
+    last_dot = strrchr(copy + 1, '.');
+    if (last_dot == NULL)
+        return copy;
+
+    /* Hide the last dot temporarily so strrchr finds the one before it. */
+    *last_dot = '\0';
+    prev_dot = strrchr(copy + 1, '.');
+    *last_dot = '.';
+    if (prev_dot == NULL)
+        return copy;
+
+    /* The second-to-last dot becomes the opening '<' of the short name. */
+    *prev_dot = '<';
+    result = ckd_salloc(prev_dot);
+    ckd_free(copy);
+    return result;
+}
+
+#define NO_NODE -1
+#define RECURSIVE_NODE -2
+
+/**
+ *
+ * Expand a right-hand-side of a rule (i.e. a single alternate).
+ *
+ * @returns the FSG state at the end of this rule, NO_NODE if there's an
+ * error, and RECURSIVE_NODE if the right-hand-side ended in right-recursion (i.e.
+ * a link to an earlier FSG state).
+ */
+static int
+expand_rhs(jsgf_t *grammar, jsgf_rule_t *rule, jsgf_rhs_t *rhs,
+ int rule_entry, int rule_exit)
+{
+ gnode_t *gn;
+ int lastnode;
+
+ /* Last node expanded in this sequence. */
+ lastnode = rule_entry;
+
+ /* Iterate over atoms in rhs and generate links/nodes */
+ for (gn = rhs->atoms; gn; gn = gnode_next(gn)) {
+ jsgf_atom_t *atom = gnode_ptr(gn);
+
+ if (jsgf_atom_is_rule(atom)) {
+ jsgf_rule_t *subrule;
+ char *fullname;
+ gnode_t *subnode;
+ jsgf_rule_stack_t *rule_stack_entry = NULL;
+
+ /* Special case for <NULL> and <VOID> pseudo-rules
+ If this is the only atom in the rhs, and it's the
+ first rhs in the rule, then emit a null transition,
+ creating an exit state if needed. */
+ if (0 == strcmp(atom->name, "<NULL>")) {
+ if (gn == rhs->atoms && gnode_next(gn) == NULL) {
+ if (rule_exit == NO_NODE) {
+ jsgf_add_link(grammar, atom,
+ lastnode, grammar->nstate);
+ rule_exit = lastnode = grammar->nstate;
+ ++grammar->nstate;
+ } else {
+ jsgf_add_link(grammar, atom,
+ lastnode, rule_exit);
+ }
+ }
+ /* A <NULL> that is not the sole atom of the rhs adds no
+ link and no state; it is simply skipped. */
+ continue;
+ }
+ else if (0 == strcmp(atom->name, "<VOID>")) {
+ /* Make this entire RHS unspeakable */
+ return NO_NODE;
+ }
+
+ /* Qualify the reference with the grammar prefix taken from the
+ enclosing rule's own name before looking it up. */
+ fullname = jsgf_fullname_from_rule(rule, atom->name);
+ if (hash_table_lookup(grammar->rules, fullname, (void**)&subrule) == -1) {
+ E_ERROR("Undefined rule in RHS: %s\n", fullname);
+ ckd_free(fullname);
+ return NO_NODE;
+ }
+ ckd_free(fullname);
+
+ /* Look for this subrule in the stack of expanded rules */
+ for (subnode = grammar->rulestack; subnode; subnode = gnode_next(subnode)) {
+ rule_stack_entry = (jsgf_rule_stack_t *)gnode_ptr(subnode);
+ if (rule_stack_entry->rule == subrule)
+ break;
+ }
+
+ /* subnode is non-NULL only when the loop above stopped on a
+ match, in which case rule_stack_entry refers to the matching
+ stack entry for subrule. */
+ if (subnode != NULL) {
+ /* Allow right-recursion only. */
+ if (gnode_next(gn) != NULL) {
+ E_ERROR("Only right-recursion is permitted (in %s.%s)\n",
+ grammar->name, rule->name);
+ return NO_NODE;
+ }
+ /* Add a link back to the beginning of this rule instance */
+ E_INFO("Right recursion %s %d => %d\n", atom->name, lastnode, rule_stack_entry->entry);
+ jsgf_add_link(grammar, atom, lastnode, rule_stack_entry->entry);
+
+ /* Let our caller know that this rhs didn't reach an
+ end state. */
+ lastnode = RECURSIVE_NODE;
+ }
+ else {
+ /* If this is the last atom in this rhs, link its
+ expansion to the parent rule's exit state.
+ Otherwise, create a new exit state for it. */
+ int subruleexit = NO_NODE;
+ if (gnode_next(gn) == NULL && rule_exit >= 0)
+ subruleexit = rule_exit;
+
+ /* Expand the subrule */
+ lastnode = expand_rule(grammar, subrule, lastnode, subruleexit);
+
+ /* A failed subrule expansion fails this whole alternate. */
+ if (lastnode == NO_NODE)
+ return NO_NODE;
+ }
+ }
+ else {
+ /* An exit-state is created if this isn't the last atom
+ in the rhs, or if the containing rule doesn't have an
+ exit state yet.
+ Otherwise, the rhs's exit state becomes the containing
+ rule's exit state. */
+ int exitstate;
+ if (gnode_next(gn) == NULL && rule_exit >= 0) {
+ exitstate = rule_exit;
+ } else {
+ exitstate = grammar->nstate;
+ ++grammar->nstate;
+ }
+
+ /* Add a link for this token */
+ jsgf_add_link(grammar, atom,
+ lastnode, exitstate);
+ lastnode = exitstate;
+ }
+ }
+
+ /* Either the last state reached, or RECURSIVE_NODE when the final atom
+ was a right-recursive reference. */
+ return lastnode;
+}
+
+/**
+ * Expand all alternates of a rule into FSG links, starting at rule_entry.
+ *
+ * The rule is pushed onto grammar->rulestack for the duration of the
+ * expansion so that expand_rhs() can detect recursive references.  It is
+ * popped again on every return path, including failure, so that a failed
+ * expansion does not leave stale entries behind.
+ *
+ * @param grammar    Grammar receiving the generated links and states.
+ * @param rule       Rule to expand.
+ * @param rule_entry FSG state at which the rule starts.
+ * @param rule_exit  FSG state the rule must end in, or NO_NODE to let the
+ *                   first alternate that reaches an end state choose it.
+ * @returns the exit state of the rule (rule_entry if no alternate
+ *          produced one), or NO_NODE if any alternate failed to expand.
+ */
+static int
+expand_rule(jsgf_t *grammar, jsgf_rule_t *rule, int rule_entry,
+            int rule_exit)
+{
+    jsgf_rule_stack_t *rule_stack_entry;
+    jsgf_rhs_t *rhs;
+    int failed = 0;
+
+    /* Push this rule onto the stack */
+    rule_stack_entry = ckd_calloc(1, sizeof(*rule_stack_entry));
+    rule_stack_entry->rule = rule;
+    rule_stack_entry->entry = rule_entry;
+    grammar->rulestack = glist_add_ptr(grammar->rulestack,
+                                       rule_stack_entry);
+
+    for (rhs = rule->rhs; rhs; rhs = rhs->alt) {
+        int lastnode = expand_rhs(grammar, rule, rhs,
+                                  rule_entry, rule_exit);
+
+        if (lastnode == NO_NODE) {
+            /* Stop expanding, but still fall through to the pop below. */
+            failed = 1;
+            break;
+        }
+        if (lastnode == RECURSIVE_NODE) {
+            /* The rhs ended with right-recursion, i.e. a transition to
+               an earlier state.  Nothing needs to happen at this level. */
+            continue;
+        }
+        if (rule_exit == NO_NODE) {
+            /* If this rule doesn't have an exit state yet, use the exit
+               state of its first right-hand-side.
+               All other right-hand-sides will use this exit state. */
+            assert(lastnode >= 0);
+            rule_exit = lastnode;
+        }
+    }
+
+    /* If no exit-state was created, use the entry-state. */
+    if (rule_exit == NO_NODE)
+        rule_exit = rule_entry;
+
+    /* Pop this rule from the rule stack */
+    ckd_free(gnode_ptr(grammar->rulestack));
+    grammar->rulestack = gnode_free(grammar->rulestack, NULL);
+
+    return failed ? NO_NODE : rule_exit;
+}
+
+/** Start an iteration over the grammar's rule table. */
+jsgf_rule_iter_t *
+jsgf_rule_iter(jsgf_t *grammar)
+{
+    jsgf_rule_iter_t *first = hash_table_iter(grammar->rules);
+
+    return first;
+}
+
+/**
+ * Look up a rule by name.
+ *
+ * The lookup key is the given name wrapped in angle brackets; the name is
+ * not otherwise qualified here.
+ *
+ * @returns the rule, or NULL if the key is not in the rule table.
+ */
+jsgf_rule_t *
+jsgf_get_rule(jsgf_t *grammar, char const *name)
+{
+    void *found;
+    char *key = string_join("<", name, ">", NULL);
+    int rv = hash_table_lookup(grammar->rules, key, &found);
+
+    ckd_free(key);
+    if (rv < 0)
+        return NULL;
+    return (jsgf_rule_t *)found;
+}
+
+/**
+ * Find a public rule that belongs to this grammar.
+ *
+ * A public rule qualifies when its name has no '.' after the first
+ * character, or when the text between the leading '<' and the last '.'
+ * equals the grammar name exactly.  (A pure prefix comparison would let
+ * grammar "foobar" claim a rule named "<foo.x>".)
+ *
+ * @returns the first qualifying rule in iteration order, or NULL.
+ */
+jsgf_rule_t *
+jsgf_get_public_rule(jsgf_t *grammar)
+{
+    jsgf_rule_iter_t *itor;
+    const char *grammar_name = jsgf_grammar_name(grammar);
+
+    for (itor = jsgf_rule_iter(grammar); itor;
+         itor = jsgf_rule_iter_next(itor)) {
+        jsgf_rule_t *rule = jsgf_rule_iter_rule(itor);
+        const char *rule_name;
+        const char *dot_pos;
+        size_t prefix_len;
+
+        if (!jsgf_rule_public(rule))
+            continue;
+
+        rule_name = jsgf_rule_name(rule);
+        dot_pos = strrchr(rule_name + 1, '.');
+        if (dot_pos == NULL) {
+            jsgf_rule_iter_free(itor);
+            return rule;
+        }
+
+        /* Length of the grammar prefix, excluding the leading '<'. */
+        prefix_len = (size_t)(dot_pos - (rule_name + 1));
+        if (grammar_name != NULL
+            && strlen(grammar_name) == prefix_len
+            && 0 == strncmp(rule_name + 1, grammar_name, prefix_len)) {
+            jsgf_rule_iter_free(itor);
+            return rule;
+        }
+    }
+    return NULL;
+}
+
+/** Return the rule's name; the string remains owned by the rule. */
+char const *
+jsgf_rule_name(jsgf_rule_t *rule)
+{
+    char const *rule_name = rule->name;
+
+    return rule_name;
+}
+
+/** Return the rule's is_public flag. */
+int
+jsgf_rule_public(jsgf_rule_t *rule)
+{
+    int is_public = rule->is_public;
+
+    return is_public;
+}
+
+/**
+ * Convert one rule of a grammar into a finite-state grammar model.
+ *
+ * Any links from a previous conversion are discarded, the rule is expanded
+ * into grammar->links starting from state 0, and the links are then
+ * replayed into a freshly initialised fsg_model_t.  Links whose atom is a
+ * rule, and links without an atom, become null transitions; all other
+ * links become word transitions.
+ *
+ * @param do_closure When non-zero, fsg_model_null_trans_closure() is run
+ *                   on the result and its returned list is freed.
+ */
+static fsg_model_t *
+jsgf_build_fsg_internal(jsgf_t *grammar, jsgf_rule_t *rule,
+                        logmath_t *lmath, float32 lw, int do_closure)
+{
+    fsg_model_t *model;
+    gnode_t *node;
+    int entry_state, exit_state;
+
+    /* Discard links left over from an earlier conversion. */
+    for (node = grammar->links; node != NULL; node = gnode_next(node))
+        ckd_free(gnode_ptr(node));
+    glist_free(grammar->links);
+    grammar->links = NULL;
+    grammar->nstate = 0;
+
+    /* Allocate the top-level entry state and expand the top-level rule. */
+    entry_state = grammar->nstate;
+    grammar->nstate += 1;
+    exit_state = expand_rule(grammar, rule, entry_state, NO_NODE);
+
+    /* If the expansion produced no exit state, make one. */
+    if (exit_state == NO_NODE) {
+        exit_state = grammar->nstate;
+        grammar->nstate += 1;
+        jsgf_add_link(grammar, NULL, entry_state, exit_state);
+    }
+
+    model = fsg_model_init(rule->name, lmath, lw, grammar->nstate);
+    model->start_state = entry_state;
+    model->final_state = exit_state;
+
+    /* Links were prepended while expanding; restore creation order. */
+    grammar->links = glist_reverse(grammar->links);
+    for (node = grammar->links; node != NULL; node = gnode_next(node)) {
+        jsgf_link_t *link = gnode_ptr(node);
+
+        if (link->atom == NULL) {
+            fsg_model_null_trans_add(model, link->from, link->to, 0);
+        }
+        else if (jsgf_atom_is_rule(link->atom)) {
+            fsg_model_null_trans_add(model, link->from, link->to,
+                                     logmath_log(lmath, link->atom->weight));
+        }
+        else {
+            int wid = fsg_model_word_add(model, link->atom->name);
+            fsg_model_trans_add(model, link->from, link->to,
+                                logmath_log(lmath, link->atom->weight), wid);
+        }
+    }
+
+    if (do_closure) {
+        glist_t closure = fsg_model_null_trans_closure(model, NULL);
+        glist_free(closure);
+    }
+
+    return model;
+}
+
+/** Build an FSG for a rule, with null-transition closure applied. */
+fsg_model_t *
+jsgf_build_fsg(jsgf_t *grammar, jsgf_rule_t *rule,
+               logmath_t *lmath, float32 lw)
+{
+    fsg_model_t *closed = jsgf_build_fsg_internal(grammar, rule, lmath, lw, TRUE);
+
+    return closed;
+}
+
+/** Build an FSG for a rule without running the null-transition closure. */
+fsg_model_t *
+jsgf_build_fsg_raw(jsgf_t *grammar, jsgf_rule_t *rule,
+                   logmath_t *lmath, float32 lw)
+{
+    fsg_model_t *raw = jsgf_build_fsg_internal(grammar, rule, lmath, lw, FALSE);
+
+    return raw;
+}
+
+/**
+ * Parse a JSGF file and build an FSG from the first public rule found.
+ *
+ * @param file  Path of the grammar file.
+ * @param lmath Log-math object passed through to the FSG builder.
+ * @param lw    Language weight passed through to the FSG builder.
+ * @returns the new FSG, or NULL if parsing fails or the grammar contains
+ *          no public rule.  The parsed grammar is always released before
+ *          returning.
+ */
+fsg_model_t *
+jsgf_read_file(const char *file, logmath_t * lmath, float32 lw)
+{
+    fsg_model_t *fsg;
+    jsgf_rule_t *rule = NULL;
+    jsgf_t *jsgf;
+    jsgf_rule_iter_t *itor;
+
+    if ((jsgf = jsgf_parse_file(file, NULL)) == NULL) {
+        E_ERROR("Error parsing file: %s\n", file);
+        return NULL;
+    }
+
+    /* Only remember a rule once it is known to be public; otherwise a
+       grammar with nothing but private rules would end up using the last
+       private rule visited. */
+    for (itor = jsgf_rule_iter(jsgf); itor;
+         itor = jsgf_rule_iter_next(itor)) {
+        jsgf_rule_t *candidate = jsgf_rule_iter_rule(itor);
+
+        if (jsgf_rule_public(candidate)) {
+            rule = candidate;
+            jsgf_rule_iter_free(itor);
+            break;
+        }
+    }
+    if (rule == NULL) {
+        E_ERROR("No public rules found in %s\n", file);
+        jsgf_grammar_free(jsgf);
+        return NULL;
+    }
+    fsg = jsgf_build_fsg(jsgf, rule, lmath, lw);
+    jsgf_grammar_free(jsgf);
+    return fsg;
+}
+
+/**
+ * Parse JSGF text held in memory and build an FSG from the first public
+ * rule found.
+ *
+ * @param string Grammar source text.
+ * @param lmath  Log-math object passed through to the FSG builder.
+ * @param lw     Language weight passed through to the FSG builder.
+ * @returns the new FSG, or NULL if parsing fails or the grammar contains
+ *          no public rule.  The parsed grammar is always released before
+ *          returning.
+ */
+fsg_model_t *
+jsgf_read_string(const char *string, logmath_t * lmath, float32 lw)
+{
+    fsg_model_t *fsg;
+    jsgf_rule_t *rule = NULL;
+    jsgf_t *jsgf;
+    jsgf_rule_iter_t *itor;
+
+    if ((jsgf = jsgf_parse_string(string, NULL)) == NULL) {
+        E_ERROR("Error parsing input string\n");
+        return NULL;
+    }
+
+    /* Only remember a rule once it is known to be public; otherwise a
+       grammar with nothing but private rules would end up using the last
+       private rule visited. */
+    for (itor = jsgf_rule_iter(jsgf); itor;
+         itor = jsgf_rule_iter_next(itor)) {
+        jsgf_rule_t *candidate = jsgf_rule_iter_rule(itor);
+
+        if (jsgf_rule_public(candidate)) {
+            rule = candidate;
+            jsgf_rule_iter_free(itor);
+            break;
+        }
+    }
+    if (rule == NULL) {
+        jsgf_grammar_free(jsgf);
+        E_ERROR("No public rules found in input string\n");
+        return NULL;
+    }
+    fsg = jsgf_build_fsg(jsgf, rule, lmath, lw);
+    jsgf_grammar_free(jsgf);
+    return fsg;
+}
+
+
+/**
+ * Convert a rule to an FSG (without null-transition closure) and write it
+ * to an open stream.
+ *
+ * A temporary log-math object is created for the conversion; both it and
+ * the temporary FSG are released before returning.
+ *
+ * @returns 0 on success, -1 if the FSG could not be built.
+ */
+int
+jsgf_write_fsg(jsgf_t *grammar, jsgf_rule_t *rule, FILE *outfh)
+{
+    fsg_model_t *fsg;
+    logmath_t *lmath = logmath_init(1.0001, 0, 0);
+    int rv = -1;
+
+    fsg = jsgf_build_fsg_raw(grammar, rule, lmath, 1.0);
+    if (fsg != NULL) {
+        fsg_model_write(fsg, outfh);
+        /* The model only exists to be written out; release it here. */
+        fsg_model_free(fsg);
+        rv = 0;
+    }
+
+    logmath_free(lmath);
+    return rv;
+}
+
+/**
+ * Create a rule and register it in the grammar's rule table.
+ *
+ * @param jsgf      Grammar to add the rule to.
+ * @param name      Rule name, or NULL to generate one of the form
+ *                  "<grammar.gNNNNN>" from the current table size.  A
+ *                  given name is qualified with the grammar name if it is
+ *                  not already qualified; the caller keeps ownership of
+ *                  the string it passed in.
+ * @param rhs       Expansion of the rule; stored in the rule as-is.
+ * @param is_public Non-zero if the rule is public.
+ * @returns the new rule.  If a rule with the same name already exists a
+ *          warning is logged, the table is left unchanged, and the new
+ *          rule is still returned.
+ */
+jsgf_rule_t *
+jsgf_define_rule(jsgf_t *jsgf, char *name, jsgf_rhs_t *rhs, int is_public)
+{
+    jsgf_rule_t *rule;
+    void *val;
+
+    /* From here on, name is a private allocation used as the table key. */
+    if (name == NULL) {
+        name = ckd_malloc(strlen(jsgf->name) + 16);
+        sprintf(name, "<%s.g%05d>", jsgf->name, hash_table_inuse(jsgf->rules));
+    }
+    else {
+        name = jsgf_fullname(jsgf, name);
+    }
+
+    rule = ckd_calloc(1, sizeof(*rule));
+    rule->refcnt = 1;
+    rule->name = ckd_salloc(name);
+    rule->rhs = rhs;
+    rule->is_public = is_public;
+
+    E_INFO("Defined rule: %s%s\n",
+           rule->is_public ? "PUBLIC " : "",
+           rule->name);
+    val = hash_table_enter(jsgf->rules, name, rule);
+    if (val != (void *)rule) {
+        E_WARN("Multiply defined symbol: %s\n", name);
+        /* The existing entry was kept, so the table did not take the new
+           key string; release it instead of leaking it. */
+        ckd_free(name);
+    }
+    return rule;
+}
+
+/** Take an additional reference on a rule and return the same rule. */
+jsgf_rule_t *
+jsgf_rule_retain(jsgf_rule_t *rule)
+{
+    rule->refcnt += 1;
+    return rule;
+}
+
+/**
+ * Drop one reference to a rule, destroying it when none remain.
+ *
+ * @returns the remaining reference count, or 0 if the rule was NULL or
+ *          has been destroyed.
+ */
+int
+jsgf_rule_free(jsgf_rule_t *rule)
+{
+    if (rule == NULL)
+        return 0;
+
+    rule->refcnt -= 1;
+    if (rule->refcnt > 0)
+        return rule->refcnt;
+
+    /* Last reference gone: release the expansion, the name and the rule. */
+    jsgf_rhs_free(rule->rhs);
+    ckd_free(rule->name);
+    ckd_free(rule);
+    return 0;
+}
+
+
+/* FIXME: This should go in libsphinxutil */
+/**
+ * Search a list of directories for a file that can be opened for reading.
+ *
+ * @param paths List of directory name strings.
+ * @param path  File name, joined to each directory with "/".
+ * @returns a newly allocated full path for the first directory in which
+ *          the file could be opened, or NULL if none matched.
+ */
+static char *
+path_list_search(glist_t paths, char *path)
+{
+    gnode_t *dir;
+
+    for (dir = paths; dir != NULL; dir = gnode_next(dir)) {
+        char *candidate = string_join(gnode_ptr(dir), "/", path, NULL);
+        FILE *probe = fopen(candidate, "r");
+
+        if (probe == NULL) {
+            ckd_free(candidate);
+            continue;
+        }
+
+        /* The file is only opened to prove it is readable. */
+        fclose(probe);
+        return candidate;
+    }
+    return NULL;
+}
+
+/**
+ * Import one rule, or all public rules, from another grammar file.
+ *
+ * The import name "<pkg.sub.rule>" (or "<pkg.sub.*>") is split at its last
+ * dot; the part before it is turned into "pkg/sub.gram" and looked up on
+ * the grammar's search path.  The file is parsed once and cached in
+ * jsgf->imports under its resolved path.  Matching public rules are then
+ * entered into jsgf->rules under a name qualified with this grammar's
+ * name, holding an extra reference on the rule.
+ *
+ * @param jsgf Grammar doing the import.
+ * @param name Import name including the surrounding '<' and '>'.
+ * @returns the imported rule for a single-rule import; NULL for a
+ *          wildcard import, when nothing matched, or on error.
+ */
+jsgf_rule_t *
+jsgf_import_rule(jsgf_t *jsgf, char *name)
+{
+    char *c, *path, *newpath, *rule_name;
+    size_t namelen, packlen;
+    void *val;
+    jsgf_t *imp;
+    int import_all;
+    hash_iter_t *itor;
+
+    /* Trim the leading and trailing <> */
+    namelen = strlen(name);
+    path = ckd_malloc(namelen - 2 + 6); /* room for a trailing .gram */
+    strcpy(path, name + 1);
+    /* Split off the first part of the name */
+    c = strrchr(path, '.');
+    if (c == NULL) {
+        E_ERROR("Imported rule is not qualified: %s\n", name);
+        ckd_free(path);
+        return NULL;
+    }
+    packlen = c - path;
+    *c = '\0';
+
+    /* Look for import foo.* */
+    import_all = (namelen > 2 && 0 == strcmp(name + namelen - 3, ".*>"));
+
+    /* Construct a filename. */
+    for (c = path; *c; ++c)
+        if (*c == '.')
+            *c = '/';
+    strcat(path, ".gram");
+    newpath = path_list_search(jsgf->searchpath, path);
+    if (newpath == NULL) {
+        E_ERROR("Failed to find grammar %s\n", path);
+        ckd_free(path);
+        return NULL;
+    }
+    ckd_free(path);
+
+    path = newpath;
+    E_INFO("Importing %s from %s to %s\n", name, path, jsgf->name);
+
+    /* FIXME: Also, we need to make sure that path is fully qualified
+     * here, by adding any prefixes from jsgf->name to it. */
+    /* See if we have parsed it already */
+    if (hash_table_lookup(jsgf->imports, path, &val) == 0) {
+        E_INFO("Already imported %s\n", path);
+        imp = val;
+        ckd_free(path);
+    }
+    else {
+        /* If not, parse it.  The resolved path becomes the table key. */
+        imp = jsgf_parse_file(path, jsgf);
+        val = hash_table_enter(jsgf->imports, path, imp);
+        if (val != (void *)imp) {
+            E_WARN("Multiply imported file: %s\n", path);
+        }
+    }
+    if (imp == NULL)
+        return NULL;
+
+    /* The name to match does not change per rule, so compute it once. */
+    rule_name = importname2rulename(name);
+
+    /* Look for public rules matching rulename. */
+    for (itor = hash_table_iter(imp->rules); itor;
+         itor = hash_table_iter_next(itor)) {
+        hash_entry_t *he = itor->ent;
+        jsgf_rule_t *rule = hash_entry_val(he);
+        int rule_matches;
+        void *prev;
+        char *newname;
+
+        if (import_all) {
+            /* Match package name (symbol table is shared) */
+            rule_matches = !strncmp(rule_name, rule->name, packlen + 1);
+        }
+        else {
+            /* Exact match */
+            rule_matches = !strcmp(rule_name, rule->name);
+        }
+        if (!(rule->is_public && rule_matches))
+            continue;
+
+        /* Link this rule into the current namespace. */
+        c = strrchr(rule->name, '.');
+        assert(c != NULL);
+        newname = jsgf_fullname(jsgf, c);
+
+        E_INFO("Imported %s\n", newname);
+        prev = hash_table_enter(jsgf->rules, newname,
+                                jsgf_rule_retain(rule));
+        if (prev != (void *)rule) {
+            E_WARN("Multiply defined symbol: %s\n", newname);
+            /* The table kept its existing entry, so neither the new key
+               nor the extra reference was taken over; undo both. */
+            jsgf_rule_free(rule);
+            ckd_free(newname);
+        }
+        if (!import_all) {
+            hash_table_iter_free(itor);
+            ckd_free(rule_name);
+            return rule;
+        }
+    }
+    ckd_free(rule_name);
+
+    return NULL;
+}
+
+/**
+ * Initialise the grammar's import search path.
+ *
+ * If the JSGF_PATH environment variable is set (not consulted on WinCE),
+ * it is split on ':' and its components become the search path, in order.
+ * Otherwise the path is the directory of the given file, or "." when no
+ * file name is given.
+ *
+ * Every list entry is an independently allocated string in all branches,
+ * so that entries can be released one by one.
+ */
+static void
+jsgf_set_search_path(jsgf_t *jsgf, const char *filename)
+{
+    char *jsgf_path;
+
+#if !defined(_WIN32_WCE)
+    if ((jsgf_path = getenv("JSGF_PATH")) != NULL) {
+        char *word, *c;
+        /* FIXME: This should be a function in libsphinxbase. */
+        /* Work on a scratch copy: the environment string must not be
+           modified, and the list must not hold pointers into the middle
+           of a single allocation. */
+        word = jsgf_path = ckd_salloc(jsgf_path);
+        while ((c = strchr(word, ':'))) {
+            *c = '\0';
+            jsgf->searchpath = glist_add_ptr(jsgf->searchpath,
+                                             ckd_salloc(word));
+            word = c + 1;
+        }
+        jsgf->searchpath = glist_add_ptr(jsgf->searchpath,
+                                         ckd_salloc(word));
+        jsgf->searchpath = glist_reverse(jsgf->searchpath);
+        ckd_free(jsgf_path);
+        return;
+    }
+#endif
+
+    if (!filename) {
+        jsgf->searchpath = glist_add_ptr(jsgf->searchpath, ckd_salloc("."));
+        return;
+    }
+
+    /* The copy only provides a buffer of sufficient size for the
+       directory part written by path2dirname(). */
+    jsgf_path = ckd_salloc(filename);
+    path2dirname(filename, jsgf_path);
+    jsgf->searchpath = glist_add_ptr(jsgf->searchpath, jsgf_path);
+}
+
+/**
+ * Parse a JSGF grammar from a file, or from stdin when filename is NULL.
+ *
+ * @param filename Path of the grammar file, or NULL for standard input.
+ * @param parent   Grammar importing this one, or NULL for a top-level
+ *                 grammar (in which case the search path is set up here).
+ * @returns the parsed grammar, or NULL if the file cannot be opened or
+ *          does not parse.  The scanner and any file opened here are
+ *          released on every path.
+ */
+jsgf_t *
+jsgf_parse_file(const char *filename, jsgf_t *parent)
+{
+    yyscan_t yyscanner;
+    jsgf_t *jsgf;
+    int yyrv;
+    FILE *in = NULL;
+
+    yylex_init(&yyscanner);
+    if (filename == NULL) {
+        yyset_in(stdin, yyscanner);
+    }
+    else {
+        in = fopen(filename, "r");
+        if (in == NULL) {
+            E_ERROR_SYSTEM("Failed to open %s for parsing", filename);
+            /* The scanner was already initialised; do not leak it. */
+            yylex_destroy(yyscanner);
+            return NULL;
+        }
+        yyset_in(in, yyscanner);
+    }
+
+    jsgf = jsgf_grammar_new(parent);
+
+    if (!parent)
+        jsgf_set_search_path(jsgf, filename);
+
+    yyrv = yyparse(yyscanner, jsgf);
+    if (yyrv != 0) {
+        E_ERROR("Failed to parse JSGF grammar from '%s'\n", filename ? filename : "(stdin)");
+        jsgf_grammar_free(jsgf);
+        jsgf = NULL;
+    }
+
+    /* Shared cleanup for success and parse failure. */
+    if (in)
+        fclose(in);
+    yylex_destroy(yyscanner);
+
+    return jsgf;
+}
+
+/**
+ * Parse a JSGF grammar from text held in memory.
+ *
+ * @param string Grammar source text.
+ * @param parent Grammar importing this one, or NULL for a top-level
+ *               grammar (in which case the search path is set up here
+ *               with no file name).
+ * @returns the parsed grammar, or NULL if the text does not parse.
+ */
+jsgf_t *
+jsgf_parse_string(const char *string, jsgf_t * parent)
+{
+    yyscan_t yyscanner;
+    YY_BUFFER_STATE buf;
+    jsgf_t *jsgf;
+
+    yylex_init(&yyscanner);
+    buf = yy_scan_string(string, yyscanner);
+
+    jsgf = jsgf_grammar_new(parent);
+    if (parent == NULL)
+        jsgf_set_search_path(jsgf, NULL);
+
+    if (yyparse(yyscanner, jsgf) != 0) {
+        E_ERROR("Failed to parse JSGF grammar from input string\n");
+        jsgf_grammar_free(jsgf);
+        jsgf = NULL;
+    }
+
+    /* The scan buffer and scanner are torn down the same way either way. */
+    yy_delete_buffer(buf, yyscanner);
+    yylex_destroy(yyscanner);
+
+    return jsgf;
+}
diff --git a/media/sphinxbase/src/libsphinxbase/lm/jsgf_internal.h b/media/sphinxbase/src/libsphinxbase/lm/jsgf_internal.h
new file mode 100644
index 000000000..a5cbc9833
--- /dev/null
+++ b/media/sphinxbase/src/libsphinxbase/lm/jsgf_internal.h
@@ -0,0 +1,140 @@
+/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
+/* ====================================================================
+ * Copyright (c) 2007 Carnegie Mellon University. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * This work was supported in part by funding from the Defense Advanced
+ * Research Projects Agency and the National Science Foundation of the
+ * United States of America, and the CMU Sphinx Speech Consortium.
+ *
+ * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
+ * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
+ * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ====================================================================
+ *
+ */
+
+#ifndef __JSGF_INTERNAL_H__
+#define __JSGF_INTERNAL_H__
+
+/**
+ * @file jsgf_internal.h Internal definitions for JSGF grammar compiler
+ */
+
+#include <stdio.h>
+
+#include <sphinxbase/hash_table.h>
+#include <sphinxbase/glist.h>
+#include <sphinxbase/fsg_model.h>
+#include <sphinxbase/logmath.h>
+#include <sphinxbase/strfuncs.h>
+#include <sphinxbase/jsgf.h>
+
+
+/* Flex uses strdup which is missing on WinCE */
+#if defined(_WIN32) || defined(_WIN32_WCE)
+#define strdup _strdup
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+#if 0
+/* Fool Emacs. */
+}
+#endif
+
+#define YY_NO_INPUT /* Silence a compiler warning. */
+
+typedef struct jsgf_rhs_s jsgf_rhs_t;
+typedef struct jsgf_atom_s jsgf_atom_t;
+typedef struct jsgf_link_s jsgf_link_t;
+typedef struct jsgf_rule_stack_s jsgf_rule_stack_t;
+
+/* A parsed JSGF grammar: header fields, the rule and import tables, the
+ * import search path, and the scratch state used while converting a rule
+ * to an FSG. */
+struct jsgf_s {
+ char *version; /**< JSGF version (from header) */
+ char *charset; /**< JSGF charset (default UTF-8) */
+ char *locale; /**< JSGF locale (default C) */
+ char *name; /**< Grammar name */
+
+ hash_table_t *rules; /**< Defined or imported rules in this grammar. */
+ hash_table_t *imports; /**< Pointers to imported grammars. */
+ jsgf_t *parent; /**< Parent grammar (if this is an imported one) */
+ glist_t searchpath; /**< List of directories to search for grammars. */
+
+ /* Scratch variables for FSG conversion. */
+ int nstate; /**< Number of generated states. */
+ glist_t links; /**< Generated FSG links. */
+ glist_t rulestack; /**< Stack of currently expanded rules. */
+};
+
+/* One entry of the stack of rules currently being expanded.  The stack is
+ * searched during expansion to detect recursive rule references; `entry`
+ * is the state a right-recursive reference links back to. */
+struct jsgf_rule_stack_s {
+ jsgf_rule_t *rule; /**< The rule being expanded */
+ int entry; /**< The entry-state for this expansion */
+};
+
+/* A reference-counted grammar rule: a name plus a linked list of
+ * alternative expansions.
+ * NOTE(review): jsgf_define_rule() generates a name when it is given NULL,
+ * so the "NULL for an alternation/grouping" remark on `name` may be out of
+ * date -- confirm. */
+struct jsgf_rule_s {
+ int refcnt; /**< Reference count. */
+ char *name; /**< Rule name (NULL for an alternation/grouping) */
+ int is_public; /**< Is this rule marked 'public'? */
+ jsgf_rhs_t *rhs; /**< Expansion */
+};
+
+/* One alternative of a rule: a sequence of atoms, chained to the next
+ * alternative (if any) through `alt`. */
+struct jsgf_rhs_s {
+ glist_t atoms; /**< Sequence of items */
+ jsgf_rhs_t *alt; /**< Linked list of alternates */
+};
+
+/* A single item in an expansion: either a token or a reference to a rule
+ * (see jsgf_atom_is_rule), with optional tags and a weight. */
+struct jsgf_atom_s {
+ char *name; /**< Rule or token name */
+ glist_t tags; /**< Tags, if any (glist_t of char *) */
+ float weight; /**< Weight (default 1) */
+};
+
+/* A link between two generated FSG states, labelled with the atom that
+ * produced it.  `atom` may be NULL, in which case the link is turned into
+ * a null transition with log-probability 0 when the FSG is built. */
+struct jsgf_link_s {
+ jsgf_atom_t *atom; /**< Name, tags, weight */
+ int from; /**< From state */
+ int to; /**< To state */
+};
+
+/* True when the atom's name starts with '<', i.e. the atom refers to a
+ * rule rather than a token. */
+#define jsgf_atom_is_rule(atom) ((atom)->name[0] == '<')
+
+/* Record a link from state `from` to state `to` in grammar->links. */
+void jsgf_add_link(jsgf_t *grammar, jsgf_atom_t *atom, int from, int to);
+jsgf_atom_t *jsgf_atom_new(char *name, float weight);
+jsgf_atom_t *jsgf_kleene_new(jsgf_t *jsgf, jsgf_atom_t *atom, int plus);
+/* Define an anonymous rule of the form (<NULL> | exp). */
+jsgf_rule_t *jsgf_optional_new(jsgf_t *jsgf, jsgf_rhs_t *exp);
+/* Create a rule and enter it into jsgf->rules; a NULL name requests a
+ * generated one. */
+jsgf_rule_t *jsgf_define_rule(jsgf_t *jsgf, char *name, jsgf_rhs_t *rhs, int is_public);
+/* Import the named rule (or all public rules for "<pkg.*>") from another
+ * grammar file found on the search path. */
+jsgf_rule_t *jsgf_import_rule(jsgf_t *jsgf, char *name);
+
+int jsgf_atom_free(jsgf_atom_t *atom);
+/* Drop one reference; the rule is destroyed when the count reaches zero. */
+int jsgf_rule_free(jsgf_rule_t *rule);
+/* Add one reference and return the same rule. */
+jsgf_rule_t *jsgf_rule_retain(jsgf_rule_t *rule);
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* __JSGF_INTERNAL_H__ */
diff --git a/media/sphinxbase/src/libsphinxbase/lm/jsgf_parser.c b/media/sphinxbase/src/libsphinxbase/lm/jsgf_parser.c
new file mode 100644
index 000000000..20acbb9d9
--- /dev/null
+++ b/media/sphinxbase/src/libsphinxbase/lm/jsgf_parser.c
@@ -0,0 +1,1799 @@
+
+/* A Bison parser, made by GNU Bison 2.4.1. */
+
+/* Skeleton implementation for Bison's Yacc-like parsers in C
+
+ Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004, 2005, 2006
+ Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+/* As a special exception, you may create a larger work that contains
+ part or all of the Bison parser skeleton and distribute that work
+ under terms of your choice, so long as that work isn't itself a
+ parser generator using the skeleton or a modified version thereof
+ as a parser skeleton. Alternatively, if you modify or redistribute
+ the parser skeleton itself, you may (at your option) remove this
+ special exception, which will cause the skeleton and the resulting
+ Bison output files to be licensed under the GNU General Public
+ License without this special exception.
+
+ This special exception was added by the Free Software Foundation in
+ version 2.2 of Bison. */
+
+/* C LALR(1) parser skeleton written by Richard Stallman, by
+ simplifying the original so-called "semantic" parser. */
+
+/* All symbols defined below should begin with yy or YY, to avoid
+ infringing on user name space. This should be done even for local
+ variables, as they might otherwise be expanded by user macros.
+ There are some unavoidable exceptions within include files to
+ define necessary library symbols; they are noted "INFRINGES ON
+ USER NAME SPACE" below. */
+
+/* Identify Bison output. */
+#define YYBISON 1
+
+/* Bison version. */
+#define YYBISON_VERSION "2.4.1"
+
+/* Skeleton name. */
+#define YYSKELETON_NAME "yacc.c"
+
+/* Pure parsers. */
+#define YYPURE 1
+
+/* Push parsers. */
+#define YYPUSH 0
+
+/* Pull parsers. */
+#define YYPULL 1
+
+/* Using locations. */
+#define YYLSP_NEEDED 0
+
+
+
+/* Copy the first part of user declarations. */
+
+/* Line 189 of yacc.c */
+#line 37 "jsgf_parser.y"
+
+#define YYERROR_VERBOSE
+
+#include <stdio.h>
+#include <string.h>
+
+#include <sphinxbase/hash_table.h>
+#include <sphinxbase/ckd_alloc.h>
+#include <sphinxbase/err.h>
+
+#include "jsgf_internal.h"
+#include "jsgf_parser.h"
+#include "jsgf_scanner.h"
+
+/* Suppress warnings from generated code */
+#if defined _MSC_VER
+#pragma warning(disable: 4273)
+#endif
+
+void yyerror(yyscan_t lex, jsgf_t *jsgf, const char *s);
+
+
+
+/* Line 189 of yacc.c */
+#line 97 "jsgf_parser.c"
+
+/* Enabling traces. */
+#ifndef YYDEBUG
+# define YYDEBUG 0
+#endif
+
+/* Enabling verbose error messages. */
+#ifdef YYERROR_VERBOSE
+# undef YYERROR_VERBOSE
+# define YYERROR_VERBOSE 1
+#else
+# define YYERROR_VERBOSE 0
+#endif
+
+/* Enabling the token table. */
+#ifndef YYTOKEN_TABLE
+# define YYTOKEN_TABLE 0
+#endif
+
+
+/* Tokens. */
+#ifndef YYTOKENTYPE
+# define YYTOKENTYPE
+ /* Put the tokens into the symbol table, so that GDB and other debuggers
+ know about them. */
+ enum yytokentype {
+ HEADER = 258,
+ GRAMMAR = 259,
+ IMPORT = 260,
+ PUBLIC = 261,
+ TOKEN = 262,
+ RULENAME = 263,
+ TAG = 264,
+ WEIGHT = 265
+ };
+#endif
+/* Tokens. */
+#define HEADER 258
+#define GRAMMAR 259
+#define IMPORT 260
+#define PUBLIC 261
+#define TOKEN 262
+#define RULENAME 263
+#define TAG 264
+#define WEIGHT 265
+
+
+
+
+#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
+typedef union YYSTYPE
+{
+
+/* Line 214 of yacc.c */
+#line 65 "jsgf_parser.y"
+
+ char *name;
+ float weight;
+ jsgf_rule_t *rule;
+ jsgf_rhs_t *rhs;
+ jsgf_atom_t *atom;
+
+
+
+/* Line 214 of yacc.c */
+#line 163 "jsgf_parser.c"
+} YYSTYPE;
+# define YYSTYPE_IS_TRIVIAL 1
+# define yystype YYSTYPE /* obsolescent; will be withdrawn */
+# define YYSTYPE_IS_DECLARED 1
+#endif
+
+
+/* Copy the second part of user declarations. */
+
+
+/* Line 264 of yacc.c */
+#line 175 "jsgf_parser.c"
+
+#ifdef short
+# undef short
+#endif
+
+#ifdef YYTYPE_UINT8
+typedef YYTYPE_UINT8 yytype_uint8;
+#else
+typedef unsigned char yytype_uint8;
+#endif
+
+#ifdef YYTYPE_INT8
+typedef YYTYPE_INT8 yytype_int8;
+#elif (defined __STDC__ || defined __C99__FUNC__ \
+ || defined __cplusplus || defined _MSC_VER)
+typedef signed char yytype_int8;
+#else
+typedef short int yytype_int8;
+#endif
+
+#ifdef YYTYPE_UINT16
+typedef YYTYPE_UINT16 yytype_uint16;
+#else
+typedef unsigned short int yytype_uint16;
+#endif
+
+#ifdef YYTYPE_INT16
+typedef YYTYPE_INT16 yytype_int16;
+#else
+typedef short int yytype_int16;
+#endif
+
+#ifndef YYSIZE_T
+# ifdef __SIZE_TYPE__
+# define YYSIZE_T __SIZE_TYPE__
+# elif defined size_t
+# define YYSIZE_T size_t
+# elif ! defined YYSIZE_T && (defined __STDC__ || defined __C99__FUNC__ \
+ || defined __cplusplus || defined _MSC_VER)
+# include <stddef.h> /* INFRINGES ON USER NAME SPACE */
+# define YYSIZE_T size_t
+# else
+# define YYSIZE_T unsigned int
+# endif
+#endif
+
+#define YYSIZE_MAXIMUM ((YYSIZE_T) -1)
+
+#ifndef YY_
+# if YYENABLE_NLS
+# if ENABLE_NLS
+# include <libintl.h> /* INFRINGES ON USER NAME SPACE */
+# define YY_(msgid) dgettext ("bison-runtime", msgid)
+# endif
+# endif
+# ifndef YY_
+# define YY_(msgid) msgid
+# endif
+#endif
+
+/* Suppress unused-variable warnings by "using" E. */
+#if ! defined lint || defined __GNUC__
+# define YYUSE(e) ((void) (e))
+#else
+# define YYUSE(e) /* empty */
+#endif
+
+/* Identity function, used to suppress warnings about constant conditions. */
+#ifndef lint
+# define YYID(n) (n)
+#else
+#if (defined __STDC__ || defined __C99__FUNC__ \
+ || defined __cplusplus || defined _MSC_VER)
+static int
+YYID (int yyi)
+#else
+static int
+YYID (yyi)
+ int yyi;
+#endif
+{
+ return yyi;
+}
+#endif
+
+#if ! defined yyoverflow || YYERROR_VERBOSE
+
+/* The parser invokes alloca or malloc; define the necessary symbols. */
+
+# ifdef YYSTACK_USE_ALLOCA
+# if YYSTACK_USE_ALLOCA
+# ifdef __GNUC__
+# define YYSTACK_ALLOC __builtin_alloca
+# elif defined __BUILTIN_VA_ARG_INCR
+# include <alloca.h> /* INFRINGES ON USER NAME SPACE */
+# elif defined _AIX
+# define YYSTACK_ALLOC __alloca
+# elif defined _MSC_VER
+# include <malloc.h> /* INFRINGES ON USER NAME SPACE */
+# define alloca _alloca
+# else
+# define YYSTACK_ALLOC alloca
+# if ! defined _ALLOCA_H && ! defined _STDLIB_H && (defined __STDC__ || defined __C99__FUNC__ \
+ || defined __cplusplus || defined _MSC_VER)
+# include <stdlib.h> /* INFRINGES ON USER NAME SPACE */
+# ifndef _STDLIB_H
+# define _STDLIB_H 1
+# endif
+# endif
+# endif
+# endif
+# endif
+
+# ifdef YYSTACK_ALLOC
+ /* Pacify GCC's `empty if-body' warning. */
+# define YYSTACK_FREE(Ptr) do { /* empty */; } while (YYID (0))
+# ifndef YYSTACK_ALLOC_MAXIMUM
+ /* The OS might guarantee only one guard page at the bottom of the stack,
+ and a page size can be as small as 4096 bytes. So we cannot safely
+ invoke alloca (N) if N exceeds 4096. Use a slightly smaller number
+ to allow for a few compiler-allocated temporary stack slots. */
+# define YYSTACK_ALLOC_MAXIMUM 4032 /* reasonable circa 2006 */
+# endif
+# else
+# define YYSTACK_ALLOC YYMALLOC
+# define YYSTACK_FREE YYFREE
+# ifndef YYSTACK_ALLOC_MAXIMUM
+# define YYSTACK_ALLOC_MAXIMUM YYSIZE_MAXIMUM
+# endif
+# if (defined __cplusplus && ! defined _STDLIB_H \
+ && ! ((defined YYMALLOC || defined malloc) \
+ && (defined YYFREE || defined free)))
+# include <stdlib.h> /* INFRINGES ON USER NAME SPACE */
+# ifndef _STDLIB_H
+# define _STDLIB_H 1
+# endif
+# endif
+# ifndef YYMALLOC
+# define YYMALLOC malloc
+# if ! defined malloc && ! defined _STDLIB_H && (defined __STDC__ || defined __C99__FUNC__ \
+ || defined __cplusplus || defined _MSC_VER)
+void *malloc (YYSIZE_T); /* INFRINGES ON USER NAME SPACE */
+# endif
+# endif
+# ifndef YYFREE
+# define YYFREE free
+# if ! defined free && ! defined _STDLIB_H && (defined __STDC__ || defined __C99__FUNC__ \
+ || defined __cplusplus || defined _MSC_VER)
+void free (void *); /* INFRINGES ON USER NAME SPACE */
+# endif
+# endif
+# endif
+#endif /* ! defined yyoverflow || YYERROR_VERBOSE */
+
+
+#if (! defined yyoverflow \
+ && (! defined __cplusplus \
+ || (defined YYSTYPE_IS_TRIVIAL && YYSTYPE_IS_TRIVIAL)))
+
+/* A type that is properly aligned for any stack member. */
+union yyalloc
+{
+ yytype_int16 yyss_alloc;
+ YYSTYPE yyvs_alloc;
+};
+
+/* The size of the maximum gap between one aligned stack and the next. */
+# define YYSTACK_GAP_MAXIMUM (sizeof (union yyalloc) - 1)
+
+/* The size of an array large enough to hold all stacks, each with
+ N elements. */
+# define YYSTACK_BYTES(N) \
+ ((N) * (sizeof (yytype_int16) + sizeof (YYSTYPE)) \
+ + YYSTACK_GAP_MAXIMUM)
+
+/* Copy COUNT objects from FROM to TO. The source and destination do
+ not overlap. */
+# ifndef YYCOPY
+# if defined __GNUC__ && 1 < __GNUC__
+# define YYCOPY(To, From, Count) \
+ __builtin_memcpy (To, From, (Count) * sizeof (*(From)))
+# else
+# define YYCOPY(To, From, Count) \
+ do \
+ { \
+ YYSIZE_T yyi; \
+ for (yyi = 0; yyi < (Count); yyi++) \
+ (To)[yyi] = (From)[yyi]; \
+ } \
+ while (YYID (0))
+# endif
+# endif
+
+/* Relocate STACK from its old location to the new one. The
+ local variables YYSIZE and YYSTACKSIZE give the old and new number of
+ elements in the stack, and YYPTR gives the new location of the
+ stack. Advance YYPTR to a properly aligned location for the next
+ stack.
+
+ STACK_ALLOC names the member of *YYPTR that receives the copy; STACK
+ is then repointed at that member.  YYPTR advances in whole units of
+ its pointee: the new stack's byte size plus YYSTACK_GAP_MAXIMUM,
+ divided by sizeof (*YYPTR).  The macro relies on YYPTR, YYSIZE and
+ YYSTACKSIZE being in scope where it is expanded. */
+# define YYSTACK_RELOCATE(Stack_alloc, Stack) \
+ do \
+ { \
+ YYSIZE_T yynewbytes; \
+ YYCOPY (&yyptr->Stack_alloc, Stack, yysize); \
+ Stack = &yyptr->Stack_alloc; \
+ yynewbytes = yystacksize * sizeof (*Stack) + YYSTACK_GAP_MAXIMUM; \
+ yyptr += yynewbytes / sizeof (*yyptr); \
+ } \
+ while (YYID (0))
+
+#endif
+
+/* YYFINAL -- State number of the termination state. */
+#define YYFINAL 7
+/* YYLAST -- Last index in YYTABLE. */
+#define YYLAST 54
+
+/* YYNTOKENS -- Number of terminals. */
+#define YYNTOKENS 20
+/* YYNNTS -- Number of nonterminals. */
+#define YYNNTS 16
+/* YYNRULES -- Number of rules. */
+#define YYNRULES 33
+/* YYNSTATES -- Number of states. */
+#define YYNSTATES 58
+
+/* YYTRANSLATE(YYLEX) -- Bison symbol number corresponding to YYLEX.
+ YYMAXUTOK is the largest token code looked up in yytranslate; any
+ larger code yields YYUNDEFTOK.  Because the argument is cast to
+ unsigned before the comparison, a negative value also fails the
+ range test and yields YYUNDEFTOK. */
+#define YYUNDEFTOK 2
+#define YYMAXUTOK 265
+
+#define YYTRANSLATE(YYX) \
+ ((unsigned int) (YYX) <= YYMAXUTOK ? yytranslate[YYX] : YYUNDEFTOK)
+
+/* YYTRANSLATE[YYLEX] -- Bison symbol number corresponding to YYLEX.
+ Indexed by the token code returned by the lexer, 0 through YYMAXUTOK.
+ Most entries are 2 (YYUNDEFTOK).  The remaining entries map
+ single-character token codes to their symbols (for example index 40
+ gives symbol 14, which yytname names "'('"), and codes 256 through
+ 265 to symbols 1 through 10. */
+static const yytype_uint8 yytranslate[] =
+{
+ 0, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 14, 15, 18, 19, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 11,
+ 2, 12, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 16, 2, 17, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 13, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 1, 2, 3, 4,
+ 5, 6, 7, 8, 9, 10
+};
+
+#if YYDEBUG
+/* YYPRHS[YYN] -- Index of the first RHS symbol of rule number YYN in
+ YYRHS.
+ Compiled only when YYDEBUG is nonzero; yy_reduce_print adds the
+ position within the rule to this index to find each RHS symbol. */
+static const yytype_uint8 yyprhs[] =
+{
+ 0, 0, 3, 5, 8, 12, 15, 18, 22, 27,
+ 33, 37, 39, 42, 46, 48, 51, 56, 62, 64,
+ 68, 70, 73, 75, 78, 80, 83, 87, 91, 93,
+ 95, 97, 99, 102
+};
+
+/* YYRHS -- A `-1'-separated list of the rules' RHS.
+ Each non-negative entry is a symbol number (an index into yytname);
+ yyprhs gives the offset at which each rule's symbols begin. */
+static const yytype_int8 yyrhs[] =
+{
+ 21, 0, -1, 22, -1, 22, 27, -1, 22, 25,
+ 27, -1, 23, 24, -1, 3, 11, -1, 3, 7,
+ 11, -1, 3, 7, 7, 11, -1, 3, 7, 7,
+ 7, 11, -1, 4, 7, 11, -1, 26, -1, 25,
+ 26, -1, 5, 8, 11, -1, 28, -1, 27, 28,
+ -1, 8, 12, 29, 11, -1, 6, 8, 12, 29,
+ 11, -1, 30, -1, 29, 13, 30, -1, 31, -1,
+ 30, 31, -1, 32, -1, 31, 9, -1, 35, -1,
+ 10, 35, -1, 14, 29, 15, -1, 16, 29, 17,
+ -1, 7, -1, 8, -1, 33, -1, 34, -1, 35,
+ 18, -1, 35, 19, -1
+};
+
+/* YYRLINE[YYN] -- source line where rule number YYN was defined.
+ The line numbers refer to the grammar file jsgf_parser.y and agree
+ with the #line directives on the rule actions in yyparse.
+ yy_reduce_print reports them in its trace output. */
+static const yytype_uint8 yyrline[] =
+{
+ 0, 82, 82, 83, 84, 87, 90, 91, 92, 93,
+ 97, 100, 101, 104, 107, 108, 111, 112, 115, 116,
+ 121, 123, 127, 128, 132, 133, 136, 139, 142, 143,
+ 144, 145, 146, 147
+};
+#endif
+
+#if YYDEBUG || YYERROR_VERBOSE || YYTOKEN_TABLE
+/* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM.
+ First, the terminals, then, starting at YYNTOKENS, nonterminals.
+ The list ends with a null pointer.  Single-character tokens are
+ stored with their surrounding apostrophes, e.g. "';'". */
+static const char *const yytname[] =
+{
+ "$end", "error", "$undefined", "HEADER", "GRAMMAR", "IMPORT", "PUBLIC",
+ "TOKEN", "RULENAME", "TAG", "WEIGHT", "';'", "'='", "'|'", "'('", "')'",
+ "'['", "']'", "'*'", "'+'", "$accept", "grammar", "header",
+ "jsgf_header", "grammar_header", "import_header", "import_statement",
+ "rule_list", "rule", "alternate_list", "rule_expansion",
+ "tagged_rule_item", "rule_item", "rule_group", "rule_optional",
+ "rule_atom", 0
+};
+#endif
+
+# ifdef YYPRINT
+/* YYTOKNUM[YYLEX-NUM] -- Internal token number corresponding to
+ token YYLEX-NUM.
+ Compiled only when YYPRINT is defined.  It holds one entry per
+ terminal symbol and is read by yy_symbol_value_print to give YYPRINT
+ the token's external code. */
+static const yytype_uint16 yytoknum[] =
+{
+ 0, 256, 257, 258, 259, 260, 261, 262, 263, 264,
+ 265, 59, 61, 124, 40, 41, 91, 93, 42, 43
+};
+# endif
+
+/* YYR1[YYN] -- Symbol number of symbol that rule YYN derives.
+ After reducing by rule YYN, yyparse reads this entry and subtracts
+ YYNTOKENS from it to index yypgoto and yydefgoto. */
+static const yytype_uint8 yyr1[] =
+{
+ 0, 20, 21, 21, 21, 22, 23, 23, 23, 23,
+ 24, 25, 25, 26, 27, 27, 28, 28, 29, 29,
+ 30, 30, 31, 31, 32, 32, 33, 34, 35, 35,
+ 35, 35, 35, 35
+};
+
+/* YYR2[YYN] -- Number of symbols composing right hand side of rule YYN.
+ yyparse loads this into yylen at the start of a reduction and pops
+ that many entries from both stacks afterwards. */
+static const yytype_uint8 yyr2[] =
+{
+ 0, 2, 1, 2, 3, 2, 2, 3, 4, 5,
+ 3, 1, 2, 3, 1, 2, 4, 5, 1, 3,
+ 1, 2, 1, 2, 1, 2, 3, 3, 1, 1,
+ 1, 1, 2, 2
+};
+
+/* YYDEFACT[STATE-NAME] -- Default rule to reduce with in state
+ STATE-NUM when YYTABLE doesn't specify something else to do. Zero
+ means the default is an error.
+ Holds one entry per state (YYNSTATES entries); consulted at the
+ yydefault label of yyparse. */
+static const yytype_uint8 yydefact[] =
+{
+ 0, 0, 0, 2, 0, 0, 6, 1, 0, 0,
+ 0, 0, 11, 3, 14, 0, 5, 0, 7, 0,
+ 0, 0, 12, 4, 15, 0, 0, 8, 13, 0,
+ 28, 29, 0, 0, 0, 0, 18, 20, 22, 30,
+ 31, 24, 10, 9, 0, 25, 0, 0, 16, 0,
+ 21, 23, 32, 33, 17, 26, 27, 19
+};
+
+/* YYDEFGOTO[NTERM-NUM].
+ State to enter after a reduction when the yypgoto/yycheck lookup for
+ the reduced nonterminal does not match the state on top of the
+ stack.  Indexed by nonterminal symbol number minus YYNTOKENS. */
+static const yytype_int8 yydefgoto[] =
+{
+ -1, 2, 3, 4, 16, 11, 12, 13, 14, 35,
+ 36, 37, 38, 39, 40, 41
+};
+
+/* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing
+ STATE-NUM.
+ An entry equal to YYPACT_NINF makes yyparse take the state's default
+ action without reading a lookahead token.  Otherwise the lookahead's
+ symbol number is added to the entry to form the YYTABLE/YYCHECK
+ index. */
+#define YYPACT_NINF -37
+static const yytype_int8 yypact[] =
+{
+ -1, -2, 36, 22, 35, 8, -37, -37, 32, 33,
+ 30, 22, -37, 17, -37, 37, -37, 13, -37, 34,
+ 31, -4, -37, 17, -37, 38, 39, -37, -37, -4,
+ -37, -37, 0, -4, -4, 18, -4, 42, -37, -37,
+ -37, 19, -37, -37, 21, 19, 20, 9, -37, -4,
+ 42, -37, -37, -37, -37, -37, -37, -4
+};
+
+/* YYPGOTO[NTERM-NUM].
+ After a reduction yyparse adds the state now on top of the stack to
+ this entry to form a candidate YYTABLE index; the candidate is used
+ only if it lies in 0..YYLAST and YYCHECK there equals that state. */
+static const yytype_int8 yypgoto[] =
+{
+ -37, -37, -37, -37, -37, -37, 41, 43, -12, -16,
+ -3, -36, -37, -37, -37, 15
+};
+
+/* YYTABLE[YYPACT[STATE-NUM]]. What to do in state STATE-NUM. If
+ positive, shift that token. If negative, reduce the rule which
+ number is the opposite. If zero, do what YYDEFACT says.
+ If YYTABLE_NINF, syntax error.
+ An entry is used only when the YYCHECK entry at the same index holds
+ the expected value: the lookahead symbol for an action lookup, or
+ the state on top of the stack for a goto lookup.  Valid indexes run
+ from 0 to YYLAST. */
+#define YYTABLE_NINF -1
+static const yytype_uint8 yytable[] =
+{
+ 50, 24, 1, 30, 31, 5, 32, 30, 31, 6,
+ 33, 24, 34, 44, 33, 17, 34, 46, 47, 18,
+ 26, 50, 49, 9, 27, 10, 56, 8, 9, 48,
+ 10, 49, 54, 49, 49, 55, 7, 52, 53, 15,
+ 19, 20, 21, 29, 25, 28, 57, 45, 0, 42,
+ 43, 51, 22, 0, 23
+};
+
+static const yytype_int8 yycheck[] = /* Guard table parallel to yytable:
+ a yytable entry is honoured only when the yycheck entry at the same
+ index equals the symbol (action lookup) or state (goto lookup) that
+ produced the index. */
+{
+ 36, 13, 3, 7, 8, 7, 10, 7, 8, 11,
+ 14, 23, 16, 29, 14, 7, 16, 33, 34, 11,
+ 7, 57, 13, 6, 11, 8, 17, 5, 6, 11,
+ 8, 13, 11, 13, 13, 15, 0, 18, 19, 4,
+ 8, 8, 12, 12, 7, 11, 49, 32, -1, 11,
+ 11, 9, 11, -1, 11
+};
+
+/* YYSTOS[STATE-NUM] -- The (internal number of the) accessing
+ symbol of state STATE-NUM.
+ Holds one entry per state (YYNSTATES entries); each value is a
+ symbol number as used to index yytname. */
+static const yytype_uint8 yystos[] =
+{
+ 0, 3, 21, 22, 23, 7, 11, 0, 5, 6,
+ 8, 25, 26, 27, 28, 4, 24, 7, 11, 8,
+ 8, 12, 26, 27, 28, 7, 7, 11, 11, 12,
+ 7, 8, 10, 14, 16, 29, 30, 31, 32, 33,
+ 34, 35, 11, 11, 29, 35, 29, 29, 11, 13,
+ 31, 9, 18, 19, 11, 15, 17, 30
+};
+
+#define yyerrok (yyerrstatus = 0) /* leave error-recovery mode at once */
+#define yyclearin (yychar = YYEMPTY) /* drop the current lookahead token */
+#define YYEMPTY (-2) /* yychar value meaning "no lookahead has been read" */
+#define YYEOF 0 /* token code for end of input */
+
+#define YYACCEPT goto yyacceptlab /* jump to yyparse's yyacceptlab label */
+#define YYABORT goto yyabortlab /* jump to yyparse's yyabortlab label */
+#define YYERROR goto yyerrorlab /* start error recovery without calling yyerror */
+
+
+/* Like YYERROR except do call yyerror. This remains here temporarily
+ to ease the transition to the new meaning of YYERROR, for GCC.
+ Once GCC version 2 has supplanted version 1, this can go. */
+
+#define YYFAIL goto yyerrlab
+
+#define YYRECOVERING() (!!yyerrstatus) /* 1 while yyerrstatus is nonzero, else 0 */
+
+#define YYBACKUP(Token, Value) /* Replace the lookahead with Token/Value
+ and resume at yybackup after popping one stack entry.  This is
+ allowed only when no lookahead is pending and the rule being reduced
+ has exactly one RHS symbol; otherwise yyerror is called with
+ "syntax error: cannot back up" and YYERROR is raised. */ \
+do \
+ if (yychar == YYEMPTY && yylen == 1) \
+ { \
+ yychar = (Token); \
+ yylval = (Value); \
+ yytoken = YYTRANSLATE (yychar); \
+ YYPOPSTACK (1); \
+ goto yybackup; \
+ } \
+ else \
+ { \
+ yyerror (yyscanner, jsgf, YY_("syntax error: cannot back up")); \
+ YYERROR; \
+ } \
+while (YYID (0))
+
+
+#define YYTERROR 1 /* symbol number of the "error" token (yytname[1]) */
+#define YYERRCODE 256 /* external token code of the "error" token (yytoknum[1]) */
+
+
+/* YYLLOC_DEFAULT -- Set CURRENT to span from RHS[1] to RHS[N].
+ If N is 0, then set CURRENT to the empty location which ends
+ the previous symbol: RHS[0] (always defined).
+ The default below is used only if YYLLOC_DEFAULT has not already
+ been defined.  It reads and writes the first_line, first_column,
+ last_line and last_column members of the location values. */
+
+#define YYRHSLOC(Rhs, K) ((Rhs)[K])
+#ifndef YYLLOC_DEFAULT
+# define YYLLOC_DEFAULT(Current, Rhs, N) \
+ do \
+ if (YYID (N)) \
+ { \
+ (Current).first_line = YYRHSLOC (Rhs, 1).first_line; \
+ (Current).first_column = YYRHSLOC (Rhs, 1).first_column; \
+ (Current).last_line = YYRHSLOC (Rhs, N).last_line; \
+ (Current).last_column = YYRHSLOC (Rhs, N).last_column; \
+ } \
+ else \
+ { \
+ (Current).first_line = (Current).last_line = \
+ YYRHSLOC (Rhs, 0).last_line; \
+ (Current).first_column = (Current).last_column = \
+ YYRHSLOC (Rhs, 0).last_column; \
+ } \
+ while (YYID (0))
+#endif
+
+
+/* YY_LOCATION_PRINT -- Print the location on the stream.
+ This macro was not mandated originally: define only if we know
+ we won't break user code: when these are the locations we know.
+ When YYLTYPE_IS_TRIVIAL is nonzero the location is written as
+ first_line.first_column-last_line.last_column; otherwise the macro
+ expands to a no-op expression. */
+
+#ifndef YY_LOCATION_PRINT
+# if YYLTYPE_IS_TRIVIAL
+# define YY_LOCATION_PRINT(File, Loc) \
+ fprintf (File, "%d.%d-%d.%d", \
+ (Loc).first_line, (Loc).first_column, \
+ (Loc).last_line, (Loc).last_column)
+# else
+# define YY_LOCATION_PRINT(File, Loc) ((void) 0)
+# endif
+#endif
+
+
+/* YYLEX -- calling `yylex' with the right arguments.
+ The first argument is always the address of yylval, which the lexer
+ fills in.  The second is YYLEX_PARAM when that macro is defined,
+ and yyparse's yyscanner argument otherwise. */
+
+#ifdef YYLEX_PARAM
+# define YYLEX yylex (&yylval, YYLEX_PARAM)
+#else
+# define YYLEX yylex (&yylval, yyscanner)
+#endif
+
+/* Enable debugging if requested. */
+#if YYDEBUG
+
+# ifndef YYFPRINTF /* trace output routine; defaults to fprintf unless already defined */
+# include <stdio.h> /* INFRINGES ON USER NAME SPACE */
+# define YYFPRINTF fprintf
+# endif
+
+# define YYDPRINTF(Args) /* Args is a parenthesized YYFPRINTF argument list; printed only when yydebug is nonzero */ \
+do { \
+ if (yydebug) \
+ YYFPRINTF Args; \
+} while (YYID (0))
+
+# define YY_SYMBOL_PRINT(Title, Type, Value, Location) /* when yydebug is nonzero, print Title, the symbol via yy_symbol_print, and a newline to stderr; Location is not used */ \
+do { \
+ if (yydebug) \
+ { \
+ YYFPRINTF (stderr, "%s ", Title); \
+ yy_symbol_print (stderr, \
+ Type, Value, yyscanner, jsgf); \
+ YYFPRINTF (stderr, "\n"); \
+ } \
+} while (YYID (0))
+
+
+/*----------------------------------------------------------------.
+| Print the semantic value of this symbol on YYOUTPUT.           |
+|                                                                 |
+| YYTYPE is the symbol number and YYVALUEP points at its value.  |
+| Nothing is done when YYVALUEP is null.  If YYPRINT is defined  |
+| and the symbol is a terminal (YYTYPE < YYNTOKENS), YYPRINT is  |
+| called with the token's yytoknum entry and the value.  The     |
+| switch below has only a default case, so no other output is    |
+| produced.  YYSCANNER and JSGF are accepted but unused.         |
+`----------------------------------------------------------------*/
+
+/*ARGSUSED*/
+#if (defined __STDC__ || defined __C99__FUNC__ \
+ || defined __cplusplus || defined _MSC_VER)
+static void
+yy_symbol_value_print (FILE *yyoutput, int yytype, YYSTYPE const * const yyvaluep, void* yyscanner, jsgf_t *jsgf)
+#else
+static void
+yy_symbol_value_print (yyoutput, yytype, yyvaluep, yyscanner, jsgf)
+ FILE *yyoutput;
+ int yytype;
+ YYSTYPE const * const yyvaluep;
+ void* yyscanner;
+ jsgf_t *jsgf;
+#endif
+{
+ if (!yyvaluep)
+ return;
+ YYUSE (yyscanner);
+ YYUSE (jsgf);
+# ifdef YYPRINT
+ if (yytype < YYNTOKENS)
+ YYPRINT (yyoutput, yytoknum[yytype], *yyvaluep);
+# else
+ YYUSE (yyoutput);
+# endif
+ switch (yytype)
+ {
+ default:
+ break;
+ }
+}
+
+
+/*----------------------------------------------------------------.
+| Print this symbol on YYOUTPUT.                                  |
+|                                                                 |
+| Writes "token NAME (" for a terminal (YYTYPE < YYNTOKENS) or   |
+| "nterm NAME (" for a nonterminal, with NAME taken from         |
+| yytname, then the value via yy_symbol_value_print, then ")".   |
+| No trailing newline is written.                                |
+`----------------------------------------------------------------*/
+
+#if (defined __STDC__ || defined __C99__FUNC__ \
+ || defined __cplusplus || defined _MSC_VER)
+static void
+yy_symbol_print (FILE *yyoutput, int yytype, YYSTYPE const * const yyvaluep, void* yyscanner, jsgf_t *jsgf)
+#else
+static void
+yy_symbol_print (yyoutput, yytype, yyvaluep, yyscanner, jsgf)
+ FILE *yyoutput;
+ int yytype;
+ YYSTYPE const * const yyvaluep;
+ void* yyscanner;
+ jsgf_t *jsgf;
+#endif
+{
+ if (yytype < YYNTOKENS)
+ YYFPRINTF (yyoutput, "token %s (", yytname[yytype]);
+ else
+ YYFPRINTF (yyoutput, "nterm %s (", yytname[yytype]);
+
+ yy_symbol_value_print (yyoutput, yytype, yyvaluep, yyscanner, jsgf);
+ YYFPRINTF (yyoutput, ")");
+}
+
+/*------------------------------------------------------------------.
+| yy_stack_print -- Print the state stack from its BOTTOM up to its |
+| TOP (included).                                                   |
+|                                                                   |
+| Output goes to stderr as "Stack now" followed by each state       |
+| number preceded by a space, then a newline.  The YY_STACK_PRINT   |
+| macro that follows calls this only when yydebug is nonzero.       |
+`------------------------------------------------------------------*/
+
+#if (defined __STDC__ || defined __C99__FUNC__ \
+ || defined __cplusplus || defined _MSC_VER)
+static void
+yy_stack_print (yytype_int16 *yybottom, yytype_int16 *yytop)
+#else
+static void
+yy_stack_print (yybottom, yytop)
+ yytype_int16 *yybottom;
+ yytype_int16 *yytop;
+#endif
+{
+ YYFPRINTF (stderr, "Stack now");
+ for (; yybottom <= yytop; yybottom++)
+ {
+ int yybot = *yybottom; /* widen to int to match the %d conversion */
+ YYFPRINTF (stderr, " %d", yybot);
+ }
+ YYFPRINTF (stderr, "\n");
+}
+
+# define YY_STACK_PRINT(Bottom, Top) \
+do { \
+ if (yydebug) \
+ yy_stack_print ((Bottom), (Top)); \
+} while (YYID (0))
+
+
+/*----------------------------------------------------------------.
+| Report that the YYRULE is going to be reduced.                 |
+|                                                                 |
+| YYVSP is the top of the semantic value stack.  Prints a header |
+| to stderr with the rule number minus one and the rule's line   |
+| from yyrline, then one "$K = <symbol>" line for each of the    |
+| rule's right-hand-side symbols.  The YY_REDUCE_PRINT macro     |
+| that follows calls this only when yydebug is nonzero.          |
+`----------------------------------------------------------------*/
+
+#if (defined __STDC__ || defined __C99__FUNC__ \
+ || defined __cplusplus || defined _MSC_VER)
+static void
+yy_reduce_print (YYSTYPE *yyvsp, int yyrule, void* yyscanner, jsgf_t *jsgf)
+#else
+static void
+yy_reduce_print (yyvsp, yyrule, yyscanner, jsgf)
+ YYSTYPE *yyvsp;
+ int yyrule;
+ void* yyscanner;
+ jsgf_t *jsgf;
+#endif
+{
+ int yynrhs = yyr2[yyrule]; /* number of RHS symbols in the rule */
+ int yyi;
+ unsigned long int yylno = yyrline[yyrule]; /* line of the rule in the grammar file */
+ YYFPRINTF (stderr, "Reducing stack by rule %d (line %lu):\n",
+ yyrule - 1, yylno);
+ /* The symbols being reduced. */
+ for (yyi = 0; yyi < yynrhs; yyi++)
+ {
+ YYFPRINTF (stderr, " $%d = ", yyi + 1);
+ yy_symbol_print (stderr, yyrhs[yyprhs[yyrule] + yyi],
+ &(yyvsp[(yyi + 1) - (yynrhs)])
+ , yyscanner, jsgf);
+ YYFPRINTF (stderr, "\n");
+ }
+}
+
+# define YY_REDUCE_PRINT(Rule) \
+do { \
+ if (yydebug) \
+ yy_reduce_print (yyvsp, Rule, yyscanner, jsgf); \
+} while (YYID (0))
+
+/* Nonzero means print parse trace. It is left uninitialized so that
+ multiple parsers can coexist.
+ It is tested by YYDPRINTF, YY_SYMBOL_PRINT, YY_STACK_PRINT and
+ YY_REDUCE_PRINT before they emit anything, and exists only when
+ YYDEBUG is nonzero. */
+int yydebug;
+#else /* !YYDEBUG */
+# define YYDPRINTF(Args) /* tracing compiled out: these four macros expand to nothing */
+# define YY_SYMBOL_PRINT(Title, Type, Value, Location)
+# define YY_STACK_PRINT(Bottom, Top)
+# define YY_REDUCE_PRINT(Rule)
+#endif /* !YYDEBUG */
+
+
+/* YYINITDEPTH -- initial size of the parser's stacks.
+ yyparse declares its initial state and value stacks as local arrays
+ of this many elements.  Define it before this point to override the
+ default. */
+#ifndef YYINITDEPTH
+# define YYINITDEPTH 200
+#endif
+
+/* YYMAXDEPTH -- maximum size the stacks can grow to (effective only
+ if the built-in stack extension method is used).
+
+ Do not make this value too large; the results are undefined if
+ YYSTACK_ALLOC_MAXIMUM < YYSTACK_BYTES (YYMAXDEPTH)
+ evaluated with infinite-precision integer arithmetic.
+
+ When growing the stacks yyparse doubles the current size, clamps it
+ to this value, and gives up with "memory exhausted" once the stacks
+ are already this large. */
+
+#ifndef YYMAXDEPTH
+# define YYMAXDEPTH 10000
+#endif
+
+
+
+#if YYERROR_VERBOSE
+
+# ifndef yystrlen
+# if defined __GLIBC__ && defined _STRING_H
+# define yystrlen strlen
+# else
+/* Return the length of YYSTR.
+ The count excludes the terminating null character.  This fallback
+ is compiled only when yystrlen is not already defined and the
+ __GLIBC__ / _STRING_H test above fails; otherwise yystrlen is
+ simply strlen. */
+#if (defined __STDC__ || defined __C99__FUNC__ \
+ || defined __cplusplus || defined _MSC_VER)
+static YYSIZE_T
+yystrlen (const char *yystr)
+#else
+static YYSIZE_T
+yystrlen (yystr)
+ const char *yystr;
+#endif
+{
+ YYSIZE_T yylen;
+ for (yylen = 0; yystr[yylen]; yylen++)
+ continue;
+ return yylen;
+}
+# endif
+# endif
+
+# ifndef yystpcpy
+# if defined __GLIBC__ && defined _STRING_H && defined _GNU_SOURCE
+# define yystpcpy stpcpy
+# else
+/* Copy YYSRC to YYDEST, returning the address of the terminating '\0' in
+ YYDEST.
+ The terminator itself is copied.  Because the result points at the
+ end of the copied text, callers can append further text by passing
+ it as the next destination.  No bounds checking is done. */
+#if (defined __STDC__ || defined __C99__FUNC__ \
+ || defined __cplusplus || defined _MSC_VER)
+static char *
+yystpcpy (char *yydest, const char *yysrc)
+#else
+static char *
+yystpcpy (yydest, yysrc)
+ char *yydest;
+ const char *yysrc;
+#endif
+{
+ char *yyd = yydest;
+ const char *yys = yysrc;
+
+ while ((*yyd++ = *yys++) != '\0')
+ continue;
+
+ return yyd - 1; /* yyd is one past the '\0' just written */
+}
+# endif
+# endif
+
+# ifndef yytnamerr
+/* Copy to YYRES the contents of YYSTR after stripping away unnecessary
+ quotes and backslashes, so that it's suitable for yyerror. The
+ heuristic is that double-quoting is unnecessary unless the string
+ contains an apostrophe, a comma, or backslash (other than
+ backslash-backslash). YYSTR is taken from yytname. If YYRES is
+ null, do not copy; instead, return the length of what the result
+ would have been.
+
+ The returned length excludes the terminating null character.  A
+ name that does not begin with a double quote, or that must keep its
+ quotes, is copied (or measured) unchanged.  No bound is placed on
+ writes to YYRES, so the caller must supply enough room. */
+static YYSIZE_T
+yytnamerr (char *yyres, const char *yystr)
+{
+ if (*yystr == '"')
+ {
+ YYSIZE_T yyn = 0; /* characters produced so far */
+ char const *yyp = yystr;
+
+ for (;;)
+ switch (*++yyp)
+ {
+ case '\'':
+ case ',':
+ goto do_not_strip_quotes;
+
+ case '\\':
+ if (*++yyp != '\\')
+ goto do_not_strip_quotes;
+ /* Fall through. */
+ default:
+ if (yyres)
+ yyres[yyn] = *yyp;
+ yyn++;
+ break;
+
+ case '"':
+ if (yyres)
+ yyres[yyn] = '\0';
+ return yyn;
+ }
+ do_not_strip_quotes: ;
+ }
+
+ if (! yyres)
+ return yystrlen (yystr);
+
+ return yystpcpy (yyres, yystr) - yyres;
+}
+# endif
+
+/* Copy into YYRESULT an error message about the unexpected token
+ YYCHAR while in state YYSTATE. Return the number of bytes copied,
+ including the terminating null byte. If YYRESULT is null, do not
+ copy anything; just return the number of bytes that would be
+ copied. As a special case, return 0 if an ordinary "syntax error"
+ message will do. Return YYSIZE_MAXIMUM if overflow occurs during
+ size calculation.
+
+ The message names the unexpected token and up to four expected
+ tokens.  If more tokens than that are acceptable in YYSTATE, the
+ "expecting" part is dropped altogether.  Writes to YYRESULT are not
+ bounded here; the caller is expected to size the buffer from a
+ first call made with a null YYRESULT. */
+static YYSIZE_T
+yysyntax_error (char *yyresult, int yystate, int yychar)
+{
+ int yyn = yypact[yystate];
+
+ if (! (YYPACT_NINF < yyn && yyn <= YYLAST))
+ return 0;
+ else
+ {
+ int yytype = YYTRANSLATE (yychar);
+ YYSIZE_T yysize0 = yytnamerr (0, yytname[yytype]); /* length of the unexpected token's name */
+ YYSIZE_T yysize = yysize0;
+ YYSIZE_T yysize1;
+ int yysize_overflow = 0;
+ enum { YYERROR_VERBOSE_ARGS_MAXIMUM = 5 };
+ char const *yyarg[YYERROR_VERBOSE_ARGS_MAXIMUM];
+ int yyx;
+
+# if 0
+ /* This is so xgettext sees the translatable formats that are
+ constructed on the fly. */
+ YY_("syntax error, unexpected %s");
+ YY_("syntax error, unexpected %s, expecting %s");
+ YY_("syntax error, unexpected %s, expecting %s or %s");
+ YY_("syntax error, unexpected %s, expecting %s or %s or %s");
+ YY_("syntax error, unexpected %s, expecting %s or %s or %s or %s");
+# endif
+ char *yyfmt;
+ char const *yyf;
+ static char const yyunexpected[] = "syntax error, unexpected %s";
+ static char const yyexpecting[] = ", expecting %s";
+ static char const yyor[] = " or %s";
+ char yyformat[sizeof yyunexpected
+ + sizeof yyexpecting - 1
+ + ((YYERROR_VERBOSE_ARGS_MAXIMUM - 2)
+ * (sizeof yyor - 1))];
+ char const *yyprefix = yyexpecting;
+
+ /* Start YYX at -YYN if negative to avoid negative indexes in
+ YYCHECK. */
+ int yyxbegin = yyn < 0 ? -yyn : 0;
+
+ /* Stay within bounds of both yycheck and yytname. */
+ int yychecklim = YYLAST - yyn + 1;
+ int yyxend = yychecklim < YYNTOKENS ? yychecklim : YYNTOKENS;
+ int yycount = 1; /* number of yyarg entries in use */
+
+ yyarg[0] = yytname[yytype];
+ yyfmt = yystpcpy (yyformat, yyunexpected);
+
+ for (yyx = yyxbegin; yyx < yyxend; ++yyx)
+ if (yycheck[yyx + yyn] == yyx && yyx != YYTERROR)
+ {
+ if (yycount == YYERROR_VERBOSE_ARGS_MAXIMUM)
+ {
+ /* Too many expected tokens: fall back to the bare
+ "unexpected" message. */
+ yycount = 1;
+ yysize = yysize0;
+ yyformat[sizeof yyunexpected - 1] = '\0';
+ break;
+ }
+ yyarg[yycount++] = yytname[yyx];
+ yysize1 = yysize + yytnamerr (0, yytname[yyx]);
+ yysize_overflow |= (yysize1 < yysize); /* unsigned wrap-around means overflow */
+ yysize = yysize1;
+ yyfmt = yystpcpy (yyfmt, yyprefix);
+ yyprefix = yyor;
+ }
+
+ yyf = YY_(yyformat);
+ yysize1 = yysize + yystrlen (yyf);
+ yysize_overflow |= (yysize1 < yysize);
+ yysize = yysize1;
+
+ if (yysize_overflow)
+ return YYSIZE_MAXIMUM;
+
+ if (yyresult)
+ {
+ /* Avoid sprintf, as that infringes on the user's name space.
+ Don't have undefined behavior even if the translation
+ produced a string with the wrong number of "%s"s. */
+ char *yyp = yyresult;
+ int yyi = 0;
+ while ((*yyp = *yyf) != '\0')
+ {
+ if (*yyp == '%' && yyf[1] == 's' && yyi < yycount)
+ {
+ yyp += yytnamerr (yyp, yyarg[yyi++]);
+ yyf += 2;
+ }
+ else
+ {
+ yyp++;
+ yyf++;
+ }
+ }
+ }
+ return yysize;
+ }
+}
+#endif /* YYERROR_VERBOSE */
+
+
+/*----------------------------------------------------------------.
+| Release the memory associated to this symbol.                  |
+|                                                                 |
+| In this parser the switch below has only a default case, so    |
+| nothing is actually freed: the call merely emits a trace line  |
+| through YY_SYMBOL_PRINT, which does nothing unless YYDEBUG is  |
+| enabled and yydebug is set.  YYMSG is the trace title and      |
+| defaults to "Deleting" when null.                              |
+`----------------------------------------------------------------*/
+
+/*ARGSUSED*/
+#if (defined __STDC__ || defined __C99__FUNC__ \
+ || defined __cplusplus || defined _MSC_VER)
+static void
+yydestruct (const char *yymsg, int yytype, YYSTYPE *yyvaluep, void* yyscanner, jsgf_t *jsgf)
+#else
+static void
+yydestruct (yymsg, yytype, yyvaluep, yyscanner, jsgf)
+ const char *yymsg;
+ int yytype;
+ YYSTYPE *yyvaluep;
+ void* yyscanner;
+ jsgf_t *jsgf;
+#endif
+{
+ YYUSE (yyvaluep);
+ YYUSE (yyscanner);
+ YYUSE (jsgf);
+
+ if (!yymsg)
+ yymsg = "Deleting";
+ YY_SYMBOL_PRINT (yymsg, yytype, yyvaluep, yylocationp); /* the last argument is never expanded by the macro */
+
+ switch (yytype)
+ {
+
+ default:
+ break;
+ }
+}
+
+/* Prevent warnings from -Wmissing-prototypes.
+ Without YYPARSE_PARAM, yyparse takes the scanner handle, which is
+ forwarded to yylex, and the jsgf_t that the rule actions fill in.
+ With YYPARSE_PARAM it takes a single void pointer of that name. */
+#ifdef YYPARSE_PARAM
+#if defined __STDC__ || defined __cplusplus
+int yyparse (void *YYPARSE_PARAM);
+#else
+int yyparse ();
+#endif
+#else /* ! YYPARSE_PARAM */
+#if defined __STDC__ || defined __cplusplus
+int yyparse (void* yyscanner, jsgf_t *jsgf);
+#else
+int yyparse ();
+#endif
+#endif /* ! YYPARSE_PARAM */
+
+
+
+
+
+/*-------------------------.
+| yyparse or yypush_parse. |
+`-------------------------*/
+
+#ifdef YYPARSE_PARAM
+#if (defined __STDC__ || defined __C99__FUNC__ \
+ || defined __cplusplus || defined _MSC_VER)
+int
+yyparse (void *YYPARSE_PARAM)
+#else
+int
+yyparse (YYPARSE_PARAM)
+ void *YYPARSE_PARAM;
+#endif
+#else /* ! YYPARSE_PARAM */
+#if (defined __STDC__ || defined __C99__FUNC__ \
+ || defined __cplusplus || defined _MSC_VER)
+int
+yyparse (void* yyscanner, jsgf_t *jsgf)
+#else
+int
+yyparse (yyscanner, jsgf)
+ void* yyscanner;
+ jsgf_t *jsgf;
+#endif
+#endif
+{
+/* The lookahead symbol. */
+int yychar;
+
+/* The semantic value of the lookahead symbol. */
+YYSTYPE yylval;
+
+ /* Number of syntax errors so far. */
+ int yynerrs;
+
+ int yystate;
+ /* Number of tokens to shift before error messages enabled. */
+ int yyerrstatus;
+
+ /* The stacks and their tools:
+ `yyss': related to states.
+ `yyvs': related to semantic values.
+
+ Refer to the stacks thru separate pointers, to allow yyoverflow
+ to reallocate them elsewhere. */
+
+ /* The state stack. */
+ yytype_int16 yyssa[YYINITDEPTH];
+ yytype_int16 *yyss;
+ yytype_int16 *yyssp;
+
+ /* The semantic value stack. */
+ YYSTYPE yyvsa[YYINITDEPTH];
+ YYSTYPE *yyvs;
+ YYSTYPE *yyvsp;
+
+ YYSIZE_T yystacksize;
+
+ int yyn;
+ int yyresult;
+ /* Lookahead token as an internal (translated) token number. */
+ int yytoken;
+ /* The variables used to return semantic value and location from the
+ action routines. */
+ YYSTYPE yyval;
+
+#if YYERROR_VERBOSE
+ /* Buffer for error messages, and its allocated size. */
+ char yymsgbuf[128];
+ char *yymsg = yymsgbuf;
+ YYSIZE_T yymsg_alloc = sizeof yymsgbuf;
+#endif
+
+#define YYPOPSTACK(N) (yyvsp -= (N), yyssp -= (N))
+
+ /* The number of symbols on the RHS of the reduced rule.
+ Keep to zero when no symbol should be popped. */
+ int yylen = 0;
+
+ yytoken = 0;
+ yyss = yyssa;
+ yyvs = yyvsa;
+ yystacksize = YYINITDEPTH;
+
+ YYDPRINTF ((stderr, "Starting parse\n"));
+
+ yystate = 0;
+ yyerrstatus = 0;
+ yynerrs = 0;
+ yychar = YYEMPTY; /* Cause a token to be read. */
+
+ /* Initialize stack pointers.
+ Waste one element of value and location stack
+ so that they stay on the same level as the state stack.
+ The wasted elements are never initialized. */
+ yyssp = yyss;
+ yyvsp = yyvs;
+
+ goto yysetstate;
+
+/*------------------------------------------------------------.
+| yynewstate -- Push a new state, which is found in yystate. |
+`------------------------------------------------------------*/
+ yynewstate:
+ /* In all cases, when you get here, the value and location stacks
+ have just been pushed. So pushing a state here evens the stacks. */
+ yyssp++;
+
+ yysetstate:
+ *yyssp = yystate;
+
+ if (yyss + yystacksize - 1 <= yyssp)
+ {
+ /* Get the current used size of the three stacks, in elements. */
+ YYSIZE_T yysize = yyssp - yyss + 1;
+
+#ifdef yyoverflow
+ {
+ /* Give user a chance to reallocate the stack. Use copies of
+ these so that the &'s don't force the real ones into
+ memory. */
+ YYSTYPE *yyvs1 = yyvs;
+ yytype_int16 *yyss1 = yyss;
+
+ /* Each stack pointer address is followed by the size of the
+ data in use in that stack, in bytes. This used to be a
+ conditional around just the two extra args, but that might
+ be undefined if yyoverflow is a macro. */
+ yyoverflow (YY_("memory exhausted"),
+ &yyss1, yysize * sizeof (*yyssp),
+ &yyvs1, yysize * sizeof (*yyvsp),
+ &yystacksize);
+
+ yyss = yyss1;
+ yyvs = yyvs1;
+ }
+#else /* no yyoverflow */
+# ifndef YYSTACK_RELOCATE
+ goto yyexhaustedlab;
+# else
+ /* Extend the stack our own way. */
+ if (YYMAXDEPTH <= yystacksize)
+ goto yyexhaustedlab;
+ yystacksize *= 2;
+ if (YYMAXDEPTH < yystacksize)
+ yystacksize = YYMAXDEPTH;
+
+ {
+ yytype_int16 *yyss1 = yyss;
+ union yyalloc *yyptr =
+ (union yyalloc *) YYSTACK_ALLOC (YYSTACK_BYTES (yystacksize));
+ if (! yyptr)
+ goto yyexhaustedlab;
+ YYSTACK_RELOCATE (yyss_alloc, yyss);
+ YYSTACK_RELOCATE (yyvs_alloc, yyvs);
+# undef YYSTACK_RELOCATE
+ if (yyss1 != yyssa)
+ YYSTACK_FREE (yyss1);
+ }
+# endif
+#endif /* no yyoverflow */
+
+ yyssp = yyss + yysize - 1;
+ yyvsp = yyvs + yysize - 1;
+
+ YYDPRINTF ((stderr, "Stack size increased to %lu\n",
+ (unsigned long int) yystacksize));
+
+ if (yyss + yystacksize - 1 <= yyssp)
+ YYABORT;
+ }
+
+ YYDPRINTF ((stderr, "Entering state %d\n", yystate));
+
+ if (yystate == YYFINAL)
+ YYACCEPT;
+
+ goto yybackup;
+
+/*-----------.
+| yybackup. |
+`-----------*/
+yybackup:
+
+ /* Do appropriate processing given the current state. Read a
+ lookahead token if we need one and don't already have one. */
+
+ /* First try to decide what to do without reference to lookahead token. */
+ yyn = yypact[yystate];
+ if (yyn == YYPACT_NINF)
+ goto yydefault;
+
+ /* Not known => get a lookahead token if don't already have one. */
+
+ /* YYCHAR is either YYEMPTY or YYEOF or a valid lookahead symbol. */
+ if (yychar == YYEMPTY)
+ {
+ YYDPRINTF ((stderr, "Reading a token: "));
+ yychar = YYLEX;
+ }
+
+ if (yychar <= YYEOF)
+ {
+ yychar = yytoken = YYEOF;
+ YYDPRINTF ((stderr, "Now at end of input.\n"));
+ }
+ else
+ {
+ yytoken = YYTRANSLATE (yychar);
+ YY_SYMBOL_PRINT ("Next token is", yytoken, &yylval, &yylloc);
+ }
+
+ /* If the proper action on seeing token YYTOKEN is to reduce or to
+ detect an error, take that action. */
+ yyn += yytoken;
+ if (yyn < 0 || YYLAST < yyn || yycheck[yyn] != yytoken)
+ goto yydefault;
+ yyn = yytable[yyn];
+ if (yyn <= 0)
+ {
+ if (yyn == 0 || yyn == YYTABLE_NINF)
+ goto yyerrlab;
+ yyn = -yyn;
+ goto yyreduce;
+ }
+
+ /* Count tokens shifted since error; after three, turn off error
+ status. */
+ if (yyerrstatus)
+ yyerrstatus--;
+
+ /* Shift the lookahead token. */
+ YY_SYMBOL_PRINT ("Shifting", yytoken, &yylval, &yylloc);
+
+ /* Discard the shifted token. */
+ yychar = YYEMPTY;
+
+ yystate = yyn;
+ *++yyvsp = yylval;
+
+ goto yynewstate;
+
+
+/*-----------------------------------------------------------.
+| yydefault -- do the default action for the current state. |
+`-----------------------------------------------------------*/
+yydefault:
+ yyn = yydefact[yystate];
+ if (yyn == 0)
+ goto yyerrlab;
+ goto yyreduce;
+
+
+/*-----------------------------.
+| yyreduce -- Do a reduction. |
+`-----------------------------*/
+yyreduce:
+ /* yyn is the number of a rule to reduce with. */
+ yylen = yyr2[yyn];
+
+ /* If YYLEN is nonzero, implement the default value of the action:
+ `$$ = $1'.
+
+ Otherwise, the following line sets YYVAL to garbage.
+ This behavior is undocumented and Bison
+ users should not rely upon it. Assigning to YYVAL
+ unconditionally makes the parser a bit smaller, and it avoids a
+ GCC warning that YYVAL may be used uninitialized. */
+ yyval = yyvsp[1-yylen];
+
+
+ YY_REDUCE_PRINT (yyn);
+ switch (yyn)
+ {
+ case 5:
+
+/* Line 1455 of yacc.c */
+#line 87 "jsgf_parser.y"
+ { jsgf->name = (yyvsp[(2) - (2)].name); }
+ break;
+
+ case 7:
+
+/* Line 1455 of yacc.c */
+#line 91 "jsgf_parser.y"
+ { jsgf->version = (yyvsp[(2) - (3)].name); }
+ break;
+
+ case 8:
+
+/* Line 1455 of yacc.c */
+#line 92 "jsgf_parser.y"
+ { jsgf->version = (yyvsp[(2) - (4)].name); jsgf->charset = (yyvsp[(3) - (4)].name); }
+ break;
+
+ case 9:
+
+/* Line 1455 of yacc.c */
+#line 93 "jsgf_parser.y"
+ { jsgf->version = (yyvsp[(2) - (5)].name); jsgf->charset = (yyvsp[(3) - (5)].name);
+ jsgf->locale = (yyvsp[(4) - (5)].name); }
+ break;
+
+ case 10:
+
+/* Line 1455 of yacc.c */
+#line 97 "jsgf_parser.y"
+ { (yyval.name) = (yyvsp[(2) - (3)].name); }
+ break;
+
+ case 13:
+
+/* Line 1455 of yacc.c */
+#line 104 "jsgf_parser.y"
+ { jsgf_import_rule(jsgf, (yyvsp[(2) - (3)].name)); ckd_free((yyvsp[(2) - (3)].name)); }
+ break;
+
+ case 16:
+
+/* Line 1455 of yacc.c */
+#line 111 "jsgf_parser.y"
+ { jsgf_define_rule(jsgf, (yyvsp[(1) - (4)].name), (yyvsp[(3) - (4)].rhs), 0); ckd_free((yyvsp[(1) - (4)].name)); }
+ break;
+
+ case 17:
+
+/* Line 1455 of yacc.c */
+#line 112 "jsgf_parser.y"
+ { jsgf_define_rule(jsgf, (yyvsp[(2) - (5)].name), (yyvsp[(4) - (5)].rhs), 1); ckd_free((yyvsp[(2) - (5)].name)); }
+ break;
+
+ case 18:
+
+/* Line 1455 of yacc.c */
+#line 115 "jsgf_parser.y"
+ { (yyval.rhs) = (yyvsp[(1) - (1)].rhs); (yyval.rhs)->atoms = glist_reverse((yyval.rhs)->atoms); }
+ break;
+
+ case 19:
+
+/* Line 1455 of yacc.c */
+#line 116 "jsgf_parser.y"
+ { (yyval.rhs) = (yyvsp[(3) - (3)].rhs);
+ (yyval.rhs)->atoms = glist_reverse((yyval.rhs)->atoms);
+ (yyval.rhs)->alt = (yyvsp[(1) - (3)].rhs); }
+ break;
+
+ case 20:
+
+/* Line 1455 of yacc.c */
+#line 121 "jsgf_parser.y"
+ { (yyval.rhs) = ckd_calloc(1, sizeof(*(yyval.rhs)));
+ (yyval.rhs)->atoms = glist_add_ptr((yyval.rhs)->atoms, (yyvsp[(1) - (1)].atom)); }
+ break;
+
+ case 21:
+
+/* Line 1455 of yacc.c */
+#line 123 "jsgf_parser.y"
+ { (yyval.rhs) = (yyvsp[(1) - (2)].rhs);
+ (yyval.rhs)->atoms = glist_add_ptr((yyval.rhs)->atoms, (yyvsp[(2) - (2)].atom)); }
+ break;
+
+ case 23:
+
+/* Line 1455 of yacc.c */
+#line 128 "jsgf_parser.y"
+ { (yyval.atom) = (yyvsp[(1) - (2)].atom);
+ (yyval.atom)->tags = glist_add_ptr((yyval.atom)->tags, (yyvsp[(2) - (2)].name)); }
+ break;
+
+ case 25:
+
+/* Line 1455 of yacc.c */
+#line 133 "jsgf_parser.y"
+ { (yyval.atom) = (yyvsp[(2) - (2)].atom); (yyval.atom)->weight = (yyvsp[(1) - (2)].weight); }
+ break;
+
+ case 26:
+
+/* Line 1455 of yacc.c */
+#line 136 "jsgf_parser.y"
+ { (yyval.rule) = jsgf_define_rule(jsgf, NULL, (yyvsp[(2) - (3)].rhs), 0); }
+ break;
+
+ case 27:
+
+/* Line 1455 of yacc.c */
+#line 139 "jsgf_parser.y"
+ { (yyval.rule) = jsgf_optional_new(jsgf, (yyvsp[(2) - (3)].rhs)); }
+ break;
+
+ case 28:
+
+/* Line 1455 of yacc.c */
+#line 142 "jsgf_parser.y"
+ { (yyval.atom) = jsgf_atom_new((yyvsp[(1) - (1)].name), 1.0); ckd_free((yyvsp[(1) - (1)].name)); }
+ break;
+
+ case 29:
+
+/* Line 1455 of yacc.c */
+#line 143 "jsgf_parser.y"
+ { (yyval.atom) = jsgf_atom_new((yyvsp[(1) - (1)].name), 1.0); ckd_free((yyvsp[(1) - (1)].name)); }
+ break;
+
+ case 30:
+
+/* Line 1455 of yacc.c */
+#line 144 "jsgf_parser.y"
+ { (yyval.atom) = jsgf_atom_new((yyvsp[(1) - (1)].rule)->name, 1.0); }
+ break;
+
+ case 31:
+
+/* Line 1455 of yacc.c */
+#line 145 "jsgf_parser.y"
+ { (yyval.atom) = jsgf_atom_new((yyvsp[(1) - (1)].rule)->name, 1.0); }
+ break;
+
+ case 32:
+
+/* Line 1455 of yacc.c */
+#line 146 "jsgf_parser.y"
+ { (yyval.atom) = jsgf_kleene_new(jsgf, (yyvsp[(1) - (2)].atom), 0); }
+ break;
+
+ case 33:
+
+/* Line 1455 of yacc.c */
+#line 147 "jsgf_parser.y"
+ { (yyval.atom) = jsgf_kleene_new(jsgf, (yyvsp[(1) - (2)].atom), 1); }
+ break;
+
+
+
+/* Line 1455 of yacc.c */
+#line 1580 "jsgf_parser.c"
+ default: break;
+ }
+ YY_SYMBOL_PRINT ("-> $$ =", yyr1[yyn], &yyval, &yyloc);
+
+ YYPOPSTACK (yylen);
+ yylen = 0;
+ YY_STACK_PRINT (yyss, yyssp);
+
+ *++yyvsp = yyval;
+
+ /* Now `shift' the result of the reduction. Determine what state
+ that goes to, based on the state we popped back to and the rule
+ number reduced by. */
+
+ yyn = yyr1[yyn];
+
+ yystate = yypgoto[yyn - YYNTOKENS] + *yyssp;
+ if (0 <= yystate && yystate <= YYLAST && yycheck[yystate] == *yyssp)
+ yystate = yytable[yystate];
+ else
+ yystate = yydefgoto[yyn - YYNTOKENS];
+
+ goto yynewstate;
+
+
+/*------------------------------------.
+| yyerrlab -- here on detecting error |
+`------------------------------------*/
+yyerrlab:
+ /* If not already recovering from an error, report this error. */
+ if (!yyerrstatus)
+ {
+ ++yynerrs;
+#if ! YYERROR_VERBOSE
+ yyerror (yyscanner, jsgf, YY_("syntax error"));
+#else
+ {
+ YYSIZE_T yysize = yysyntax_error (0, yystate, yychar);
+ if (yymsg_alloc < yysize && yymsg_alloc < YYSTACK_ALLOC_MAXIMUM)
+ {
+ YYSIZE_T yyalloc = 2 * yysize;
+ if (! (yysize <= yyalloc && yyalloc <= YYSTACK_ALLOC_MAXIMUM))
+ yyalloc = YYSTACK_ALLOC_MAXIMUM;
+ if (yymsg != yymsgbuf)
+ YYSTACK_FREE (yymsg);
+ yymsg = (char *) YYSTACK_ALLOC (yyalloc);
+ if (yymsg)
+ yymsg_alloc = yyalloc;
+ else
+ {
+ yymsg = yymsgbuf;
+ yymsg_alloc = sizeof yymsgbuf;
+ }
+ }
+
+ if (0 < yysize && yysize <= yymsg_alloc)
+ {
+ (void) yysyntax_error (yymsg, yystate, yychar);
+ yyerror (yyscanner, jsgf, yymsg);
+ }
+ else
+ {
+ yyerror (yyscanner, jsgf, YY_("syntax error"));
+ if (yysize != 0)
+ goto yyexhaustedlab;
+ }
+ }
+#endif
+ }
+
+
+
+ if (yyerrstatus == 3)
+ {
+ /* If just tried and failed to reuse lookahead token after an
+ error, discard it. */
+
+ if (yychar <= YYEOF)
+ {
+ /* Return failure if at end of input. */
+ if (yychar == YYEOF)
+ YYABORT;
+ }
+ else
+ {
+ yydestruct ("Error: discarding",
+ yytoken, &yylval, yyscanner, jsgf);
+ yychar = YYEMPTY;
+ }
+ }
+
+ /* Else will try to reuse lookahead token after shifting the error
+ token. */
+ goto yyerrlab1;
+
+
+/*---------------------------------------------------.
+| yyerrorlab -- error raised explicitly by YYERROR. |
+`---------------------------------------------------*/
+yyerrorlab:
+
+ /* Pacify compilers like GCC when the user code never invokes
+ YYERROR and the label yyerrorlab therefore never appears in user
+ code. */
+ if (/*CONSTCOND*/ 0)
+ goto yyerrorlab;
+
+ /* Do not reclaim the symbols of the rule which action triggered
+ this YYERROR. */
+ YYPOPSTACK (yylen);
+ yylen = 0;
+ YY_STACK_PRINT (yyss, yyssp);
+ yystate = *yyssp;
+ goto yyerrlab1;
+
+
+/*-------------------------------------------------------------.
+| yyerrlab1 -- common code for both syntax error and YYERROR. |
+`-------------------------------------------------------------*/
+yyerrlab1:
+ yyerrstatus = 3; /* Each real token shifted decrements this. */
+
+ for (;;)
+ {
+ yyn = yypact[yystate];
+ if (yyn != YYPACT_NINF)
+ {
+ yyn += YYTERROR;
+ if (0 <= yyn && yyn <= YYLAST && yycheck[yyn] == YYTERROR)
+ {
+ yyn = yytable[yyn];
+ if (0 < yyn)
+ break;
+ }
+ }
+
+ /* Pop the current state because it cannot handle the error token. */
+ if (yyssp == yyss)
+ YYABORT;
+
+
+ yydestruct ("Error: popping",
+ yystos[yystate], yyvsp, yyscanner, jsgf);
+ YYPOPSTACK (1);
+ yystate = *yyssp;
+ YY_STACK_PRINT (yyss, yyssp);
+ }
+
+ *++yyvsp = yylval;
+
+
+ /* Shift the error token. */
+ YY_SYMBOL_PRINT ("Shifting", yystos[yyn], yyvsp, yylsp);
+
+ yystate = yyn;
+ goto yynewstate;
+
+
+/*-------------------------------------.
+| yyacceptlab -- YYACCEPT comes here. |
+`-------------------------------------*/
+yyacceptlab:
+ yyresult = 0;
+ goto yyreturn;
+
+/*-----------------------------------.
+| yyabortlab -- YYABORT comes here. |
+`-----------------------------------*/
+yyabortlab:
+ yyresult = 1;
+ goto yyreturn;
+
+#if !defined(yyoverflow) || YYERROR_VERBOSE
+/*-------------------------------------------------.
+| yyexhaustedlab -- memory exhaustion comes here. |
+`-------------------------------------------------*/
+yyexhaustedlab:
+ yyerror (yyscanner, jsgf, YY_("memory exhausted"));
+ yyresult = 2;
+ /* Fall through. */
+#endif
+
+yyreturn:
+ if (yychar != YYEMPTY)
+ yydestruct ("Cleanup: discarding lookahead",
+ yytoken, &yylval, yyscanner, jsgf);
+ /* Do not reclaim the symbols of the rule which action triggered
+ this YYABORT or YYACCEPT. */
+ YYPOPSTACK (yylen);
+ YY_STACK_PRINT (yyss, yyssp);
+ while (yyssp != yyss)
+ {
+ yydestruct ("Cleanup: popping",
+ yystos[*yyssp], yyvsp, yyscanner, jsgf);
+ YYPOPSTACK (1);
+ }
+#ifndef yyoverflow
+ if (yyss != yyssa)
+ YYSTACK_FREE (yyss);
+#endif
+#if YYERROR_VERBOSE
+ if (yymsg != yymsgbuf)
+ YYSTACK_FREE (yymsg);
+#endif
+ /* Make sure YYID is used. */
+ return YYID (yyresult);
+}
+
+
+
+/* Line 1675 of yacc.c */
+#line 150 "jsgf_parser.y"
+
+
+void
+yyerror(yyscan_t lex, jsgf_t *jsgf, const char *s) /* jsgf is accepted (yyparse passes it) but not used here */
+{ /* Log message s together with the scanner's current line number and token text. */
+ E_ERROR("%s at line %d current token '%s'\n", s, yyget_lineno(lex), yyget_text(lex));
+}
+
diff --git a/media/sphinxbase/src/libsphinxbase/lm/jsgf_parser.h b/media/sphinxbase/src/libsphinxbase/lm/jsgf_parser.h
new file mode 100644
index 000000000..95f68e329
--- /dev/null
+++ b/media/sphinxbase/src/libsphinxbase/lm/jsgf_parser.h
@@ -0,0 +1,90 @@
+
+/* A Bison parser, made by GNU Bison 2.4.1. */
+
+/* Skeleton interface for Bison's Yacc-like parsers in C
+
+ Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004, 2005, 2006
+ Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+/* As a special exception, you may create a larger work that contains
+ part or all of the Bison parser skeleton and distribute that work
+ under terms of your choice, so long as that work isn't itself a
+ parser generator using the skeleton or a modified version thereof
+ as a parser skeleton. Alternatively, if you modify or redistribute
+ the parser skeleton itself, you may (at your option) remove this
+ special exception, which will cause the skeleton and the resulting
+ Bison output files to be licensed under the GNU General Public
+ License without this special exception.
+
+ This special exception was added by the Free Software Foundation in
+ version 2.2 of Bison. */
+
+
+/* Tokens. */
+#ifndef YYTOKENTYPE
+# define YYTOKENTYPE
+ /* Put the tokens into the symbol table, so that GDB and other debuggers
+ know about them. */
+ enum yytokentype {
+ HEADER = 258,
+ GRAMMAR = 259,
+ IMPORT = 260,
+ PUBLIC = 261,
+ TOKEN = 262,
+ RULENAME = 263,
+ TAG = 264,
+ WEIGHT = 265
+ };
+#endif
+/* The same token codes as the enum above, defined unconditionally as macros. */
+#define HEADER 258
+#define GRAMMAR 259
+#define IMPORT 260
+#define PUBLIC 261
+#define TOKEN 262
+#define RULENAME 263
+#define TAG 264
+#define WEIGHT 265
+
+
+
+
+#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
+typedef union YYSTYPE /* semantic value type; parser actions access the members via yyvsp[] and yyval */
+{
+
+/* Line 1676 of yacc.c */
+#line 65 "jsgf_parser.y"
+
+ char *name; /* string value; the atom actions pass it to jsgf_atom_new() and then ckd_free() it */
+ float weight; /* presumably the value of a WEIGHT token -- TODO confirm in jsgf_parser.y */
+ jsgf_rule_t *rule; /* rule pointer; the atom actions read its ->name */
+ jsgf_rhs_t *rhs; /* NOTE(review): looks like a rule right-hand side -- confirm in jsgf_parser.y */
+ jsgf_atom_t *atom; /* atom as produced by jsgf_atom_new() / jsgf_kleene_new() */
+
+
+
+/* Line 1676 of yacc.c */
+#line 82 "jsgf_parser.h"
+} YYSTYPE;
+# define YYSTYPE_IS_TRIVIAL 1
+# define yystype YYSTYPE /* obsolescent; will be withdrawn */
+# define YYSTYPE_IS_DECLARED 1
+#endif
+
+
+
+
diff --git a/media/sphinxbase/src/libsphinxbase/lm/jsgf_scanner.c b/media/sphinxbase/src/libsphinxbase/lm/jsgf_scanner.c
new file mode 100644
index 000000000..5d41d2a6b
--- /dev/null
+++ b/media/sphinxbase/src/libsphinxbase/lm/jsgf_scanner.c
@@ -0,0 +1,2199 @@
+#line 2 "jsgf_scanner.c"
+
+#line 4 "jsgf_scanner.c"
+
+#define YY_INT_ALIGNED short int
+
+/* A lexical scanner generated by flex */
+
+#define FLEX_SCANNER
+#define YY_FLEX_MAJOR_VERSION 2
+#define YY_FLEX_MINOR_VERSION 5
+#define YY_FLEX_SUBMINOR_VERSION 37
+#if YY_FLEX_SUBMINOR_VERSION > 0
+#define FLEX_BETA
+#endif
+
+/* First, we deal with platform-specific or compiler-specific issues. */
+
+/* begin standard C headers. */
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <stdlib.h>
+
+/* end standard C headers. */
+
+/* flex integer type definitions */
+
+#ifndef FLEXINT_H
+#define FLEXINT_H
+
+/* C99 systems have <inttypes.h>. Non-C99 systems may or may not. */
+
+#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+
+/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h,
+ * if you want the limit (max/min) macros for int types.
+ */
+#ifndef __STDC_LIMIT_MACROS
+#define __STDC_LIMIT_MACROS 1
+#endif
+
+#include <inttypes.h>
+typedef int8_t flex_int8_t;
+typedef uint8_t flex_uint8_t;
+typedef int16_t flex_int16_t;
+typedef uint16_t flex_uint16_t;
+typedef int32_t flex_int32_t;
+typedef uint32_t flex_uint32_t;
+#else
+typedef signed char flex_int8_t;
+typedef short int flex_int16_t;
+typedef int flex_int32_t;
+typedef unsigned char flex_uint8_t;
+typedef unsigned short int flex_uint16_t;
+typedef unsigned int flex_uint32_t;
+
+/* Limits of integral types. */
+#ifndef INT8_MIN
+#define INT8_MIN (-128)
+#endif
+#ifndef INT16_MIN
+#define INT16_MIN (-32767-1)
+#endif
+#ifndef INT32_MIN
+#define INT32_MIN (-2147483647-1)
+#endif
+#ifndef INT8_MAX
+#define INT8_MAX (127)
+#endif
+#ifndef INT16_MAX
+#define INT16_MAX (32767)
+#endif
+#ifndef INT32_MAX
+#define INT32_MAX (2147483647)
+#endif
+#ifndef UINT8_MAX
+#define UINT8_MAX (255U)
+#endif
+#ifndef UINT16_MAX
+#define UINT16_MAX (65535U)
+#endif
+#ifndef UINT32_MAX
+#define UINT32_MAX (4294967295U)
+#endif
+
+#endif /* ! C99 */
+
+#endif /* ! FLEXINT_H */
+
+#ifdef __cplusplus
+
+/* The "const" storage-class-modifier is valid. */
+#define YY_USE_CONST
+
+#else /* ! __cplusplus */
+
+/* C99 requires __STDC__ to be defined as 1. */
+#if defined (__STDC__)
+
+#define YY_USE_CONST
+
+#endif /* defined (__STDC__) */
+#endif /* ! __cplusplus */
+
+#ifdef YY_USE_CONST
+#define yyconst const
+#else
+#define yyconst
+#endif
+
+/* Returned upon end-of-file. */
+#define YY_NULL 0
+
+/* Promotes a possibly negative, possibly signed char to an unsigned
+ * integer for use as an array index. A negative signed char is first
+ * reinterpreted as an 8-bit unsigned char, hence the double cast.
+ * NOTE(review): the argument is not parenthesized, so the casts bind only to its first operand.
+ */
+#define YY_SC_TO_UI(c) ((unsigned int) (unsigned char) c)
+
+/* An opaque pointer. */
+#ifndef YY_TYPEDEF_YY_SCANNER_T
+#define YY_TYPEDEF_YY_SCANNER_T
+typedef void* yyscan_t;
+#endif
+
+/* For convenience, these vars (plus the bison vars far below)
+ are macros in the reentrant scanner. */
+#define yyin yyg->yyin_r
+#define yyout yyg->yyout_r
+#define yyextra yyg->yyextra_r
+#define yyleng yyg->yyleng_r
+#define yytext yyg->yytext_r
+#define yylineno (YY_CURRENT_BUFFER_LVALUE->yy_bs_lineno)
+#define yycolumn (YY_CURRENT_BUFFER_LVALUE->yy_bs_column)
+#define yy_flex_debug yyg->yy_flex_debug_r
+
+/* Enter a start condition. This macro really ought to take a parameter,
+ * but we do it the disgusting crufty way forced on us by the ()-less
+ * definition of BEGIN.
+ */
+#define BEGIN yyg->yy_start = 1 + 2 *
+
+/* Translate the current start state into a value that can be later handed
+ * to BEGIN to return to the state. The YYSTATE alias is for lex
+ * compatibility.
+ */
+#define YY_START ((yyg->yy_start - 1) / 2)
+#define YYSTATE YY_START
+
+/* Action number for EOF rule of a given start state. */
+#define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1)
+
+/* Special action meaning "start processing a new file". */
+#define YY_NEW_FILE yyrestart(yyin ,yyscanner )
+
+#define YY_END_OF_BUFFER_CHAR 0
+
+/* Size of default input buffer. */
+#ifndef YY_BUF_SIZE
+#define YY_BUF_SIZE 16384
+#endif
+
+/* The state buf must be large enough to hold one state per character in the main buffer.
+ */
+#define YY_STATE_BUF_SIZE ((YY_BUF_SIZE + 2) * sizeof(yy_state_type))
+
+#ifndef YY_TYPEDEF_YY_BUFFER_STATE
+#define YY_TYPEDEF_YY_BUFFER_STATE
+typedef struct yy_buffer_state *YY_BUFFER_STATE;
+#endif
+
+#ifndef YY_TYPEDEF_YY_SIZE_T
+#define YY_TYPEDEF_YY_SIZE_T
+typedef size_t yy_size_t;
+#endif
+
+#define EOB_ACT_CONTINUE_SCAN 0
+#define EOB_ACT_END_OF_FILE 1
+#define EOB_ACT_LAST_MATCH 2
+
+ /* Note: We specifically omit the test for yy_rule_can_match_eol because it requires
+ * access to the local variable yy_act. Since yyless() is a macro, it would break
+ * existing scanners that call yyless() from OUTSIDE yylex.
+ * One obvious solution is to make yy_act a global. I tried that, and saw
+ * a 5% performance hit in a non-yylineno scanner, because yy_act is
+ * normally declared as a register variable-- so it is not worth it.
+ */
+ #define YY_LESS_LINENO(n) \
+ do { \
+ int yyl;\
+ for ( yyl = n; yyl < yyleng; ++yyl )\
+ if ( yytext[yyl] == '\n' )\
+ --yylineno;\
+ }while(0)
+
+/* Return all but the first "n" matched characters back to the input stream. */
+#define yyless(n) \
+ do \
+ { \
+ /* Undo effects of setting up yytext. */ \
+ int yyless_macro_arg = (n); \
+ YY_LESS_LINENO(yyless_macro_arg);\
+ *yy_cp = yyg->yy_hold_char; \
+ YY_RESTORE_YY_MORE_OFFSET \
+ yyg->yy_c_buf_p = yy_cp = yy_bp + yyless_macro_arg - YY_MORE_ADJ; \
+ YY_DO_BEFORE_ACTION; /* set up yytext again */ \
+ } \
+ while ( 0 )
+
+#define unput(c) yyunput( c, yyg->yytext_ptr , yyscanner )
+
+#ifndef YY_STRUCT_YY_BUFFER_STATE
+#define YY_STRUCT_YY_BUFFER_STATE
+struct yy_buffer_state
+ {
+ FILE *yy_input_file; /* presumably the stream this buffer is filled from -- confirm in yy_init_buffer() */
+
+ char *yy_ch_buf; /* input buffer */
+ char *yy_buf_pos; /* current position in input buffer */
+
+ /* Size of input buffer in bytes, not including room for EOB
+ * characters.
+ */
+ yy_size_t yy_buf_size;
+
+ /* Number of characters read into yy_ch_buf, not including EOB
+ * characters.
+ */
+ yy_size_t yy_n_chars;
+
+ /* Whether we "own" the buffer - i.e., we know we created it,
+ * and can realloc() it to grow it, and should free() it to
+ * delete it.
+ */
+ int yy_is_our_buffer;
+
+ /* Whether this is an "interactive" input source; if so, and
+ * if we're using stdio for input, then we want to use getc()
+ * instead of fread(), to make sure we stop fetching input after
+ * each newline.
+ */
+ int yy_is_interactive;
+
+ /* Whether we're considered to be at the beginning of a line.
+ * If so, '^' rules will be active on the next match, otherwise
+ * not.
+ */
+ int yy_at_bol;
+
+ int yy_bs_lineno; /**< The line count; the yylineno macro expands to this field of the current buffer. */
+ int yy_bs_column; /**< The column count; the yycolumn macro expands to this field of the current buffer. */
+
+ /* Whether to try to fill the input buffer when we reach the
+ * end of it.
+ */
+ int yy_fill_buffer;
+
+ int yy_buffer_status; /* presumably one of the YY_BUFFER_* values defined just below -- confirm */
+
+#define YY_BUFFER_NEW 0
+#define YY_BUFFER_NORMAL 1
+ /* When an EOF's been seen but there's still some text to process
+ * then we mark the buffer as YY_EOF_PENDING, to indicate that we
+ * shouldn't try reading from the input source any more. We might
+ * still have a bunch of tokens to match, though, because of
+ * possible backing-up.
+ *
+ * When we actually see the EOF, we change the status to "new"
+ * (via yyrestart()), so that the user can continue scanning by
+ * just pointing yyin at a new input file.
+ */
+#define YY_BUFFER_EOF_PENDING 2
+
+ };
+#endif /* !YY_STRUCT_YY_BUFFER_STATE */
+
+/* We provide macros for accessing buffer states in case in the
+ * future we want to put the buffer states in a more general
+ * "scanner state".
+ *
+ * Returns the top of the stack, or NULL.
+ */
+#define YY_CURRENT_BUFFER ( yyg->yy_buffer_stack \
+ ? yyg->yy_buffer_stack[yyg->yy_buffer_stack_top] \
+ : NULL)
+
+/* Same as previous macro, but useful when we know that the buffer stack is not
+ * NULL or when we need an lvalue. For internal use only.
+ */
+#define YY_CURRENT_BUFFER_LVALUE yyg->yy_buffer_stack[yyg->yy_buffer_stack_top]
+
+void yyrestart (FILE *input_file ,yyscan_t yyscanner );
+void yy_switch_to_buffer (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner );
+YY_BUFFER_STATE yy_create_buffer (FILE *file,int size ,yyscan_t yyscanner );
+void yy_delete_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner );
+void yy_flush_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner );
+void yypush_buffer_state (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner );
+void yypop_buffer_state (yyscan_t yyscanner );
+
+static void yyensure_buffer_stack (yyscan_t yyscanner );
+static void yy_load_buffer_state (yyscan_t yyscanner );
+static void yy_init_buffer (YY_BUFFER_STATE b,FILE *file ,yyscan_t yyscanner );
+
+#define YY_FLUSH_BUFFER yy_flush_buffer(YY_CURRENT_BUFFER ,yyscanner)
+
+YY_BUFFER_STATE yy_scan_buffer (char *base,yy_size_t size ,yyscan_t yyscanner );
+YY_BUFFER_STATE yy_scan_string (yyconst char *yy_str ,yyscan_t yyscanner );
+YY_BUFFER_STATE yy_scan_bytes (yyconst char *bytes,yy_size_t len ,yyscan_t yyscanner );
+
+void *yyalloc (yy_size_t ,yyscan_t yyscanner );
+void *yyrealloc (void *,yy_size_t ,yyscan_t yyscanner );
+void yyfree (void * ,yyscan_t yyscanner );
+
+#define yy_new_buffer yy_create_buffer
+
+#define yy_set_interactive(is_interactive) \
+ { \
+ if ( ! YY_CURRENT_BUFFER ){ \
+ yyensure_buffer_stack (yyscanner); \
+ YY_CURRENT_BUFFER_LVALUE = \
+ yy_create_buffer(yyin,YY_BUF_SIZE ,yyscanner); \
+ } \
+ YY_CURRENT_BUFFER_LVALUE->yy_is_interactive = is_interactive; \
+ }
+
+#define yy_set_bol(at_bol) \
+ { \
+ if ( ! YY_CURRENT_BUFFER ){\
+ yyensure_buffer_stack (yyscanner); \
+ YY_CURRENT_BUFFER_LVALUE = \
+ yy_create_buffer(yyin,YY_BUF_SIZE ,yyscanner); \
+ } \
+ YY_CURRENT_BUFFER_LVALUE->yy_at_bol = at_bol; \
+ }
+
+#define YY_AT_BOL() (YY_CURRENT_BUFFER_LVALUE->yy_at_bol)
+
+/* Begin user sect3 */
+
+#define yywrap(yyscanner) 1
+#define YY_SKIP_YYWRAP
+
+typedef unsigned char YY_CHAR;
+
+typedef int yy_state_type;
+
+#define yytext_ptr yytext_r
+
+static yy_state_type yy_get_previous_state (yyscan_t yyscanner );
+static yy_state_type yy_try_NUL_trans (yy_state_type current_state ,yyscan_t yyscanner);
+static int yy_get_next_buffer (yyscan_t yyscanner );
+static void yy_fatal_error (yyconst char msg[] ,yyscan_t yyscanner );
+
+/* Done after the current pattern has been matched and before the
+ * corresponding action - sets up yytext.
+ */
+#define YY_DO_BEFORE_ACTION \
+ yyg->yytext_ptr = yy_bp; \
+ yyleng = (size_t) (yy_cp - yy_bp); \
+ yyg->yy_hold_char = *yy_cp; \
+ *yy_cp = '\0'; \
+ yyg->yy_c_buf_p = yy_cp;
+
+#define YY_NUM_RULES 22
+#define YY_END_OF_BUFFER 23
+/* This struct is not used in this scanner,
+ but its presence is necessary. */
+struct yy_trans_info
+ {
+ flex_int32_t yy_verify;
+ flex_int32_t yy_nxt;
+ };
+static yyconst flex_int16_t yy_accept[98] =
+ { 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 23, 22,
+ 1, 22, 22, 22, 22, 22, 22, 22, 5, 1,
+ 5, 17, 1, 17, 21, 21, 18, 21, 21, 9,
+ 1, 9, 0, 3, 0, 0, 0, 0, 0, 0,
+ 4, 17, 17, 0, 17, 17, 7, 0, 20, 0,
+ 0, 0, 0, 0, 16, 8, 0, 0, 2, 14,
+ 0, 0, 0, 0, 19, 0, 17, 0, 17, 17,
+ 0, 0, 6, 20, 0, 15, 0, 0, 16, 0,
+ 0, 0, 0, 0, 19, 0, 0, 0, 10, 0,
+ 0, 0, 0, 12, 13, 11, 0
+
+ } ;
+
+static yyconst flex_int32_t yy_ec[256] =
+ { 0,
+ 1, 1, 1, 1, 1, 1, 1, 1, 2, 3,
+ 1, 1, 2, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 2, 1, 4, 5, 1, 1, 1, 1, 6,
+ 6, 7, 6, 1, 8, 9, 10, 11, 11, 11,
+ 11, 11, 11, 11, 11, 11, 11, 1, 12, 13,
+ 6, 14, 1, 1, 1, 1, 1, 1, 1, 15,
+ 16, 1, 1, 17, 1, 1, 1, 1, 1, 1,
+ 1, 1, 18, 1, 1, 1, 1, 1, 1, 1,
+ 6, 19, 6, 1, 1, 1, 20, 21, 22, 1,
+
+ 23, 1, 24, 1, 25, 1, 1, 26, 27, 1,
+ 28, 29, 1, 30, 1, 31, 32, 1, 1, 1,
+ 1, 1, 33, 6, 34, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 35, 1, 1, 1,
+ 36, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 37, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1
+ } ;
+
+static yyconst flex_int32_t yy_meta[38] =
+ { 0,
+ 1, 2, 2, 1, 1, 2, 2, 1, 1, 2,
+ 1, 2, 3, 3, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 2, 2, 1, 1, 1
+ } ;
+
+static yyconst flex_int16_t yy_base[113] =
+ { 0,
+ 0, 36, 4, 12, 72, 105, 14, 20, 135, 312,
+ 312, 117, 2, 0, 103, 105, 99, 95, 312, 312,
+ 119, 0, 312, 138, 312, 21, 312, 0, 1, 312,
+ 312, 118, 109, 312, 123, 111, 104, 94, 101, 85,
+ 312, 0, 171, 14, 0, 204, 312, 109, 113, 41,
+ 106, 96, 21, 23, 312, 312, 88, 98, 312, 312,
+ 73, 71, 70, 89, 312, 44, 0, 39, 0, 237,
+ 43, 90, 312, 312, 57, 312, 37, 69, 43, 77,
+ 64, 57, 58, 64, 76, 94, 79, 59, 312, 39,
+ 14, 14, 4, 312, 312, 312, 312, 271, 274, 277,
+
+ 280, 283, 0, 285, 288, 290, 293, 296, 299, 302,
+ 305, 308
+ } ;
+
+static yyconst flex_int16_t yy_def[113] =
+ { 0,
+ 98, 98, 99, 99, 100, 100, 101, 101, 97, 97,
+ 97, 97, 97, 102, 97, 97, 97, 97, 97, 97,
+ 97, 103, 97, 104, 97, 97, 97, 105, 106, 97,
+ 97, 97, 97, 97, 107, 102, 97, 97, 97, 97,
+ 97, 103, 104, 108, 103, 109, 97, 97, 110, 97,
+ 97, 105, 106, 111, 97, 97, 97, 107, 97, 97,
+ 97, 97, 97, 97, 97, 112, 43, 108, 43, 109,
+ 97, 110, 97, 97, 97, 97, 106, 111, 106, 97,
+ 97, 97, 97, 97, 108, 112, 97, 97, 97, 97,
+ 97, 97, 97, 97, 97, 97, 0, 97, 97, 97,
+
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97,
+ 97, 97
+ } ;
+
+static yyconst flex_int16_t yy_nxt[350] =
+ { 0,
+ 42, 11, 11, 97, 12, 20, 11, 97, 34, 13,
+ 21, 35, 14, 20, 11, 31, 11, 65, 21, 54,
+ 32, 31, 11, 15, 16, 53, 32, 47, 17, 48,
+ 49, 50, 66, 96, 55, 95, 18, 11, 11, 54,
+ 12, 78, 65, 51, 94, 13, 44, 85, 14, 48,
+ 74, 50, 74, 87, 55, 54, 79, 66, 93, 15,
+ 16, 54, 86, 51, 17, 51, 74, 88, 74, 88,
+ 55, 53, 18, 23, 11, 24, 55, 25, 25, 65,
+ 33, 26, 92, 27, 28, 25, 91, 78, 74, 87,
+ 90, 89, 73, 84, 66, 83, 44, 85, 82, 81,
+
+ 59, 51, 79, 80, 29, 25, 23, 11, 24, 76,
+ 25, 25, 86, 75, 26, 73, 27, 28, 25, 71,
+ 64, 63, 62, 61, 60, 59, 57, 56, 41, 40,
+ 39, 38, 37, 33, 97, 97, 97, 29, 25, 44,
+ 44, 45, 97, 44, 44, 97, 97, 44, 97, 44,
+ 44, 44, 97, 97, 97, 97, 46, 97, 97, 97,
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97,
+ 44, 44, 44, 44, 45, 97, 44, 44, 97, 97,
+ 44, 97, 44, 44, 44, 97, 97, 97, 97, 46,
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97,
+
+ 97, 97, 97, 44, 44, 68, 44, 69, 97, 68,
+ 68, 97, 97, 68, 97, 68, 68, 68, 97, 97,
+ 97, 97, 70, 97, 97, 97, 97, 97, 97, 97,
+ 97, 97, 97, 97, 97, 97, 68, 68, 68, 44,
+ 69, 97, 68, 68, 97, 97, 68, 97, 68, 68,
+ 68, 97, 97, 97, 97, 70, 97, 97, 97, 97,
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 68,
+ 68, 10, 10, 10, 19, 19, 19, 22, 22, 22,
+ 30, 30, 30, 36, 36, 43, 43, 43, 52, 52,
+ 53, 53, 53, 58, 58, 58, 44, 44, 44, 67,
+
+ 67, 67, 72, 72, 72, 77, 77, 77, 68, 68,
+ 68, 9, 97, 97, 97, 97, 97, 97, 97, 97,
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97,
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97,
+ 97, 97, 97, 97, 97, 97, 97, 97, 97
+ } ;
+
+static yyconst flex_int16_t yy_chk[350] =
+ { 0,
+ 103, 1, 1, 0, 1, 3, 3, 0, 13, 1,
+ 3, 13, 1, 4, 4, 7, 7, 44, 4, 29,
+ 7, 8, 8, 1, 1, 54, 8, 26, 1, 26,
+ 26, 26, 44, 93, 29, 92, 1, 2, 2, 53,
+ 2, 54, 68, 26, 91, 2, 66, 66, 2, 50,
+ 50, 50, 71, 71, 53, 77, 54, 68, 90, 2,
+ 2, 79, 66, 50, 2, 71, 75, 75, 88, 88,
+ 77, 78, 2, 5, 5, 5, 79, 5, 5, 85,
+ 84, 5, 83, 5, 5, 5, 82, 78, 87, 87,
+ 81, 80, 72, 64, 85, 63, 86, 86, 62, 61,
+
+ 58, 87, 78, 57, 5, 5, 6, 6, 6, 52,
+ 6, 6, 86, 51, 6, 49, 6, 6, 6, 48,
+ 40, 39, 38, 37, 36, 35, 33, 32, 21, 18,
+ 17, 16, 15, 12, 9, 0, 0, 6, 6, 24,
+ 24, 24, 0, 24, 24, 0, 0, 24, 0, 24,
+ 24, 24, 0, 0, 0, 0, 24, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 24, 24, 43, 43, 43, 0, 43, 43, 0, 0,
+ 43, 0, 43, 43, 43, 0, 0, 0, 0, 43,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+ 0, 0, 0, 43, 43, 46, 46, 46, 0, 46,
+ 46, 0, 0, 46, 0, 46, 46, 46, 0, 0,
+ 0, 0, 46, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 46, 46, 70, 70,
+ 70, 0, 70, 70, 0, 0, 70, 0, 70, 70,
+ 70, 0, 0, 0, 0, 70, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 70,
+ 70, 98, 98, 98, 99, 99, 99, 100, 100, 100,
+ 101, 101, 101, 102, 102, 104, 104, 104, 105, 105,
+ 106, 106, 106, 107, 107, 107, 108, 108, 108, 109,
+
+ 109, 109, 110, 110, 110, 111, 111, 111, 112, 112,
+ 112, 97, 97, 97, 97, 97, 97, 97, 97, 97,
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97,
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97,
+ 97, 97, 97, 97, 97, 97, 97, 97, 97
+ } ;
+
+/* Table of booleans, true if rule could match eol. */
+static yyconst flex_int32_t yy_rule_can_match_eol[23] =
+ { 0,
+1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1,
+ 0, 0, 0, };
+
+/* The intent behind this definition is that it'll catch
+ * any uses of REJECT which flex missed.
+ */
+#define REJECT reject_used_but_not_detected
+#define yymore() yymore_used_but_not_detected
+#define YY_MORE_ADJ 0
+#define YY_RESTORE_YY_MORE_OFFSET
+#line 1 "_jsgf_scanner.l"
+/* -*- mode: text -*- */
+/* ====================================================================
+ * Copyright (c) 2007 Carnegie Mellon University. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * This work was supported in part by funding from the Defense Advanced
+ * Research Projects Agency and the National Science Foundation of the
+ * United States of America, and the CMU Sphinx Speech Consortium.
+ *
+ * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
+ * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
+ * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ====================================================================
+ *
+ */
+/* YOU MUST USE FLEX 2.5.37 OR NEWER TO PROCESS THIS FILE!!! */
+#line 39 "_jsgf_scanner.l"
+
+#include "jsgf_internal.h"
+#include "jsgf_parser.h"
+
+#define YY_NO_UNISTD_H 1
+
+
+
+#line 609 "jsgf_scanner.c"
+
+#define INITIAL 0
+#define COMMENT 1
+#define DECL 2
+#define DECLCOMMENT 3
+
+#ifndef YY_NO_UNISTD_H
+/* Special case for "unistd.h", since it is non-ANSI. We include it way
+ * down here because we want the user's section 1 to have been scanned first.
+ * The user has a chance to override it with an option.
+ */
+#include <unistd.h>
+#endif
+
+#ifndef YY_EXTRA_TYPE
+#define YY_EXTRA_TYPE void *
+#endif
+
+/* Holds the entire state of the reentrant scanner. */
+struct yyguts_t
+ {
+
+ /* User-defined. Not touched by flex. */
+ YY_EXTRA_TYPE yyextra_r;
+
+ /* The rest are the same as the globals declared in the non-reentrant scanner. */
+ FILE *yyin_r, *yyout_r; /* reached through the yyin / yyout macros */
+ size_t yy_buffer_stack_top; /**< index of top of stack. */
+ size_t yy_buffer_stack_max; /**< capacity of stack. */
+ YY_BUFFER_STATE * yy_buffer_stack; /**< Stack as an array. */
+ char yy_hold_char; /* character replaced by the NUL that terminates yytext (see YY_DO_BEFORE_ACTION) */
+ yy_size_t yy_n_chars;
+ yy_size_t yyleng_r; /* reached through the yyleng macro */
+ char *yy_c_buf_p;
+ int yy_init; /* set to 1 by yylex once its first-call setup has run */
+ int yy_start; /* start state, stored as 1 + 2 * condition (see BEGIN and YY_START) */
+ int yy_did_buffer_switch_on_eof;
+ int yy_start_stack_ptr;
+ int yy_start_stack_depth;
+ int *yy_start_stack;
+ yy_state_type yy_last_accepting_state;
+ char* yy_last_accepting_cpos;
+
+ int yylineno_r; /* NOTE: the yylineno macro uses the current buffer's yy_bs_lineno, not this field */
+ int yy_flex_debug_r; /* reached through the yy_flex_debug macro */
+
+ char *yytext_r; /* reached through the yytext macro */
+ int yy_more_flag;
+ int yy_more_len;
+
+ YYSTYPE * yylval_r; /* reached through the yylval macro; yylex stores yylval_param here */
+
+ }; /* end struct yyguts_t */
+
+static int yy_init_globals (yyscan_t yyscanner );
+
+ /* This must go here because YYSTYPE and YYLTYPE are included
+ * from bison output in section 1.*/
+ # define yylval yyg->yylval_r
+
+int yylex_init (yyscan_t* scanner);
+
+int yylex_init_extra (YY_EXTRA_TYPE user_defined,yyscan_t* scanner);
+
+/* Accessor methods to globals.
+ These are made visible to non-reentrant scanners for convenience. */
+
+int yylex_destroy (yyscan_t yyscanner );
+
+int yyget_debug (yyscan_t yyscanner );
+
+void yyset_debug (int debug_flag ,yyscan_t yyscanner );
+
+YY_EXTRA_TYPE yyget_extra (yyscan_t yyscanner );
+
+void yyset_extra (YY_EXTRA_TYPE user_defined ,yyscan_t yyscanner );
+
+FILE *yyget_in (yyscan_t yyscanner );
+
+void yyset_in (FILE * in_str ,yyscan_t yyscanner );
+
+FILE *yyget_out (yyscan_t yyscanner );
+
+void yyset_out (FILE * out_str ,yyscan_t yyscanner );
+
+yy_size_t yyget_leng (yyscan_t yyscanner );
+
+char *yyget_text (yyscan_t yyscanner );
+
+int yyget_lineno (yyscan_t yyscanner );
+
+void yyset_lineno (int line_number ,yyscan_t yyscanner );
+
+int yyget_column (yyscan_t yyscanner );
+
+void yyset_column (int column_no ,yyscan_t yyscanner );
+
+YYSTYPE * yyget_lval (yyscan_t yyscanner );
+
+void yyset_lval (YYSTYPE * yylval_param ,yyscan_t yyscanner );
+
+/* Macros after this point can all be overridden by user definitions in
+ * section 1.
+ */
+
+#ifndef YY_SKIP_YYWRAP
+#ifdef __cplusplus
+extern "C" int yywrap (yyscan_t yyscanner );
+#else
+extern int yywrap (yyscan_t yyscanner );
+#endif
+#endif
+
+#ifndef yytext_ptr
+static void yy_flex_strncpy (char *,yyconst char *,int ,yyscan_t yyscanner);
+#endif
+
+#ifdef YY_NEED_STRLEN
+static int yy_flex_strlen (yyconst char * ,yyscan_t yyscanner);
+#endif
+
+#ifndef YY_NO_INPUT
+
+#ifdef __cplusplus
+static int yyinput (yyscan_t yyscanner );
+#else
+static int input (yyscan_t yyscanner );
+#endif
+
+#endif
+
+/* Amount of stuff to slurp up with each read. */
+#ifndef YY_READ_BUF_SIZE
+#define YY_READ_BUF_SIZE 8192
+#endif
+
+/* Copy whatever the last rule matched to the standard output. */
+#ifndef ECHO
+/* This used to be an fputs(), but since the string might contain NUL's,
+ * we now use fwrite().
+ */
+#define ECHO do { if (fwrite( yytext, yyleng, 1, yyout )) {} } while (0)
+#endif
+
+/* Gets input and stuffs it into "buf". The number of characters read, or
+ * YY_NULL, is returned in "result".
+ */
+#ifndef YY_INPUT
+#define YY_INPUT(buf,result,max_size) \
+ if ( YY_CURRENT_BUFFER_LVALUE->yy_is_interactive ) \
+ { \
+ int c = '*'; \
+ size_t n; \
+ for ( n = 0; n < max_size && \
+ (c = getc( yyin )) != EOF && c != '\n'; ++n ) \
+ buf[n] = (char) c; \
+ if ( c == '\n' ) \
+ buf[n++] = (char) c; \
+ if ( c == EOF && ferror( yyin ) ) \
+ YY_FATAL_ERROR( "input in flex scanner failed" ); \
+ result = n; \
+ } \
+ else \
+ { \
+ errno=0; \
+ while ( (result = fread(buf, 1, max_size, yyin))==0 && ferror(yyin)) \
+ { \
+ if( errno != EINTR) \
+ { \
+ YY_FATAL_ERROR( "input in flex scanner failed" ); \
+ break; \
+ } \
+ errno=0; \
+ clearerr(yyin); \
+ } \
+ }\
+\
+
+#endif
+
+/* No semi-colon after return; correct usage is to write "yyterminate();" -
+ * we don't want an extra ';' after the "return" because that will cause
+ * some compilers to complain about unreachable statements.
+ */
+#ifndef yyterminate
+#define yyterminate() return YY_NULL
+#endif
+
+/* Number of entries by which start-condition stack grows. */
+#ifndef YY_START_STACK_INCR
+#define YY_START_STACK_INCR 25
+#endif
+
+/* Report a fatal error. */
+#ifndef YY_FATAL_ERROR
+#define YY_FATAL_ERROR(msg) yy_fatal_error( msg , yyscanner)
+#endif
+
+/* end tables serialization structures and prototypes */
+
+/* Default declaration of generated scanner - a define so the user can
+ * easily add parameters.
+ */
+#ifndef YY_DECL
+#define YY_DECL_IS_OURS 1
+
+extern int yylex \
+ (YYSTYPE * yylval_param ,yyscan_t yyscanner);
+
+#define YY_DECL int yylex \
+ (YYSTYPE * yylval_param , yyscan_t yyscanner)
+#endif /* !YY_DECL */
+
+/* Code executed at the beginning of each rule, after yytext and yyleng
+ * have been set up.
+ */
+#ifndef YY_USER_ACTION
+#define YY_USER_ACTION
+#endif
+
+/* Code executed at the end of each rule. */
+#ifndef YY_BREAK
+#define YY_BREAK break;
+#endif
+
+#define YY_RULE_SETUP \
+ YY_USER_ACTION
+
+/** The main scanner function which does all the work.
+ */
+YY_DECL
+{
+ register yy_state_type yy_current_state;
+ register char *yy_cp, *yy_bp;
+ register int yy_act;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+
+#line 59 "_jsgf_scanner.l"
+
+
+#line 850 "jsgf_scanner.c"
+
+ yylval = yylval_param;
+
+ if ( !yyg->yy_init )
+ {
+ yyg->yy_init = 1;
+
+#ifdef YY_USER_INIT
+ YY_USER_INIT;
+#endif
+
+ if ( ! yyg->yy_start )
+ yyg->yy_start = 1; /* first start state */
+
+ if ( ! yyin )
+ yyin = stdin;
+
+ if ( ! yyout )
+ yyout = stdout;
+
+ if ( ! YY_CURRENT_BUFFER ) {
+ yyensure_buffer_stack (yyscanner);
+ YY_CURRENT_BUFFER_LVALUE =
+ yy_create_buffer(yyin,YY_BUF_SIZE ,yyscanner);
+ }
+
+ yy_load_buffer_state(yyscanner );
+ }
+
+ while ( 1 ) /* loops until end-of-file is reached */
+ {
+ yy_cp = yyg->yy_c_buf_p;
+
+ /* Support of yytext. */
+ *yy_cp = yyg->yy_hold_char;
+
+ /* yy_bp points to the position in yy_ch_buf of the start of
+ * the current run.
+ */
+ yy_bp = yy_cp;
+
+ yy_current_state = yyg->yy_start;
+yy_match:
+ do
+ {
+ register YY_CHAR yy_c = yy_ec[YY_SC_TO_UI(*yy_cp)];
+ if ( yy_accept[yy_current_state] )
+ {
+ yyg->yy_last_accepting_state = yy_current_state;
+ yyg->yy_last_accepting_cpos = yy_cp;
+ }
+ while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
+ {
+ yy_current_state = (int) yy_def[yy_current_state];
+ if ( yy_current_state >= 98 )
+ yy_c = yy_meta[(unsigned int) yy_c];
+ }
+ yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
+ ++yy_cp;
+ }
+ while ( yy_current_state != 97 );
+ yy_cp = yyg->yy_last_accepting_cpos;
+ yy_current_state = yyg->yy_last_accepting_state;
+
+yy_find_action:
+ yy_act = yy_accept[yy_current_state];
+
+ YY_DO_BEFORE_ACTION;
+
+ if ( yy_act != YY_END_OF_BUFFER && yy_rule_can_match_eol[yy_act] )
+ {
+ int yyl;
+ for ( yyl = 0; yyl < yyleng; ++yyl )
+ if ( yytext[yyl] == '\n' )
+
+ do{ yylineno++;
+ yycolumn=0;
+ }while(0)
+;
+ }
+
+do_action: /* This label is used only to access EOF actions. */
+
+ switch ( yy_act )
+ { /* beginning of action switch */
+ case 0: /* must back up */
+ /* undo the effects of YY_DO_BEFORE_ACTION */
+ *yy_cp = yyg->yy_hold_char;
+ yy_cp = yyg->yy_last_accepting_cpos;
+ yy_current_state = yyg->yy_last_accepting_state;
+ goto yy_find_action;
+
+case 1:
+/* rule 1 can match eol */
+YY_RULE_SETUP
+#line 61 "_jsgf_scanner.l"
+; /* ignore whitespace */
+ YY_BREAK
+case 2:
+/* rule 2 can match eol */
+YY_RULE_SETUP
+#line 62 "_jsgf_scanner.l"
+; /* single-line comments */
+ YY_BREAK
+case 3:
+YY_RULE_SETUP
+#line 63 "_jsgf_scanner.l"
+{ BEGIN(COMMENT); } /* C-style comments */
+ YY_BREAK
+case 4:
+YY_RULE_SETUP
+#line 64 "_jsgf_scanner.l"
+{ BEGIN(INITIAL); }
+ YY_BREAK
+case 5:
+YY_RULE_SETUP
+#line 65 "_jsgf_scanner.l"
+; /* Ignore stuff in comment mode */
+ YY_BREAK
+case 6:
+/* rule 6 can match eol */
+YY_RULE_SETUP
+#line 67 "_jsgf_scanner.l"
+; /* single-line comments inside decl */
+ YY_BREAK
+case 7:
+YY_RULE_SETUP
+#line 68 "_jsgf_scanner.l"
+{ BEGIN(DECLCOMMENT); } /* C-style comments inside decl */
+ YY_BREAK
+case 8:
+YY_RULE_SETUP
+#line 69 "_jsgf_scanner.l"
+{ BEGIN(DECL); }
+ YY_BREAK
+case 9:
+YY_RULE_SETUP
+#line 70 "_jsgf_scanner.l"
+; /* Ignore stuff in comment mode */
+ YY_BREAK
+case 10:
+YY_RULE_SETUP
+#line 72 "_jsgf_scanner.l"
+{BEGIN(DECL); return HEADER;}
+ YY_BREAK
+case 11:
+YY_RULE_SETUP
+#line 73 "_jsgf_scanner.l"
+{BEGIN(DECL); return GRAMMAR;}
+ YY_BREAK
+case 12:
+YY_RULE_SETUP
+#line 74 "_jsgf_scanner.l"
+{BEGIN(DECL); return IMPORT;}
+ YY_BREAK
+case 13:
+YY_RULE_SETUP
+#line 75 "_jsgf_scanner.l"
+{BEGIN(DECL); return PUBLIC;}
+ YY_BREAK
+case 14:
+/* rule 14 can match eol */
+YY_RULE_SETUP
+#line 77 "_jsgf_scanner.l"
+{ BEGIN(DECL); yylval->name = strdup(yytext); return RULENAME; }
+ YY_BREAK
+case 15:
+/* rule 15 can match eol */
+YY_RULE_SETUP
+#line 78 "_jsgf_scanner.l"
+{ yylval->name = strdup(yytext); return RULENAME; }
+ YY_BREAK
+case 16:
+/* rule 16 can match eol */
+YY_RULE_SETUP
+#line 80 "_jsgf_scanner.l"
+{ yylval->name = strdup(yytext); return TAG; }
+ YY_BREAK
+case 17:
+YY_RULE_SETUP
+#line 81 "_jsgf_scanner.l"
+{ yylval->name = strdup(yytext); return TOKEN; }
+ YY_BREAK
+case 18:
+YY_RULE_SETUP
+#line 82 "_jsgf_scanner.l"
+{ BEGIN(INITIAL); return yytext[0]; }
+ YY_BREAK
+case 19:
+/* rule 19 can match eol */
+YY_RULE_SETUP
+#line 83 "_jsgf_scanner.l"
+{ yylval->name = strdup(yytext); return TOKEN; }
+ YY_BREAK
+case 20:
+YY_RULE_SETUP
+#line 84 "_jsgf_scanner.l"
+{ yylval->weight = atof_c(yytext+1); return WEIGHT; }
+ YY_BREAK
+case 21:
+YY_RULE_SETUP
+#line 85 "_jsgf_scanner.l"
+return yytext[0]; /* Single-character tokens */
+ YY_BREAK
+case 22:
+YY_RULE_SETUP
+#line 87 "_jsgf_scanner.l"
+ECHO;
+ YY_BREAK
+#line 1060 "jsgf_scanner.c"
+case YY_STATE_EOF(INITIAL):
+case YY_STATE_EOF(COMMENT):
+case YY_STATE_EOF(DECL):
+case YY_STATE_EOF(DECLCOMMENT):
+ yyterminate();
+
+ case YY_END_OF_BUFFER:
+ {
+ /* Amount of text matched not including the EOB char. */
+ int yy_amount_of_matched_text = (int) (yy_cp - yyg->yytext_ptr) - 1;
+
+ /* Undo the effects of YY_DO_BEFORE_ACTION. */
+ *yy_cp = yyg->yy_hold_char;
+ YY_RESTORE_YY_MORE_OFFSET
+
+ if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_NEW )
+ {
+ /* We're scanning a new file or input source. It's
+ * possible that this happened because the user
+ * just pointed yyin at a new source and called
+ * yylex(). If so, then we have to assure
+ * consistency between YY_CURRENT_BUFFER and our
+ * globals. Here is the right place to do so, because
+ * this is the first action (other than possibly a
+ * back-up) that will match for the new input source.
+ */
+ yyg->yy_n_chars = YY_CURRENT_BUFFER_LVALUE->yy_n_chars;
+ YY_CURRENT_BUFFER_LVALUE->yy_input_file = yyin;
+ YY_CURRENT_BUFFER_LVALUE->yy_buffer_status = YY_BUFFER_NORMAL;
+ }
+
+ /* Note that here we test for yy_c_buf_p "<=" to the position
+ * of the first EOB in the buffer, since yy_c_buf_p will
+ * already have been incremented past the NUL character
+ * (since all states make transitions on EOB to the
+ * end-of-buffer state). Contrast this with the test
+ * in input().
+ */
+ if ( yyg->yy_c_buf_p <= &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars] )
+ { /* This was really a NUL. */
+ yy_state_type yy_next_state;
+
+ yyg->yy_c_buf_p = yyg->yytext_ptr + yy_amount_of_matched_text;
+
+ yy_current_state = yy_get_previous_state( yyscanner );
+
+ /* Okay, we're now positioned to make the NUL
+ * transition. We couldn't have
+ * yy_get_previous_state() go ahead and do it
+ * for us because it doesn't know how to deal
+ * with the possibility of jamming (and we don't
+ * want to build jamming into it because then it
+ * will run more slowly).
+ */
+
+ yy_next_state = yy_try_NUL_trans( yy_current_state , yyscanner);
+
+ yy_bp = yyg->yytext_ptr + YY_MORE_ADJ;
+
+ if ( yy_next_state )
+ {
+ /* Consume the NUL. */
+ yy_cp = ++yyg->yy_c_buf_p;
+ yy_current_state = yy_next_state;
+ goto yy_match;
+ }
+
+ else
+ {
+ yy_cp = yyg->yy_last_accepting_cpos;
+ yy_current_state = yyg->yy_last_accepting_state;
+ goto yy_find_action;
+ }
+ }
+
+ else switch ( yy_get_next_buffer( yyscanner ) )
+ {
+ case EOB_ACT_END_OF_FILE:
+ {
+ yyg->yy_did_buffer_switch_on_eof = 0;
+
+ if ( yywrap(yyscanner ) )
+ {
+ /* Note: because we've taken care in
+ * yy_get_next_buffer() to have set up
+ * yytext, we can now set up
+ * yy_c_buf_p so that if some total
+ * hoser (like flex itself) wants to
+ * call the scanner after we return the
+ * YY_NULL, it'll still work - another
+ * YY_NULL will get returned.
+ */
+ yyg->yy_c_buf_p = yyg->yytext_ptr + YY_MORE_ADJ;
+
+ yy_act = YY_STATE_EOF(YY_START);
+ goto do_action;
+ }
+
+ else
+ {
+ if ( ! yyg->yy_did_buffer_switch_on_eof )
+ YY_NEW_FILE;
+ }
+ break;
+ }
+
+ case EOB_ACT_CONTINUE_SCAN:
+ yyg->yy_c_buf_p =
+ yyg->yytext_ptr + yy_amount_of_matched_text;
+
+ yy_current_state = yy_get_previous_state( yyscanner );
+
+ yy_cp = yyg->yy_c_buf_p;
+ yy_bp = yyg->yytext_ptr + YY_MORE_ADJ;
+ goto yy_match;
+
+ case EOB_ACT_LAST_MATCH:
+ yyg->yy_c_buf_p =
+ &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars];
+
+ yy_current_state = yy_get_previous_state( yyscanner );
+
+ yy_cp = yyg->yy_c_buf_p;
+ yy_bp = yyg->yytext_ptr + YY_MORE_ADJ;
+ goto yy_find_action;
+ }
+ break;
+ }
+
+ default:
+ YY_FATAL_ERROR(
+ "fatal flex scanner internal error--no action found" );
+ } /* end of action switch */
+ } /* end of scanning one token */
+} /* end of yylex */
+
+/* yy_get_next_buffer - try to read in a new buffer
+ *
+ * Returns a code representing an action:
+ * EOB_ACT_LAST_MATCH - no more input, but text was matched before the
+ * EOB and must be processed first
+ * EOB_ACT_CONTINUE_SCAN - continue scanning from current position
+ * EOB_ACT_END_OF_FILE - end of file
+ */
+static int yy_get_next_buffer (yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+ register char *dest = YY_CURRENT_BUFFER_LVALUE->yy_ch_buf;
+ register char *source = yyg->yytext_ptr;
+ register int number_to_move, i;
+ int ret_val;
+
+ if ( yyg->yy_c_buf_p > &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars + 1] )
+ YY_FATAL_ERROR(
+ "fatal flex scanner internal error--end of buffer missed" );
+
+ if ( YY_CURRENT_BUFFER_LVALUE->yy_fill_buffer == 0 )
+ { /* Don't try to fill the buffer, so this is an EOF. */
+ if ( yyg->yy_c_buf_p - yyg->yytext_ptr - YY_MORE_ADJ == 1 )
+ {
+ /* We matched a single character, the EOB, so
+ * treat this as a final EOF.
+ */
+ return EOB_ACT_END_OF_FILE;
+ }
+
+ else
+ {
+ /* We matched some text prior to the EOB, first
+ * process it.
+ */
+ return EOB_ACT_LAST_MATCH;
+ }
+ }
+
+ /* Try to read more data. */
+
+ /* First move last chars to start of buffer. */
+ number_to_move = (int) (yyg->yy_c_buf_p - yyg->yytext_ptr) - 1;
+
+ for ( i = 0; i < number_to_move; ++i )
+ *(dest++) = *(source++);
+
+ if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_EOF_PENDING )
+ /* don't do the read, it's not guaranteed to return an EOF,
+ * just force an EOF
+ */
+ YY_CURRENT_BUFFER_LVALUE->yy_n_chars = yyg->yy_n_chars = 0;
+
+ else
+ {
+ int num_to_read =
+ YY_CURRENT_BUFFER_LVALUE->yy_buf_size - number_to_move - 1;
+
+ while ( num_to_read <= 0 )
+ { /* Not enough room in the buffer - grow it. */
+
+ /* just a shorter name for the current buffer */
+ YY_BUFFER_STATE b = YY_CURRENT_BUFFER_LVALUE;
+
+ int yy_c_buf_p_offset =
+ (int) (yyg->yy_c_buf_p - b->yy_ch_buf);
+
+ if ( b->yy_is_our_buffer )
+ {
+ yy_size_t new_size = b->yy_buf_size * 2;
+
+ if ( new_size <= 0 )
+ b->yy_buf_size += b->yy_buf_size / 8;
+ else
+ b->yy_buf_size *= 2;
+
+ b->yy_ch_buf = (char *)
+ /* Include room in for 2 EOB chars. */
+ yyrealloc((void *) b->yy_ch_buf,b->yy_buf_size + 2 ,yyscanner );
+ }
+ else
+ /* Can't grow it, we don't own it. */
+ b->yy_ch_buf = 0;
+
+ if ( ! b->yy_ch_buf )
+ YY_FATAL_ERROR(
+ "fatal error - scanner input buffer overflow" );
+
+ yyg->yy_c_buf_p = &b->yy_ch_buf[yy_c_buf_p_offset];
+
+ num_to_read = YY_CURRENT_BUFFER_LVALUE->yy_buf_size -
+ number_to_move - 1;
+
+ }
+
+ if ( num_to_read > YY_READ_BUF_SIZE )
+ num_to_read = YY_READ_BUF_SIZE;
+
+ /* Read in more data. */
+ YY_INPUT( (&YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[number_to_move]),
+ yyg->yy_n_chars, num_to_read );
+
+ YY_CURRENT_BUFFER_LVALUE->yy_n_chars = yyg->yy_n_chars;
+ }
+
+ if ( yyg->yy_n_chars == 0 )
+ {
+ if ( number_to_move == YY_MORE_ADJ )
+ {
+ ret_val = EOB_ACT_END_OF_FILE;
+ yyrestart(yyin ,yyscanner);
+ }
+
+ else
+ {
+ ret_val = EOB_ACT_LAST_MATCH;
+ YY_CURRENT_BUFFER_LVALUE->yy_buffer_status =
+ YY_BUFFER_EOF_PENDING;
+ }
+ }
+
+ else
+ ret_val = EOB_ACT_CONTINUE_SCAN;
+
+ if ((yy_size_t) (yyg->yy_n_chars + number_to_move) > YY_CURRENT_BUFFER_LVALUE->yy_buf_size) {
+ /* Extend the array by 50%, plus the number we really need. */
+ yy_size_t new_size = yyg->yy_n_chars + number_to_move + (yyg->yy_n_chars >> 1);
+ YY_CURRENT_BUFFER_LVALUE->yy_ch_buf = (char *) yyrealloc((void *) YY_CURRENT_BUFFER_LVALUE->yy_ch_buf,new_size ,yyscanner );
+ if ( ! YY_CURRENT_BUFFER_LVALUE->yy_ch_buf )
+ YY_FATAL_ERROR( "out of dynamic memory in yy_get_next_buffer()" );
+ }
+
+ yyg->yy_n_chars += number_to_move;
+ YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars] = YY_END_OF_BUFFER_CHAR;
+ YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars + 1] = YY_END_OF_BUFFER_CHAR;
+
+ yyg->yytext_ptr = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[0];
+
+ return ret_val;
+}
+
+/* yy_get_previous_state - get the state just before the EOB char was reached
+ *
+ * Replays the transition tables from the start state (yyg->yy_start) over
+ * the characters from yytext_ptr + YY_MORE_ADJ up to, but not including,
+ * yy_c_buf_p. While doing so it records the most recent accepting state and
+ * its position in yyg. Returns the state reached after the last character.
+ */
+
+ static yy_state_type yy_get_previous_state (yyscan_t yyscanner)
+{
+ register yy_state_type yy_current_state;
+ register char *yy_cp;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+
+ yy_current_state = yyg->yy_start;
+
+ for ( yy_cp = yyg->yytext_ptr + YY_MORE_ADJ; yy_cp < yyg->yy_c_buf_p; ++yy_cp )
+ {
+ /* A NUL byte bypasses the yy_ec lookup and is treated as class 1. */
+ register YY_CHAR yy_c = (*yy_cp ? yy_ec[YY_SC_TO_UI(*yy_cp)] : 1);
+ if ( yy_accept[yy_current_state] )
+ {
+ /* Remember the latest accepting state; yylex() backs up to it. */
+ yyg->yy_last_accepting_state = yy_current_state;
+ yyg->yy_last_accepting_cpos = yy_cp;
+ }
+ /* Follow default-state links until a table entry owned by this state is found. */
+ while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
+ {
+ yy_current_state = (int) yy_def[yy_current_state];
+ if ( yy_current_state >= 98 )
+ yy_c = yy_meta[(unsigned int) yy_c];
+ }
+ yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
+ }
+
+ return yy_current_state;
+}
+
+/* yy_try_NUL_trans - try to make a transition on the NUL character
+ *
+ * synopsis
+ * next_state = yy_try_NUL_trans( current_state, yyscanner );
+ *
+ * Returns 0 when the transition jams (the table lookup lands in state 97),
+ * otherwise the state reached. As a side effect, the last accepting
+ * state/position stored in the scanner is updated when current_state is
+ * an accepting state.
+ */
+ static yy_state_type yy_try_NUL_trans (yy_state_type yy_current_state , yyscan_t yyscanner)
+{
+ register int yy_is_jam;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; /* This var may be unused depending upon options. */
+ register char *yy_cp = yyg->yy_c_buf_p;
+
+ /* NUL is always looked up as class 1 (same value used in yy_get_previous_state). */
+ register YY_CHAR yy_c = 1;
+ if ( yy_accept[yy_current_state] )
+ {
+ yyg->yy_last_accepting_state = yy_current_state;
+ yyg->yy_last_accepting_cpos = yy_cp;
+ }
+ /* Follow default-state links until a table entry owned by this state is found. */
+ while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
+ {
+ yy_current_state = (int) yy_def[yy_current_state];
+ if ( yy_current_state >= 98 )
+ yy_c = yy_meta[(unsigned int) yy_c];
+ }
+ yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
+ yy_is_jam = (yy_current_state == 97);
+
+ /* Silences "unused variable" warnings in configurations where yyg is not otherwise read. */
+ (void)yyg;
+ return yy_is_jam ? 0 : yy_current_state;
+}
+
+#ifndef YY_NO_INPUT
+/* input - read and consume the next character from the scanner's buffer
+ *
+ * Compiled as yyinput() under C++ and input() otherwise. Refills the buffer
+ * through yy_get_next_buffer() when the end of the buffered data is reached.
+ * Returns the character as an unsigned char value, or EOF when yywrap()
+ * reports that no further input is available. A returned '\n' also bumps
+ * yylineno and resets yycolumn.
+ */
+#ifdef __cplusplus
+ static int yyinput (yyscan_t yyscanner)
+#else
+ static int input (yyscan_t yyscanner)
+#endif
+
+{
+ int c;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+
+ /* Put back the character that was overwritten to terminate yytext. */
+ *yyg->yy_c_buf_p = yyg->yy_hold_char;
+
+ if ( *yyg->yy_c_buf_p == YY_END_OF_BUFFER_CHAR )
+ {
+ /* yy_c_buf_p now points to the character we want to return.
+ * If this occurs *before* the EOB characters, then it's a
+ * valid NUL; if not, then we've hit the end of the buffer.
+ */
+ if ( yyg->yy_c_buf_p < &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars] )
+ /* This was really a NUL. */
+ *yyg->yy_c_buf_p = '\0';
+
+ else
+ { /* need more input */
+ /* Saved as an offset because the refill resets yytext_ptr to the buffer start. */
+ yy_size_t offset = yyg->yy_c_buf_p - yyg->yytext_ptr;
+ ++yyg->yy_c_buf_p;
+
+ switch ( yy_get_next_buffer( yyscanner ) )
+ {
+ case EOB_ACT_LAST_MATCH:
+ /* This happens because yy_g_n_b()
+ * sees that we've accumulated a
+ * token and flags that we need to
+ * try matching the token before
+ * proceeding. But for input(),
+ * there's no matching to consider.
+ * So convert the EOB_ACT_LAST_MATCH
+ * to EOB_ACT_END_OF_FILE.
+ */
+
+ /* Reset buffer status. */
+ yyrestart(yyin ,yyscanner);
+
+ /*FALLTHROUGH*/
+
+ case EOB_ACT_END_OF_FILE:
+ {
+ if ( yywrap(yyscanner ) )
+ return EOF;
+
+ if ( ! yyg->yy_did_buffer_switch_on_eof )
+ YY_NEW_FILE;
+ /* yywrap() returned 0, so retry the read from the start. */
+#ifdef __cplusplus
+ return yyinput(yyscanner);
+#else
+ return input(yyscanner);
+#endif
+ }
+
+ case EOB_ACT_CONTINUE_SCAN:
+ yyg->yy_c_buf_p = yyg->yytext_ptr + offset;
+ break;
+ }
+ }
+ }
+
+ c = *(unsigned char *) yyg->yy_c_buf_p; /* cast for 8-bit char's */
+ *yyg->yy_c_buf_p = '\0'; /* preserve yytext */
+ yyg->yy_hold_char = *++yyg->yy_c_buf_p;
+
+ if ( c == '\n' )
+
+ do{ yylineno++;
+ yycolumn=0;
+ }while(0)
+;
+
+ return c;
+}
+#endif /* ifndef YY_NO_INPUT */
+
+/** Immediately switch to a different input stream.
+ * If the scanner has no current buffer yet, one of size YY_BUF_SIZE is
+ * created first; the current buffer is then re-initialized to read from
+ * @a input_file and its state is loaded into the scanner.
+ * @param input_file A readable stream.
+ * @param yyscanner The scanner object.
+ * @note This function does not reset the start condition to @c INITIAL .
+ */
+ void yyrestart (FILE * input_file , yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+
+ if ( ! YY_CURRENT_BUFFER ){
+ yyensure_buffer_stack (yyscanner);
+ /* Created on yyin here; yy_init_buffer() below points it at input_file. */
+ YY_CURRENT_BUFFER_LVALUE =
+ yy_create_buffer(yyin,YY_BUF_SIZE ,yyscanner);
+ }
+
+ yy_init_buffer(YY_CURRENT_BUFFER,input_file ,yyscanner);
+ yy_load_buffer_state(yyscanner );
+}
+
+/** Switch to a different input buffer.
+ * The scan position and character count of the buffer being replaced are
+ * saved back into it. The replaced buffer is not freed here. Does nothing
+ * beyond ensuring the stack exists when @a new_buffer is already current.
+ * @param new_buffer The new input buffer.
+ * @param yyscanner The scanner object.
+ */
+ void yy_switch_to_buffer (YY_BUFFER_STATE new_buffer , yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+
+ /* TODO. We should be able to replace this entire function body
+ * with
+ * yypop_buffer_state();
+ * yypush_buffer_state(new_buffer);
+ */
+ yyensure_buffer_stack (yyscanner);
+ if ( YY_CURRENT_BUFFER == new_buffer )
+ return;
+
+ if ( YY_CURRENT_BUFFER )
+ {
+ /* Flush out information for old buffer. */
+ *yyg->yy_c_buf_p = yyg->yy_hold_char;
+ YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = yyg->yy_c_buf_p;
+ YY_CURRENT_BUFFER_LVALUE->yy_n_chars = yyg->yy_n_chars;
+ }
+
+ /* Overwrites the top-of-stack slot; the stack depth is unchanged. */
+ YY_CURRENT_BUFFER_LVALUE = new_buffer;
+ yy_load_buffer_state(yyscanner );
+
+ /* We don't actually know whether we did this switch during
+ * EOF (yywrap()) processing, but the only time this flag
+ * is looked at is after yywrap() is called, so it's safe
+ * to go ahead and always set it.
+ */
+ yyg->yy_did_buffer_switch_on_eof = 1;
+}
+
+/* yy_load_buffer_state - copy the current buffer's state into the scanner
+ *
+ * Loads the character count, scan position (also used as the new
+ * yytext_ptr), input file and hold character from the current buffer.
+ * Dereferences YY_CURRENT_BUFFER_LVALUE unconditionally, so a current
+ * buffer must exist when this is called.
+ */
+static void yy_load_buffer_state (yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+ yyg->yy_n_chars = YY_CURRENT_BUFFER_LVALUE->yy_n_chars;
+ yyg->yytext_ptr = yyg->yy_c_buf_p = YY_CURRENT_BUFFER_LVALUE->yy_buf_pos;
+ yyin = YY_CURRENT_BUFFER_LVALUE->yy_input_file;
+ yyg->yy_hold_char = *yyg->yy_c_buf_p;
+}
+
+/** Create a scanner input buffer that reads from a stdio stream.
+ * @param file A readable stream.
+ * @param size Capacity of the character buffer in bytes. When in doubt, use @c YY_BUF_SIZE.
+ * @param yyscanner The scanner object.
+ * @return the newly allocated and initialized buffer state.
+ */
+ YY_BUFFER_STATE yy_create_buffer (FILE * file, int size , yyscan_t yyscanner)
+{
+    YY_BUFFER_STATE buffer =
+        (YY_BUFFER_STATE) yyalloc( sizeof( struct yy_buffer_state ), yyscanner );
+
+    if ( buffer == NULL )
+        YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" );
+
+    buffer->yy_buf_size = size;
+
+    /* Two bytes beyond the requested size hold the pair of
+     * end-of-buffer sentinel characters.
+     */
+    buffer->yy_ch_buf = (char *) yyalloc( buffer->yy_buf_size + 2, yyscanner );
+    if ( buffer->yy_ch_buf == NULL )
+        YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" );
+
+    /* The scanner allocated the character array, so it may free or grow it. */
+    buffer->yy_is_our_buffer = 1;
+
+    yy_init_buffer( buffer, file, yyscanner );
+
+    return buffer;
+}
+
+/** Release a buffer state and, when the scanner owns it, its character array.
+ * A null @a b is ignored.
+ * @param b a buffer created with yy_create_buffer()
+ * @param yyscanner The scanner object.
+ */
+ void yy_delete_buffer (YY_BUFFER_STATE b , yyscan_t yyscanner)
+{
+    struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+
+    if ( b == NULL )
+        return;
+
+    /* Clear the current slot if it refers to the buffer being destroyed;
+     * the stack itself is not popped here.
+     */
+    if ( b == YY_CURRENT_BUFFER )
+        YY_CURRENT_BUFFER_LVALUE = (YY_BUFFER_STATE) 0;
+
+    /* Caller-supplied character arrays are left untouched. */
+    if ( b->yy_is_our_buffer != 0 )
+        yyfree( (void *) b->yy_ch_buf, yyscanner );
+
+    yyfree( (void *) b, yyscanner );
+}
+
+/* Initializes or reinitializes a buffer.
+ * This function is sometimes called more than once on the same buffer,
+ * such as during a yyrestart() or at EOF.
+ *
+ * The buffer is flushed, bound to `file`, marked as fillable and
+ * non-interactive. errno is saved on entry and restored on exit, so the
+ * caller's errno value is preserved.
+ */
+ static void yy_init_buffer (YY_BUFFER_STATE b, FILE * file , yyscan_t yyscanner)
+
+{
+ int oerrno = errno;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+
+ yy_flush_buffer(b ,yyscanner);
+
+ b->yy_input_file = file;
+ b->yy_fill_buffer = 1;
+
+ /* If b is the current buffer, then yy_init_buffer was _probably_
+ * called from yyrestart() or through yy_get_next_buffer.
+ * In that case, we don't want to reset the lineno or column.
+ */
+ if (b != YY_CURRENT_BUFFER){
+ b->yy_bs_lineno = 1;
+ b->yy_bs_column = 0;
+ }
+
+ b->yy_is_interactive = 0;
+
+ errno = oerrno;
+}
+
+/** Discard all buffered characters. On the next scan, YY_INPUT will be called.
+ * A null @a b is ignored. If @a b is the current buffer, the scanner's
+ * cached buffer state is reloaded as well.
+ * @param b the buffer state to be flushed, usually @c YY_CURRENT_BUFFER.
+ * @param yyscanner The scanner object.
+ */
+ void yy_flush_buffer (YY_BUFFER_STATE b , yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+ if ( ! b )
+ return;
+
+ b->yy_n_chars = 0;
+
+ /* We always need two end-of-buffer characters. The first causes
+ * a transition to the end-of-buffer state. The second causes
+ * a jam in that state.
+ */
+ b->yy_ch_buf[0] = YY_END_OF_BUFFER_CHAR;
+ b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR;
+
+ /* Rewind the scan position to the start of the (now empty) buffer. */
+ b->yy_buf_pos = &b->yy_ch_buf[0];
+
+ b->yy_at_bol = 1;
+ b->yy_buffer_status = YY_BUFFER_NEW;
+
+ if ( b == YY_CURRENT_BUFFER )
+ yy_load_buffer_state(yyscanner );
+}
+
+/** Pushes the new state onto the stack. The new state becomes
+ * the current state. This function will allocate the stack
+ * if necessary. A null @a new_buffer is ignored.
+ * @param new_buffer The new state.
+ * @param yyscanner The scanner object.
+ */
+void yypush_buffer_state (YY_BUFFER_STATE new_buffer , yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+ if (new_buffer == NULL)
+ return;
+
+ yyensure_buffer_stack(yyscanner);
+
+ /* This block is copied from yy_switch_to_buffer. */
+ if ( YY_CURRENT_BUFFER )
+ {
+ /* Flush out information for old buffer. */
+ *yyg->yy_c_buf_p = yyg->yy_hold_char;
+ YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = yyg->yy_c_buf_p;
+ YY_CURRENT_BUFFER_LVALUE->yy_n_chars = yyg->yy_n_chars;
+ }
+
+ /* Only push if top exists. Otherwise, replace top. */
+ if (YY_CURRENT_BUFFER)
+ yyg->yy_buffer_stack_top++;
+ YY_CURRENT_BUFFER_LVALUE = new_buffer;
+
+ /* copied from yy_switch_to_buffer. */
+ yy_load_buffer_state(yyscanner );
+ /* Always set; see the matching note in yy_switch_to_buffer(). */
+ yyg->yy_did_buffer_switch_on_eof = 1;
+}
+
+/** Removes and deletes the top of the stack, if present.
+ * The next element becomes the new top. Does nothing when there is no
+ * current buffer.
+ * @param yyscanner The scanner object.
+ */
+void yypop_buffer_state (yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+ if (!YY_CURRENT_BUFFER)
+ return;
+
+ yy_delete_buffer(YY_CURRENT_BUFFER ,yyscanner);
+ YY_CURRENT_BUFFER_LVALUE = NULL;
+ /* The index stays at 0 when the bottom element was popped. */
+ if (yyg->yy_buffer_stack_top > 0)
+ --yyg->yy_buffer_stack_top;
+
+ /* Reload scanner state only if a buffer remains underneath. */
+ if (YY_CURRENT_BUFFER) {
+ yy_load_buffer_state(yyscanner );
+ yyg->yy_did_buffer_switch_on_eof = 1;
+ }
+}
+
+/* Allocates the stack if it does not exist.
+ * Guarantees space for at least one push.
+ *
+ * Allocation failures are reported through YY_FATAL_ERROR.
+ */
+static void yyensure_buffer_stack (yyscan_t yyscanner)
+{
+ yy_size_t num_to_alloc;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+
+ if (!yyg->yy_buffer_stack) {
+
+ /* First allocation is for a single element, since we don't know if
+ * this scanner will even need a stack.
+ * NOTE(review): this comment previously claimed 2 elements were
+ * allocated to avoid an immediate realloc on the next call, but
+ * num_to_alloc is 1, so the next call does take the grow path below.
+ */
+ num_to_alloc = 1;
+ yyg->yy_buffer_stack = (struct yy_buffer_state**)yyalloc
+ (num_to_alloc * sizeof(struct yy_buffer_state*)
+ , yyscanner);
+ if ( ! yyg->yy_buffer_stack )
+ YY_FATAL_ERROR( "out of dynamic memory in yyensure_buffer_stack()" );
+
+ memset(yyg->yy_buffer_stack, 0, num_to_alloc * sizeof(struct yy_buffer_state*));
+
+ yyg->yy_buffer_stack_max = num_to_alloc;
+ yyg->yy_buffer_stack_top = 0;
+ return;
+ }
+
+ /* Grow when the top index is at the last slot, so a push always has room. */
+ if (yyg->yy_buffer_stack_top >= (yyg->yy_buffer_stack_max) - 1){
+
+ /* Increase the buffer to prepare for a possible push. */
+ int grow_size = 8 /* arbitrary grow size */;
+
+ num_to_alloc = yyg->yy_buffer_stack_max + grow_size;
+ yyg->yy_buffer_stack = (struct yy_buffer_state**)yyrealloc
+ (yyg->yy_buffer_stack,
+ num_to_alloc * sizeof(struct yy_buffer_state*)
+ , yyscanner);
+ if ( ! yyg->yy_buffer_stack )
+ YY_FATAL_ERROR( "out of dynamic memory in yyensure_buffer_stack()" );
+
+ /* zero only the new slots.*/
+ memset(yyg->yy_buffer_stack + yyg->yy_buffer_stack_max, 0, grow_size * sizeof(struct yy_buffer_state*));
+ yyg->yy_buffer_stack_max = num_to_alloc;
+ }
+}
+
+/** Scan in place from a caller-supplied character buffer.
+ * The last two bytes of the buffer must already be end-of-buffer characters;
+ * the buffer is used directly and is not copied.
+ * @param base the character buffer
+ * @param size the size in bytes of the character buffer
+ * @param yyscanner The scanner object.
+ * @return the newly allocated buffer state object, or 0 when @a size is
+ *         below 2 or the trailing end-of-buffer characters are missing.
+ */
+YY_BUFFER_STATE yy_scan_buffer (char * base, yy_size_t size , yyscan_t yyscanner)
+{
+    YY_BUFFER_STATE state;
+
+    /* Reject buffers that leave no room for the two EOB characters. */
+    if ( size < 2 )
+        return 0;
+    if ( base[size-2] != YY_END_OF_BUFFER_CHAR )
+        return 0;
+    if ( base[size-1] != YY_END_OF_BUFFER_CHAR )
+        return 0;
+
+    state = (YY_BUFFER_STATE) yyalloc( sizeof( struct yy_buffer_state ), yyscanner );
+    if ( state == NULL )
+        YY_FATAL_ERROR( "out of dynamic memory in yy_scan_buffer()" );
+
+    /* The usable size excludes the two EOB characters. */
+    state->yy_buf_size = size - 2;
+    state->yy_ch_buf = base;
+    state->yy_buf_pos = base;
+    /* The memory belongs to the caller: never refill it or free it. */
+    state->yy_is_our_buffer = 0;
+    state->yy_input_file = 0;
+    state->yy_n_chars = state->yy_buf_size;
+    state->yy_is_interactive = 0;
+    state->yy_at_bol = 1;
+    state->yy_fill_buffer = 0;
+    state->yy_buffer_status = YY_BUFFER_NEW;
+
+    yy_switch_to_buffer( state, yyscanner );
+
+    return state;
+}
+
+/** Scan a NUL-terminated string. The next call to yylex() will
+ * scan from a @e copy of @a yystr.
+ * @param yystr a NUL-terminated string to scan
+ * @param yyscanner The scanner object.
+ * @return the newly allocated buffer state object.
+ * @note For input that may contain NUL bytes, call yy_scan_bytes()
+ *       with an explicit length instead.
+ */
+YY_BUFFER_STATE yy_scan_string (yyconst char * yystr , yyscan_t yyscanner)
+{
+    /* The terminating NUL is not part of the scanned text. */
+    yy_size_t text_len = strlen( yystr );
+
+    return yy_scan_bytes( yystr, text_len, yyscanner );
+}
+
+/** Setup the input buffer state to scan the given bytes. The next call to yylex() will
+ * scan from a @e copy of @a yybytes.
+ * @param yybytes the byte buffer to scan
+ * @param _yybytes_len the number of bytes in the buffer pointed to by @a yybytes.
+ * @param yyscanner The scanner object.
+ * @return the newly allocated buffer state object. Allocation failure or a
+ *         rejected buffer is reported through YY_FATAL_ERROR.
+ */
+YY_BUFFER_STATE yy_scan_bytes (yyconst char * yybytes, yy_size_t _yybytes_len , yyscan_t yyscanner)
+{
+    YY_BUFFER_STATE b;
+    char *buf;
+    yy_size_t n;
+    /* Same type as the length: an int index would be compared against a
+     * yy_size_t with mixed signedness and could overflow on large inputs.
+     */
+    yy_size_t i;
+
+    /* Get memory for full buffer, including space for trailing EOB's. */
+    n = _yybytes_len + 2;
+    buf = (char *) yyalloc(n ,yyscanner );
+    if ( ! buf )
+        YY_FATAL_ERROR( "out of dynamic memory in yy_scan_bytes()" );
+
+    for ( i = 0; i < _yybytes_len; ++i )
+        buf[i] = yybytes[i];
+
+    buf[_yybytes_len] = buf[_yybytes_len+1] = YY_END_OF_BUFFER_CHAR;
+
+    b = yy_scan_buffer(buf,n ,yyscanner);
+    if ( ! b )
+        YY_FATAL_ERROR( "bad buffer in yy_scan_bytes()" );
+
+    /* It's okay to grow etc. this buffer, and we should throw it
+     * away when we're done.
+     */
+    b->yy_is_our_buffer = 1;
+
+    return b;
+}
+
+#ifndef YY_EXIT_FAILURE
+#define YY_EXIT_FAILURE 2
+#endif
+
+/* yy_fatal_error - report an unrecoverable scanner error
+ *
+ * Writes msg followed by a newline to stderr and terminates the process
+ * with exit status YY_EXIT_FAILURE; it does not return. The yyscanner
+ * argument is not used.
+ */
+static void yy_fatal_error (yyconst char* msg , yyscan_t yyscanner)
+{
+ (void) fprintf( stderr, "%s\n", msg );
+ exit( YY_EXIT_FAILURE );
+}
+
+/* Redefine yyless() so it works in section 3 code.
+ *
+ * yyless(n) shortens the current token to n characters: the character that
+ * was overwritten to terminate yytext is put back, the scan position is set
+ * to yytext + n, and yytext is re-terminated there with yyleng set to n.
+ * The expansion refers to a local named yyg, so it can only be used where
+ * such a variable is in scope.
+ */
+
+#undef yyless
+#define yyless(n) \
+ do \
+ { \
+ /* Undo effects of setting up yytext. */ \
+ int yyless_macro_arg = (n); \
+ YY_LESS_LINENO(yyless_macro_arg);\
+ yytext[yyleng] = yyg->yy_hold_char; \
+ yyg->yy_c_buf_p = yytext + yyless_macro_arg; \
+ yyg->yy_hold_char = *yyg->yy_c_buf_p; \
+ *yyg->yy_c_buf_p = '\0'; \
+ yyleng = yyless_macro_arg; \
+ } \
+ while ( 0 )
+
+/* Accessor methods (get/set functions) to struct members. */
+
+/** Get the user-defined data for this scanner.
+ * @param yyscanner The scanner object.
+ * @return the value most recently stored with yyset_extra().
+ */
+YY_EXTRA_TYPE yyget_extra (yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+ return yyextra;
+}
+
+/** Get the current line number.
+ * @param yyscanner The scanner object.
+ * @return the current line number, or 0 when the scanner has no current buffer.
+ */
+int yyget_lineno (yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+
+ if (! YY_CURRENT_BUFFER)
+ return 0;
+
+ return yylineno;
+}
+
+/** Get the current column number.
+ * @param yyscanner The scanner object.
+ * @return the current column number, or 0 when the scanner has no current buffer.
+ */
+int yyget_column (yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+
+ if (! YY_CURRENT_BUFFER)
+ return 0;
+
+ return yycolumn;
+}
+
+/** Get the input stream.
+ * @param yyscanner The scanner object.
+ * @return the scanner's current yyin stream.
+ */
+FILE *yyget_in (yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+ return yyin;
+}
+
+/** Get the output stream.
+ * @param yyscanner The scanner object.
+ * @return the scanner's current yyout stream.
+ */
+FILE *yyget_out (yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+ return yyout;
+}
+
+/** Get the length of the current token.
+ * @param yyscanner The scanner object.
+ * @return the scanner's current yyleng value.
+ */
+yy_size_t yyget_leng (yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+ return yyleng;
+}
+
+/** Get the current token.
+ * @param yyscanner The scanner object.
+ * @return the scanner's current yytext pointer; the text is not copied.
+ */
+
+char *yyget_text (yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+ return yytext;
+}
+
+/** Set the user-defined data. This data is never touched by the scanner.
+ * The value can be read back with yyget_extra().
+ * @param user_defined The data to be associated with this scanner.
+ * @param yyscanner The scanner object.
+ */
+void yyset_extra (YY_EXTRA_TYPE user_defined , yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+ yyextra = user_defined ;
+}
+
+/** Set the current line number.
+ * Raises a fatal scanner error when the scanner has no current buffer.
+ * @param line_number The line number to store.
+ * @param yyscanner The scanner object.
+ */
+void yyset_lineno (int line_number , yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+
+ /* lineno is only valid if an input buffer exists. */
+ if (! YY_CURRENT_BUFFER )
+ YY_FATAL_ERROR( "yyset_lineno called with no buffer" );
+
+ yylineno = line_number;
+}
+
+/** Set the current column.
+ * Raises a fatal scanner error when the scanner has no current buffer.
+ * @param column_no The column number to store.
+ * @param yyscanner The scanner object.
+ */
+void yyset_column (int column_no , yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+
+ /* column is only valid if an input buffer exists. */
+ if (! YY_CURRENT_BUFFER )
+ YY_FATAL_ERROR( "yyset_column called with no buffer" );
+
+ yycolumn = column_no;
+}
+
+/** Set the input stream. This does not discard the current
+ * input buffer; only the scanner's yyin value is replaced.
+ * @param in_str A readable stream.
+ * @param yyscanner The scanner object.
+ * @see yy_switch_to_buffer
+ */
+void yyset_in (FILE * in_str , yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+ yyin = in_str ;
+}
+
+/** Set the output stream.
+ * @param out_str The stream to store as the scanner's yyout.
+ * @param yyscanner The scanner object.
+ */
+void yyset_out (FILE * out_str , yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+ yyout = out_str ;
+}
+
+/** Get the scanner's debug flag.
+ * @param yyscanner The scanner object.
+ * @return the current yy_flex_debug value.
+ */
+int yyget_debug (yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+ return yy_flex_debug;
+}
+
+/** Set the scanner's debug flag.
+ * @param bdebug The value to store in yy_flex_debug.
+ * @param yyscanner The scanner object.
+ */
+void yyset_debug (int bdebug , yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+ yy_flex_debug = bdebug ;
+}
+
+/* Accessor methods for yylval and yylloc */
+
+/** Get the semantic-value pointer currently held by the scanner.
+ * @param yyscanner The scanner object.
+ * @return the scanner's yylval pointer (yylex() stores its yylval_param there).
+ */
+YYSTYPE * yyget_lval (yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+ return yylval;
+}
+
+/** Set the semantic-value pointer held by the scanner.
+ * @param yylval_param The pointer to store as yylval.
+ * @param yyscanner The scanner object.
+ */
+void yyset_lval (YYSTYPE * yylval_param , yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+ yylval = yylval_param;
+}
+
+/* User-visible API */
+
+/* yylex_init allocates the scanner object itself, which makes it the one
+ * reentrant entry point that does not take the scanner as its last argument.
+ * For that reason its declaration is written out by hand rather than
+ * produced by the usual macros.
+ *
+ * Returns 0 on success. Returns 1 with errno set to EINVAL when
+ * ptr_yy_globals is NULL, or to ENOMEM when the allocation fails.
+ */
+
+int yylex_init(yyscan_t* ptr_yy_globals)
+
+{
+    yyscan_t scanner;
+
+    if ( ptr_yy_globals == NULL )
+        {
+        errno = EINVAL;
+        return 1;
+        }
+
+    /* Publish the result before checking it, so the caller's handle is
+     * NULL after a failed allocation.
+     */
+    scanner = (yyscan_t) yyalloc( sizeof( struct yyguts_t ), NULL );
+    *ptr_yy_globals = scanner;
+
+    if ( scanner == NULL )
+        {
+        errno = ENOMEM;
+        return 1;
+        }
+
+    /* Zero-fill for releases; a 0xAA fill would expose bugs in yy_init_globals. */
+    memset( scanner, 0x00, sizeof( struct yyguts_t ) );
+
+    return yy_init_globals( scanner );
+}
+
+/* yylex_init_extra has the same functionality as yylex_init, but follows the
+ * convention of taking the scanner as the last argument. Note however, that
+ * this is a *pointer* to a scanner, as it will be allocated by this call (and
+ * is the reason, too, why this function also must handle its own declaration).
+ * The user defined value in the first argument will be available to yyalloc in
+ * the yyextra field.
+ *
+ * Returns 0 on success. Returns 1 with errno set to EINVAL when
+ * ptr_yy_globals is NULL, or to ENOMEM when the allocation fails.
+ */
+
+int yylex_init_extra(YY_EXTRA_TYPE yy_user_defined,yyscan_t* ptr_yy_globals )
+
+{
+ /* Stack-allocated stand-in scanner: only its yyextra field is set, so
+ * that yyalloc() below receives a scanner argument carrying the user data.
+ */
+ struct yyguts_t dummy_yyguts;
+
+ yyset_extra (yy_user_defined, &dummy_yyguts);
+
+ if (ptr_yy_globals == NULL){
+ errno = EINVAL;
+ return 1;
+ }
+
+ *ptr_yy_globals = (yyscan_t) yyalloc ( sizeof( struct yyguts_t ), &dummy_yyguts );
+
+ if (*ptr_yy_globals == NULL){
+ errno = ENOMEM;
+ return 1;
+ }
+
+ /* By setting to 0xAA, we expose bugs in
+ yy_init_globals. Leave at 0x00 for releases. */
+ memset(*ptr_yy_globals,0x00,sizeof(struct yyguts_t));
+
+ /* The memset above cleared the new object, so store the user data in it now. */
+ yyset_extra (yy_user_defined, *ptr_yy_globals);
+
+ return yy_init_globals ( *ptr_yy_globals );
+}
+
+/* yy_init_globals - reset the scanner object's fields to their initial values
+ *
+ * Clears the buffer-stack bookkeeping, scan position, init flag, start state
+ * and start-condition stack, and sets yyin/yyout (to stdin/stdout when
+ * YY_STDINIT is defined, otherwise to null). Nothing is allocated or freed
+ * here. Always returns 0.
+ */
+static int yy_init_globals (yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+ /* Initialization is the same as for the non-reentrant scanner.
+ * This function is called from yylex_destroy(), so don't allocate here.
+ */
+
+ yyg->yy_buffer_stack = 0;
+ yyg->yy_buffer_stack_top = 0;
+ yyg->yy_buffer_stack_max = 0;
+ yyg->yy_c_buf_p = (char *) 0;
+ yyg->yy_init = 0;
+ yyg->yy_start = 0;
+
+ yyg->yy_start_stack_ptr = 0;
+ yyg->yy_start_stack_depth = 0;
+ yyg->yy_start_stack = NULL;
+
+/* Defined in main.c */
+#ifdef YY_STDINIT
+ yyin = stdin;
+ yyout = stdout;
+#else
+ yyin = (FILE *) 0;
+ yyout = (FILE *) 0;
+#endif
+
+ /* For future reference: Set errno on error, since we are called by
+ * yylex_init()
+ */
+ return 0;
+}
+
+/* yylex_destroy is for both reentrant and non-reentrant scanners.
+ *
+ * Deletes every buffer on the buffer stack, frees the buffer stack and the
+ * start-condition stack, resets the scanner fields, and finally frees the
+ * scanner object itself. Always returns 0. The scanner handle held by the
+ * caller is invalid after this call.
+ */
+int yylex_destroy (yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+
+ /* Pop the buffer stack, destroying each element. */
+ while(YY_CURRENT_BUFFER){
+ yy_delete_buffer(YY_CURRENT_BUFFER ,yyscanner );
+ YY_CURRENT_BUFFER_LVALUE = NULL;
+ /* The slot is already NULL here, so this call returns without popping. */
+ yypop_buffer_state(yyscanner);
+ }
+
+ /* Destroy the stack itself. */
+ yyfree(yyg->yy_buffer_stack ,yyscanner);
+ yyg->yy_buffer_stack = NULL;
+
+ /* Destroy the start condition stack. */
+ yyfree(yyg->yy_start_stack ,yyscanner );
+ yyg->yy_start_stack = NULL;
+
+ /* Reset the globals. This is important in a non-reentrant scanner so the next time
+ * yylex() is called, initialization will occur. */
+ yy_init_globals( yyscanner);
+
+ /* Destroy the main struct (reentrant only). */
+ yyfree ( yyscanner , yyscanner );
+ /* Clears only this function's by-value copy of the handle, not the caller's. */
+ yyscanner = NULL;
+ return 0;
+}
+
+/*
+ * Internal utility routines.
+ */
+
+#ifndef yytext_ptr
+/* Copy exactly n characters from s2 to s1. Unlike the standard strncpy(),
+ * no NUL terminator is added and a NUL in s2 does not stop the copy.
+ * The yyscanner argument is not used.
+ */
+static void yy_flex_strncpy (char* s1, yyconst char * s2, int n , yyscan_t yyscanner)
+{
+    register int copied = 0;
+
+    while ( copied < n )
+        {
+        s1[copied] = s2[copied];
+        ++copied;
+        }
+}
+#endif
+
+#ifdef YY_NEED_STRLEN
+/* Return the number of characters in s before its terminating NUL.
+ * The yyscanner argument is not used.
+ */
+static int yy_flex_strlen (yyconst char * s , yyscan_t yyscanner)
+{
+    register int length = 0;
+
+    while ( s[length] )
+        ++length;
+
+    return length;
+}
+#endif
+
+/* Allocate size bytes with malloc(); returns NULL on failure.
+ * The yyscanner argument is not used.
+ */
+void *yyalloc (yy_size_t size , yyscan_t yyscanner)
+{
+    void *mem = (void *) malloc( size );
+
+    return mem;
+}
+
+/* Resize the block at ptr to size bytes with realloc(); returns the
+ * realloc() result unchanged. The yyscanner argument is not used.
+ */
+void *yyrealloc (void * ptr, yy_size_t size , yyscan_t yyscanner)
+{
+    /* The (char *) cast keeps this working both where generic pointers
+     * are char* and where they are void*: ANSI C and C++ convert any
+     * object pointer to void* without a cast when passing arguments.
+     */
+    void *resized = (void *) realloc( (char *) ptr, size );
+
+    return resized;
+}
+
+/* Release a block obtained from yyalloc() or yyrealloc() with free().
+ * The yyscanner argument is not used.
+ */
+void yyfree (void * ptr , yyscan_t yyscanner)
+{
+    /* Same (char *) cast convention as in yyrealloc(). */
+    char *block = (char *) ptr;
+
+    free( block );
+}
+
+#define YYTABLES_NAME "yytables"
+
+#line 87 "_jsgf_scanner.l"
+
+
+
diff --git a/media/sphinxbase/src/libsphinxbase/lm/jsgf_scanner.h b/media/sphinxbase/src/libsphinxbase/lm/jsgf_scanner.h
new file mode 100644
index 000000000..72abefb88
--- /dev/null
+++ b/media/sphinxbase/src/libsphinxbase/lm/jsgf_scanner.h
@@ -0,0 +1,352 @@
+#ifndef yyHEADER_H
+#define yyHEADER_H 1
+#define yyIN_HEADER 1
+
+#line 6 "jsgf_scanner.h"
+
+#line 8 "jsgf_scanner.h"
+
+#define YY_INT_ALIGNED short int
+
+/* A lexical scanner generated by flex */
+
+#define FLEX_SCANNER
+#define YY_FLEX_MAJOR_VERSION 2
+#define YY_FLEX_MINOR_VERSION 5
+#define YY_FLEX_SUBMINOR_VERSION 37
+#if YY_FLEX_SUBMINOR_VERSION > 0
+#define FLEX_BETA
+#endif
+
+/* First, we deal with platform-specific or compiler-specific issues. */
+
+/* begin standard C headers. */
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <stdlib.h>
+
+/* end standard C headers. */
+
+/* flex integer type definitions */
+
+#ifndef FLEXINT_H
+#define FLEXINT_H
+
+/* C99 systems have <inttypes.h>. Non-C99 systems may or may not. */
+
+#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+
+/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h,
+ * if you want the limit (max/min) macros for int types.
+ */
+#ifndef __STDC_LIMIT_MACROS
+#define __STDC_LIMIT_MACROS 1
+#endif
+
+#include <inttypes.h>
+typedef int8_t flex_int8_t;
+typedef uint8_t flex_uint8_t;
+typedef int16_t flex_int16_t;
+typedef uint16_t flex_uint16_t;
+typedef int32_t flex_int32_t;
+typedef uint32_t flex_uint32_t;
+#else
+typedef signed char flex_int8_t;
+typedef short int flex_int16_t;
+typedef int flex_int32_t;
+typedef unsigned char flex_uint8_t;
+typedef unsigned short int flex_uint16_t;
+typedef unsigned int flex_uint32_t;
+
+/* Limits of integral types. */
+#ifndef INT8_MIN
+#define INT8_MIN (-128)
+#endif
+#ifndef INT16_MIN
+#define INT16_MIN (-32767-1)
+#endif
+#ifndef INT32_MIN
+#define INT32_MIN (-2147483647-1)
+#endif
+#ifndef INT8_MAX
+#define INT8_MAX (127)
+#endif
+#ifndef INT16_MAX
+#define INT16_MAX (32767)
+#endif
+#ifndef INT32_MAX
+#define INT32_MAX (2147483647)
+#endif
+#ifndef UINT8_MAX
+#define UINT8_MAX (255U)
+#endif
+#ifndef UINT16_MAX
+#define UINT16_MAX (65535U)
+#endif
+#ifndef UINT32_MAX
+#define UINT32_MAX (4294967295U)
+#endif
+
+#endif /* ! C99 */
+
+#endif /* ! FLEXINT_H */
+
+#ifdef __cplusplus
+
+/* The "const" storage-class-modifier is valid. */
+#define YY_USE_CONST
+
+#else /* ! __cplusplus */
+
+/* C99 requires __STDC__ to be defined as 1. */
+#if defined (__STDC__)
+
+#define YY_USE_CONST
+
+#endif /* defined (__STDC__) */
+#endif /* ! __cplusplus */
+
+#ifdef YY_USE_CONST
+#define yyconst const
+#else
+#define yyconst
+#endif
+
+/* An opaque pointer. */
+#ifndef YY_TYPEDEF_YY_SCANNER_T
+#define YY_TYPEDEF_YY_SCANNER_T
+typedef void* yyscan_t;
+#endif
+
+/* For convenience, these vars (plus the bison vars far below)
+ are macros in the reentrant scanner. */
+#define yyin yyg->yyin_r
+#define yyout yyg->yyout_r
+#define yyextra yyg->yyextra_r
+#define yyleng yyg->yyleng_r
+#define yytext yyg->yytext_r
+#define yylineno (YY_CURRENT_BUFFER_LVALUE->yy_bs_lineno)
+#define yycolumn (YY_CURRENT_BUFFER_LVALUE->yy_bs_column)
+#define yy_flex_debug yyg->yy_flex_debug_r
+
+/* Size of default input buffer. */
+#ifndef YY_BUF_SIZE
+#define YY_BUF_SIZE 16384
+#endif
+
+#ifndef YY_TYPEDEF_YY_BUFFER_STATE
+#define YY_TYPEDEF_YY_BUFFER_STATE
+typedef struct yy_buffer_state *YY_BUFFER_STATE;
+#endif
+
+#ifndef YY_TYPEDEF_YY_SIZE_T
+#define YY_TYPEDEF_YY_SIZE_T
+typedef size_t yy_size_t;
+#endif
+
+#ifndef YY_STRUCT_YY_BUFFER_STATE
+#define YY_STRUCT_YY_BUFFER_STATE
+struct yy_buffer_state
+ {
+ FILE *yy_input_file;
+
+ char *yy_ch_buf; /* input buffer */
+ char *yy_buf_pos; /* current position in input buffer */
+
+ /* Size of input buffer in bytes, not including room for EOB
+ * characters.
+ */
+ yy_size_t yy_buf_size;
+
+ /* Number of characters read into yy_ch_buf, not including EOB
+ * characters.
+ */
+ yy_size_t yy_n_chars;
+
+ /* Whether we "own" the buffer - i.e., we know we created it,
+ * and can realloc() it to grow it, and should free() it to
+ * delete it.
+ */
+ int yy_is_our_buffer;
+
+ /* Whether this is an "interactive" input source; if so, and
+ * if we're using stdio for input, then we want to use getc()
+ * instead of fread(), to make sure we stop fetching input after
+ * each newline.
+ */
+ int yy_is_interactive;
+
+ /* Whether we're considered to be at the beginning of a line.
+ * If so, '^' rules will be active on the next match, otherwise
+ * not.
+ */
+ int yy_at_bol;
+
+ int yy_bs_lineno; /**< The line count. */
+ int yy_bs_column; /**< The column count. */
+
+ /* Whether to try to fill the input buffer when we reach the
+ * end of it.
+ */
+ int yy_fill_buffer;
+
+ int yy_buffer_status;
+
+ };
+#endif /* !YY_STRUCT_YY_BUFFER_STATE */
+
+void yyrestart (FILE *input_file ,yyscan_t yyscanner );
+void yy_switch_to_buffer (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner );
+YY_BUFFER_STATE yy_create_buffer (FILE *file,int size ,yyscan_t yyscanner );
+void yy_delete_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner );
+void yy_flush_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner );
+void yypush_buffer_state (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner );
+void yypop_buffer_state (yyscan_t yyscanner );
+
+YY_BUFFER_STATE yy_scan_buffer (char *base,yy_size_t size ,yyscan_t yyscanner );
+YY_BUFFER_STATE yy_scan_string (yyconst char *yy_str ,yyscan_t yyscanner );
+YY_BUFFER_STATE yy_scan_bytes (yyconst char *bytes,yy_size_t len ,yyscan_t yyscanner );
+
+void *yyalloc (yy_size_t ,yyscan_t yyscanner );
+void *yyrealloc (void *,yy_size_t ,yyscan_t yyscanner );
+void yyfree (void * ,yyscan_t yyscanner );
+
+/* Begin user sect3 */
+
+#define yywrap(yyscanner) 1
+#define YY_SKIP_YYWRAP
+
+#define yytext_ptr yytext_r
+
+#ifdef YY_HEADER_EXPORT_START_CONDITIONS
+#define INITIAL 0
+#define COMMENT 1
+#define DECL 2
+#define DECLCOMMENT 3
+
+#endif
+
+
+#ifdef HAVE_UNISTD_H
+#ifndef YY_NO_UNISTD_H
+/* Special case for "unistd.h", since it is non-ANSI. We include it way
+ * down here because we want the user's section 1 to have been scanned first.
+ * The user has a chance to override it with an option.
+ */
+#include <unistd.h>
+#endif
+#endif
+
+#ifndef YY_EXTRA_TYPE
+#define YY_EXTRA_TYPE void *
+#endif
+
+int yylex_init (yyscan_t* scanner);
+
+int yylex_init_extra (YY_EXTRA_TYPE user_defined,yyscan_t* scanner);
+
+/* Accessor methods to globals.
+ These are made visible to non-reentrant scanners for convenience. */
+
+int yylex_destroy (yyscan_t yyscanner );
+
+int yyget_debug (yyscan_t yyscanner );
+
+void yyset_debug (int debug_flag ,yyscan_t yyscanner );
+
+YY_EXTRA_TYPE yyget_extra (yyscan_t yyscanner );
+
+void yyset_extra (YY_EXTRA_TYPE user_defined ,yyscan_t yyscanner );
+
+FILE *yyget_in (yyscan_t yyscanner );
+
+void yyset_in (FILE * in_str ,yyscan_t yyscanner );
+
+FILE *yyget_out (yyscan_t yyscanner );
+
+void yyset_out (FILE * out_str ,yyscan_t yyscanner );
+
+yy_size_t yyget_leng (yyscan_t yyscanner );
+
+char *yyget_text (yyscan_t yyscanner );
+
+int yyget_lineno (yyscan_t yyscanner );
+
+void yyset_lineno (int line_number ,yyscan_t yyscanner );
+
+int yyget_column (yyscan_t yyscanner );
+
+void yyset_column (int column_no ,yyscan_t yyscanner );
+
+YYSTYPE * yyget_lval (yyscan_t yyscanner );
+
+void yyset_lval (YYSTYPE * yylval_param ,yyscan_t yyscanner );
+
+/* Macros after this point can all be overridden by user definitions in
+ * section 1.
+ */
+
+#ifndef YY_SKIP_YYWRAP
+#ifdef __cplusplus
+extern "C" int yywrap (yyscan_t yyscanner );
+#else
+extern int yywrap (yyscan_t yyscanner );
+#endif
+#endif
+
+#ifndef yytext_ptr
+static void yy_flex_strncpy (char *,yyconst char *,int ,yyscan_t yyscanner);
+#endif
+
+#ifdef YY_NEED_STRLEN
+static int yy_flex_strlen (yyconst char * ,yyscan_t yyscanner);
+#endif
+
+#ifndef YY_NO_INPUT
+
+#endif
+
+/* Amount of stuff to slurp up with each read. */
+#ifndef YY_READ_BUF_SIZE
+#define YY_READ_BUF_SIZE 8192
+#endif
+
+/* Number of entries by which start-condition stack grows. */
+#ifndef YY_START_STACK_INCR
+#define YY_START_STACK_INCR 25
+#endif
+
+/* Default declaration of generated scanner - a define so the user can
+ * easily add parameters.
+ */
+#ifndef YY_DECL
+#define YY_DECL_IS_OURS 1
+
+extern int yylex \
+ (YYSTYPE * yylval_param ,yyscan_t yyscanner);
+
+#define YY_DECL int yylex \
+ (YYSTYPE * yylval_param , yyscan_t yyscanner)
+#endif /* !YY_DECL */
+
+/* yy_get_previous_state - get the state just before the EOB char was reached */
+
+#undef YY_NEW_FILE
+#undef YY_FLUSH_BUFFER
+#undef yy_set_bol
+#undef yy_new_buffer
+#undef yy_set_interactive
+#undef YY_DO_BEFORE_ACTION
+
+#ifdef YY_DECL_IS_OURS
+#undef YY_DECL_IS_OURS
+#undef YY_DECL
+#endif
+
+#line 87 "_jsgf_scanner.l"
+
+
+#line 348 "jsgf_scanner.h"
+#undef yyIN_HEADER
+#endif /* yyHEADER_H */
diff --git a/media/sphinxbase/src/libsphinxbase/lm/lm3g_model.c b/media/sphinxbase/src/libsphinxbase/lm/lm3g_model.c
new file mode 100644
index 000000000..e9943001e
--- /dev/null
+++ b/media/sphinxbase/src/libsphinxbase/lm/lm3g_model.c
@@ -0,0 +1,258 @@
+/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
+/* ====================================================================
+ * Copyright (c) 1999-2007 Carnegie Mellon University. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * This work was supported in part by funding from the Defense Advanced
+ * Research Projects Agency and the National Science Foundation of the
+ * United States of America, and the CMU Sphinx Speech Consortium.
+ *
+ * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
+ * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
+ * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ====================================================================
+ *
+ */
+/*
+ * \file lm3g_model.c Core Sphinx 3-gram code used in
+ * DMP/DMP32/ARPA (for now) model code.
+ *
+ * Author: A cast of thousands, probably.
+ */
+#include <string.h>
+#include <assert.h>
+#include <limits.h>
+
+#include "sphinxbase/listelem_alloc.h"
+#include "sphinxbase/ckd_alloc.h"
+#include "sphinxbase/err.h"
+
+#include "lm3g_model.h"
+
+/*
+ * Release the trigram-info cache of lm3g: the list-element allocator and
+ * the tginfo array itself.  Does nothing when no tginfo array exists.
+ * The base argument is not used.
+ */
+void
+lm3g_tginfo_free(ngram_model_t *base, lm3g_model_t *lm3g)
+{
+    if (lm3g->tginfo != NULL) {
+        listelem_alloc_free(lm3g->le);
+        ckd_free(lm3g->tginfo);
+    }
+}
+
+/*
+ * Empty the trigram-info cache of lm3g without releasing the tginfo array:
+ * the element allocator is destroyed, the first base->n_counts[0] list heads
+ * are cleared, and a fresh allocator is created.  Does nothing when no
+ * tginfo array exists.
+ */
+void
+lm3g_tginfo_reset(ngram_model_t *base, lm3g_model_t *lm3g)
+{
+    if (lm3g->tginfo != NULL) {
+        /* Every cached tginfo_t came from this allocator. */
+        listelem_alloc_free(lm3g->le);
+        /* Forget the list heads that pointed into it. */
+        memset(lm3g->tginfo, 0, base->n_counts[0] * sizeof(*lm3g->tginfo));
+        lm3g->le = listelem_alloc_init(sizeof(tginfo_t));
+    }
+}
+
+/*
+ * Re-scale the stored unigram, bigram and trigram values of lm3g from the
+ * settings currently recorded in base (base->lw, base->log_wip) to a new
+ * language weight lw, word insertion penalty wip and unigram weight uw,
+ * then record the new settings in base.
+ */
+void
+lm3g_apply_weights(ngram_model_t *base,
+ lm3g_model_t *lm3g,
+ float32 lw, float32 wip, float32 uw)
+{
+ int32 log_wip, log_uw, log_uniform_weight;
+ int i;
+
+ /* Precalculate some log values we will like. */
+ log_wip = logmath_log(base->lmath, wip);
+ log_uw = logmath_log(base->lmath, uw);
+ log_uniform_weight = logmath_log(base->lmath, 1.0 - uw);
+
+ /* Unigrams: one pass over the first base->n_counts[0] entries. */
+ for (i = 0; i < base->n_counts[0]; ++i) {
+ int32 prob1, bo_wt, n_used;
+
+ /* Backoff weights just get scaled by the lw. */
+ bo_wt = (int32)(lm3g->unigrams[i].bo_wt1.l / base->lw);
+ /* Unscaling unigram probs is a bit more complicated, so punt
+ * it back to the general code. */
+ prob1 = ngram_ng_prob(base, i, NULL, 0, &n_used);
+ /* Now compute the new scaled probabilities. */
+ lm3g->unigrams[i].bo_wt1.l = (int32)(bo_wt * lw);
+ if (strcmp(base->word_str[i], "<s>") == 0) { /* FIXME: configurable start_sym */
+ /* Apply language weight and WIP */
+ /* "<s>" is not interpolated with the uniform distribution. */
+ lm3g->unigrams[i].prob1.l = (int32)(prob1 * lw) + log_wip;
+ }
+ else {
+ /* Interpolate unigram probability with uniform. */
+ prob1 += log_uw;
+ prob1 = logmath_add(base->lmath, prob1, base->log_uniform + log_uniform_weight);
+ /* Apply language weight and WIP */
+ lm3g->unigrams[i].prob1.l = (int32)(prob1 * lw) + log_wip;
+ }
+ }
+
+ /* Bigram probability table: strip the old WIP and lw, apply the new ones. */
+ for (i = 0; i < lm3g->n_prob2; ++i) {
+ int32 prob2;
+ /* Can't just punt this back to general code since it is quantized. */
+ prob2 = (int32)((lm3g->prob2[i].l - base->log_wip) / base->lw);
+ lm3g->prob2[i].l = (int32)(prob2 * lw) + log_wip;
+ }
+
+ /* Bigram backoff weights and trigram probabilities are only touched
+ * when base->n > 2. */
+ if (base->n > 2) {
+ for (i = 0; i < lm3g->n_bo_wt2; ++i) {
+ lm3g->bo_wt2[i].l = (int32)(lm3g->bo_wt2[i].l / base->lw * lw);
+ }
+ for (i = 0; i < lm3g->n_prob3; i++) {
+ int32 prob3;
+ /* Can't just punt this back to general code since it is quantized. */
+ prob3 = (int32)((lm3g->prob3[i].l - base->log_wip) / base->lw);
+ lm3g->prob3[i].l = (int32)(prob3 * lw) + log_wip;
+ }
+ }
+
+ /* Store updated values in the model. */
+ /* This has to come last: the loops above read base->lw and
+ * base->log_wip as the previous settings. */
+ base->log_wip = log_wip;
+ base->log_uw = log_uw;
+ base->log_uniform_weight = log_uniform_weight;
+ base->lw = lw;
+}
+
+/*
+ * Add unigram wid to lm3g with weight lweight.
+ *
+ * The unigram and tginfo arrays are resized to base->n_1g_alloc entries and
+ * every slot from the current unigram count upwards is zeroed.  The new
+ * entry's probability is lweight combined with base->log_uniform and
+ * base->log_uw, log-added to the uniform term; its backoff weight and
+ * bigram index are set to 0.  Returns the probability that was stored.
+ */
+int32
+lm3g_add_ug(ngram_model_t *base,
+            lm3g_model_t *lm3g, int32 wid, int32 lweight)
+{
+    int32 new_prob;
+    unigram_t *entry;
+
+    /* A class word ID must never reach this function. */
+    assert(!NGRAM_IS_CLASSWID(wid));
+
+    /* Resize and clear the tail of the unigram table. */
+    lm3g->unigrams = ckd_realloc(lm3g->unigrams,
+                                 sizeof(*lm3g->unigrams) * base->n_1g_alloc);
+    memset(lm3g->unigrams + base->n_counts[0], 0,
+           (base->n_1g_alloc - base->n_counts[0]) * sizeof(*lm3g->unigrams));
+
+    /* Same treatment for the tginfo list heads. */
+    lm3g->tginfo = ckd_realloc(lm3g->tginfo,
+                               sizeof(*lm3g->tginfo) * base->n_1g_alloc);
+    memset(lm3g->tginfo + base->n_counts[0], 0,
+           (base->n_1g_alloc - base->n_counts[0]) * sizeof(*lm3g->tginfo));
+
+    /* FIXME: base->log_uniform ought to be updated and all other unigrams
+     * renormalized here.  That is slow, so it is left undone. */
+    /* Kept as two steps so the interpolation stays in logmath. */
+    new_prob = lweight + base->log_uniform + base->log_uw;
+    new_prob = logmath_add(base->lmath, new_prob,
+                           base->log_uniform + base->log_uniform_weight);
+
+    /* The new unigram takes part in no bigram, so its backoff weight and
+     * bigram index carry no meaning; both are set to 0. */
+    entry = &lm3g->unigrams[wid];
+    entry->prob1.l = new_prob;
+    entry->bo_wt1.l = 0;
+    entry->bigrams = 0;
+
+    /* Count the new unigram. */
+    base->n_counts[0] += 1;
+    /* FIXME: class words can make this count bogus.  When wid lies at or
+     * beyond the count, stretch the count to cover it, at the price of no
+     * longer knowing the true number of unigrams. */
+    if (wid >= base->n_counts[0])
+        base->n_counts[0] = wid + 1;
+
+    return new_prob;
+}
+
+#define INITIAL_SORTED_ENTRIES MAX_UINT16
+
+/*
+ * Set up l as a sorted list of INITIAL_SORTED_ENTRIES zeroed entries whose
+ * root (index 0) holds INT_MIN and has no children; index 1 is the first
+ * free slot.
+ */
+void
+init_sorted_list(sorted_list_t * l)
+{
+    sorted_entry_t *root;
+
+    l->list = ckd_calloc(INITIAL_SORTED_ENTRIES, sizeof(sorted_entry_t));
+
+    root = &l->list[0];
+    root->val.l = INT_MIN;
+    root->lower = 0;
+    root->higher = 0;
+
+    l->free = 1;
+    l->size = INITIAL_SORTED_ENTRIES;
+}
+
+/*
+ * Release the entry array of l.  The array is obtained from ckd_calloc()
+ * and ckd_realloc(), so it is returned through ckd_free() rather than a
+ * bare free().  The pointer is cleared afterwards so that a stale list
+ * cannot be dereferenced by accident.  The sorted_list_t itself is not
+ * freed.
+ */
+void
+free_sorted_list(sorted_list_t * l)
+{
+    ckd_free(l->list);
+    l->list = NULL;
+}
+
+/*
+ * Return a newly allocated array holding the values of the first l->free
+ * entries of l, in index order.
+ */
+lmprob_t *
+vals_in_sorted_list(sorted_list_t * l)
+{
+    lmprob_t *out;
+    int32 idx;
+
+    out = ckd_calloc(l->free, sizeof(lmprob_t));
+    idx = 0;
+    while (idx < l->free) {
+        out[idx] = l->list[idx].val;
+        ++idx;
+    }
+    return out;
+}
+
+/*
+ * Store val in the first free slot of l and return that slot's index.
+ * When the array is full it is first enlarged by INITIAL_SORTED_ENTRIES
+ * zeroed entries.  l->list may be reallocated, so callers must not keep
+ * pointers into the array across this call.
+ */
+static int32
+sorted_list_append(sorted_list_t * l, int32 val)
+{
+    int32 idx;
+
+    if (l->free >= l->size) {
+        int newsize = l->size + INITIAL_SORTED_ENTRIES;
+        l->list = ckd_realloc(l->list, sizeof(sorted_entry_t) * newsize);
+        memset(l->list + l->size,
+               0, INITIAL_SORTED_ENTRIES * sizeof(sorted_entry_t));
+        l->size = newsize;
+    }
+
+    idx = l->free;
+    (l->free)++;
+    l->list[idx].val.l = val;
+    return idx;
+}
+
+/*
+ * Return the index of the entry of l whose value equals *val, inserting a
+ * new entry when none exists.
+ *
+ * The entries form a binary tree rooted at index 0: "lower" leads to
+ * smaller values, "higher" to larger ones, and a child index of 0 means
+ * "no child".  The search walks down from the root; on reaching a missing
+ * child a new entry is appended and linked in at that position.
+ */
+int32
+sorted_id(sorted_list_t * l, int32 *val)
+{
+    int32 i = 0;
+
+    for (;;) {
+        if (*val == l->list[i].val.l)
+            return (i);
+
+        if (*val < l->list[i].val.l) {
+            if (l->list[i].lower == 0) {
+                int32 added = sorted_list_append(l, *val);
+                /* Index l->list afresh: the append may have moved it. */
+                l->list[i].lower = added;
+                return (added);
+            }
+            i = l->list[i].lower;
+        }
+        else {
+            if (l->list[i].higher == 0) {
+                int32 added = sorted_list_append(l, *val);
+                /* Index l->list afresh: the append may have moved it. */
+                l->list[i].higher = added;
+                return (added);
+            }
+            i = l->list[i].higher;
+        }
+    }
+}
diff --git a/media/sphinxbase/src/libsphinxbase/lm/lm3g_model.h b/media/sphinxbase/src/libsphinxbase/lm/lm3g_model.h
new file mode 100644
index 000000000..698ed81f5
--- /dev/null
+++ b/media/sphinxbase/src/libsphinxbase/lm/lm3g_model.h
@@ -0,0 +1,177 @@
+/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
+/* ====================================================================
+ * Copyright (c) 1999-2007 Carnegie Mellon University. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * This work was supported in part by funding from the Defense Advanced
+ * Research Projects Agency and the National Science Foundation of the
+ * United States of America, and the CMU Sphinx Speech Consortium.
+ *
+ * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
+ * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
+ * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ====================================================================
+ *
+ */
+/*
+ * \file lm3g_model.h Core Sphinx 3-gram code used in
+ * DMP/DMP32/ARPA (for now) model code.
+ *
+ * Author: A cast of thousands, probably.
+ */
+
+#ifndef __NGRAM_MODEL_LM3G_H__
+#define __NGRAM_MODEL_LM3G_H__
+
+#include "sphinxbase/listelem_alloc.h"
+
+#include "ngram_model_internal.h"
+
+/**
+ * Type used to store language model probabilities
+ */
+typedef union {
+ float32 f;
+ int32 l;
+} lmprob_t;
+
+/**
+ * Bigram probs and bo-wts, and trigram probs are kept in separate
+ * tables rather than within the bigram_t and trigram_t structures.
+ * These tables hold unique prob and bo-wt values. The following tree
+ * structure is used to construct these tables of unique values.
+ * Whenever a new value is read from the LM file, the sorted tree
+ * structure is searched to see if the value already exists, and
+ * inserted if not found.
+ */
+typedef struct sorted_entry_s {
+ lmprob_t val; /**< value being kept in this node */
+ uint32 lower; /**< index of another entry. All descendants down
+ this path have their val < this node's val.
+ 0 => no son exists (0 is root index) */
+ uint32 higher; /**< index of another entry. All descendants down
+ this path have their val > this node's val
+ 0 => no son exists (0 is root index) */
+} sorted_entry_t;
+
+/**
+ * The sorted list. list is an array that starts out 64K entries long and
+ * is enlarged when it fills up. The first entry is the root of the tree
+ * and is created during initialization.
+ */
+typedef struct {
+ sorted_entry_t *list;
+ int32 free; /**< first free element in list */
+ int32 size;
+} sorted_list_t;
+
+/**
+ * Unigram structure (common among all lm3g implementations)
+ */
+typedef struct unigram_s {
+ lmprob_t prob1; /**< Unigram probability. */
+ lmprob_t bo_wt1; /**< Unigram backoff weight. */
+ int32 bigrams; /**< Index of 1st entry in lm_t.bigrams[] */
+} unigram_t;
+
+/**
+ * Bigram structure (might be implemented differently)
+ */
+typedef struct bigram_s bigram_t;
+/**
+ * Trigram structure (might be implemented differently)
+ */
+typedef struct trigram_s trigram_t;
+
+
+/*
+ * To conserve space, bigram info is kept in many tables. Since the number
+ * of distinct values << #bigrams, these table indices can be 16-bit values.
+ * prob2 and bo_wt2 are such indices, but keeping trigram index is less easy.
+ * It is supposed to be the index of the first trigram entry for each bigram.
+ * But such an index cannot be represented in 16-bits, hence the following
+ * segmentation scheme: Partition bigrams into segments of BG_SEG_SZ
+ * consecutive entries, such that #trigrams in each segment <= 2**16 (the
+ * corresponding trigram segment). The bigram_t.trigrams value is then a
+ * 16-bit relative index within the trigram segment. A separate table--
+ * lm_t.tseg_base--has the index of the 1st trigram for each bigram segment.
+ */
+#define BG_SEG_SZ 512 /* chosen so that #trigram/segment <= 2**16 */
+#define LOG_BG_SEG_SZ 9
+
+/**
+ * Trigram information cache.
+ *
+ * The following trigram information cache eliminates most traversals of 1g->2g->3g
+ * tree to locate trigrams for a given bigram (lw1,lw2). The organization is optimized
+ * for locality of access (to the same lw1), given lw2.
+ */
+typedef struct tginfo_s {
+ int32 w1; /**< lw1 component of bigram lw1,lw2. All bigrams with
+ same lw2 linked together (see lm_t.tginfo). */
+ int32 n_tg; /**< number tg for parent bigram lw1,lw2 */
+ int32 bowt; /**< tg bowt for lw1,lw2 */
+ int32 used; /**< whether used since last lm_reset */
+ trigram_t *tg; /**< Trigrams for lw1,lw2 */
+ struct tginfo_s *next; /**< Next lw1 with same parent lw2; NULL if none. */
+} tginfo_t;
+
+/**
+ * Common internal structure for Sphinx 3-gram models.
+ */
+typedef struct lm3g_model_s {
+ unigram_t *unigrams;
+ bigram_t *bigrams;
+ trigram_t *trigrams;
+ lmprob_t *prob2; /**< Table of actual bigram probs */
+ int32 n_prob2; /**< prob2 size */
+ lmprob_t *bo_wt2; /**< Table of actual bigram backoff weights */
+ int32 n_bo_wt2; /**< bo_wt2 size */
+ lmprob_t *prob3; /**< Table of actual trigram probs */
+ int32 n_prob3; /**< prob3 size */
+ int32 *tseg_base; /**< tseg_base[i>>LOG_BG_SEG_SZ] = index of 1st
+ trigram for bigram segment (i>>LOG_BG_SEG_SZ) */
+ tginfo_t **tginfo; /**< tginfo[lw2] is head of linked list of trigram information for
+ some cached subset of bigrams (*,lw2). */
+ listelem_alloc_t *le; /**< List element allocator for tginfo. */
+} lm3g_model_t;
+
+void lm3g_tginfo_free(ngram_model_t *base, lm3g_model_t *lm3g);
+void lm3g_tginfo_reset(ngram_model_t *base, lm3g_model_t *lm3g);
+void lm3g_apply_weights(ngram_model_t *base,
+ lm3g_model_t *lm3g,
+ float32 lw, float32 wip, float32 uw);
+int32 lm3g_add_ug(ngram_model_t *base,
+ lm3g_model_t *lm3g, int32 wid, int32 lweight);
+
+
+/**
+ * Initialize sorted list with the 0-th (root) entry = INT_MIN, which may be
+ * needed to replace spurious values in the Darpa LM file.
+ */
+void init_sorted_list(sorted_list_t *l);
+void free_sorted_list(sorted_list_t *l);
+lmprob_t *vals_in_sorted_list(sorted_list_t *l);
+int32 sorted_id(sorted_list_t * l, int32 *val);
+
+#endif /* __NGRAM_MODEL_LM3G_H__ */
diff --git a/media/sphinxbase/src/libsphinxbase/lm/lm3g_templates.c b/media/sphinxbase/src/libsphinxbase/lm/lm3g_templates.c
new file mode 100644
index 000000000..080cfa8e6
--- /dev/null
+++ b/media/sphinxbase/src/libsphinxbase/lm/lm3g_templates.c
@@ -0,0 +1,560 @@
+/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
+/* ====================================================================
+ * Copyright (c) 1999-2007 Carnegie Mellon University. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * This work was supported in part by funding from the Defense Advanced
+ * Research Projects Agency and the National Science Foundation of the
+ * United States of America, and the CMU Sphinx Speech Consortium.
+ *
+ * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
+ * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
+ * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ====================================================================
+ *
+ */
+/*
+ * \file lm3g_templates.c Core Sphinx 3-gram code used in
+ * DMP/DMP32/ARPA (for now) model code.
+ */
+
+#include <assert.h>
+
+/* Locate a specific bigram within a bigram list */
+#define BINARY_SEARCH_THRESH 16
+/*
+ * Search the n bigram entries at bg for word ID w.  The range is halved
+ * while it is longer than BINARY_SEARCH_THRESH entries and scanned
+ * linearly afterwards.  Returns the index of the match, or -1.
+ */
+static int32
+find_bg(bigram_t * bg, int32 n, int32 w)
+{
+    int32 lo, hi, pos;
+
+    lo = 0;
+    hi = n;
+    while (hi - lo > BINARY_SEARCH_THRESH) {
+        int32 mid = (lo + hi) >> 1;
+
+        if (bg[mid].wid == w)
+            return mid;
+        if (bg[mid].wid < w)
+            lo = mid + 1;
+        else
+            hi = mid;
+    }
+
+    /* Few enough candidates left: scan them one by one. */
+    for (pos = lo; pos < hi; ++pos) {
+        if (bg[pos].wid == w)
+            return pos;
+    }
+    return -1;
+}
+
+/*
+ * Score lw2 following lw1.  *n_used receives 2 when a bigram entry for
+ * (lw1, lw2) was found and 1 otherwise.  With lw1 < 0 or a model order
+ * below 2 the plain unigram value of lw2 is returned; when the bigram is
+ * missing the result is lw1's backoff weight plus lw2's unigram value.
+ */
+static int32
+lm3g_bg_score(NGRAM_MODEL_TYPE *model,
+              int32 lw1, int32 lw2, int32 *n_used)
+{
+    int32 first, count, hit;
+    bigram_t *candidates;
+
+    if (lw1 < 0 || model->base.n < 2) {
+        *n_used = 1;
+        return model->lm3g.unigrams[lw2].prob1.l;
+    }
+
+    /* Bigrams of lw1 lie between its first bigram and that of lw1 + 1. */
+    first = FIRST_BG(model, lw1);
+    count = FIRST_BG(model, lw1 + 1) - first;
+    candidates = model->lm3g.bigrams + first;
+
+    hit = find_bg(candidates, count, lw2);
+    if (hit >= 0) {
+        /* Access mode = bigram */
+        *n_used = 2;
+        return model->lm3g.prob2[candidates[hit].prob2].l;
+    }
+
+    /* Access mode = unigram */
+    *n_used = 1;
+    return model->lm3g.unigrams[lw1].bo_wt1.l + model->lm3g.unigrams[lw2].prob1.l;
+}
+
+/*
+ * Create a trigram-info record for bigram (lw1, lw2) and push it on the
+ * front of the list model->lm3g.tginfo[lw2].  When the bigram exists the
+ * record gets its backoff weight, a pointer to its first trigram and the
+ * trigram count; otherwise bowt and n_tg are 0 and tg stays NULL.  The
+ * "used" field is not set here.
+ */
+static void
+load_tginfo(NGRAM_MODEL_TYPE *model, int32 lw1, int32 lw2)
+{
+    int32 bg_first, bg_count, hit, tg_first;
+    bigram_t *bg_list;
+    tginfo_t *entry;
+
+    /* Allocate the record and link it in as the new list head. */
+    entry = (tginfo_t *) listelem_malloc(model->lm3g.le);
+    entry->w1 = lw1;
+    entry->tg = NULL;
+    entry->next = model->lm3g.tginfo[lw2];
+    model->lm3g.tginfo[lw2] = entry;
+
+    /* Bigrams whose first word is lw1. */
+    bg_first = model->lm3g.unigrams[lw1].bigrams;
+    bg_count = model->lm3g.unigrams[lw1 + 1].bigrams - bg_first;
+    bg_list = model->lm3g.bigrams + bg_first;
+
+    hit = (bg_count > 0) ? find_bg(bg_list, bg_count, lw2) : -1;
+    if (hit < 0) {
+        /* No bigram lw1,lw2 */
+        entry->bowt = 0;
+        entry->n_tg = 0;
+        return;
+    }
+
+    entry->bowt = model->lm3g.bo_wt2[bg_list[hit].bo_wt2].l;
+
+    /* Absolute index of bigram lw1,lw2, then of its first trigram. */
+    bg_first += hit;
+    tg_first = FIRST_TG(model, bg_first);
+    entry->tg = model->lm3g.trigrams + tg_first;
+
+    /* Its trigrams end where those of the next bigram begin. */
+    entry->n_tg = FIRST_TG(model, bg_first + 1) - tg_first;
+}
+
+/* Similar to find_bg */
+/*
+ * Search the n trigram entries at tg for word ID w: halve the range while
+ * it is longer than BINARY_SEARCH_THRESH entries, then scan linearly.
+ * Returns the index of the match, or -1.
+ */
+static int32
+find_tg(trigram_t * tg, int32 n, uint32 w)
+{
+    int32 lo, hi, pos;
+
+    lo = 0;
+    hi = n;
+    while (hi - lo > BINARY_SEARCH_THRESH) {
+        int32 mid = (lo + hi) >> 1;
+
+        if (tg[mid].wid == w)
+            return mid;
+        if (tg[mid].wid < w)
+            lo = mid + 1;
+        else
+            hi = mid;
+    }
+
+    for (pos = lo; pos < hi; ++pos) {
+        if (tg[pos].wid == w)
+            return pos;
+    }
+    return -1;
+}
+
+/*
+ * Score lw3 following lw1,lw2.  Falls back to lm3g_bg_score(lw2, lw3) when
+ * the model order is below 3 or either history word is negative.
+ * Otherwise the cached trigram info for (lw1, lw2) is looked up (and
+ * loaded when absent), moved to the head of its list and marked used.
+ * A matching trigram yields its probability with *n_used = 3; without one
+ * the result is the bigram's backoff weight plus the bigram score.
+ */
+static int32
+lm3g_tg_score(NGRAM_MODEL_TYPE *model, int32 lw1,
+              int32 lw2, int32 lw3, int32 *n_used)
+{
+    ngram_model_t *base = &model->base;
+    tginfo_t *cur, *before;
+    int32 hit;
+
+    if ((base->n < 3) || (lw1 < 0) || (lw2 < 0))
+        return (lm3g_bg_score(model, lw2, lw3, n_used));
+
+    /* Walk the list for lw2 until the record for lw1 turns up. */
+    before = NULL;
+    cur = model->lm3g.tginfo[lw2];
+    while (cur != NULL && cur->w1 != lw1) {
+        before = cur;
+        cur = cur->next;
+    }
+
+    if (cur == NULL) {
+        /* Not cached yet: load_tginfo() puts the new record at the head. */
+        load_tginfo(model, lw1, lw2);
+        cur = model->lm3g.tginfo[lw2];
+    }
+    else if (before != NULL) {
+        /* Found further down the list: move it to the front. */
+        before->next = cur->next;
+        cur->next = model->lm3g.tginfo[lw2];
+        model->lm3g.tginfo[lw2] = cur;
+    }
+
+    cur->used = 1;
+
+    /* Trigrams for lw1,lw2 are now reachable through cur. */
+    hit = find_tg(cur->tg, cur->n_tg, lw3);
+    if (hit >= 0) {
+        /* Access mode = trigram */
+        *n_used = 3;
+        return model->lm3g.prob3[cur->tg[hit].prob3].l;
+    }
+
+    return cur->bowt + lm3g_bg_score(model, lw2, lw3, n_used);
+}
+
+/*
+ * Score wid given n_hist history words.  n_hist == 0 returns the stored
+ * unigram value, n_hist == 1 the bigram score with history[0]; every other
+ * n_hist is treated as a trigram query with history[1], history[0].
+ * *n_used receives the N-gram order that was actually used.
+ */
+static int32
+lm3g_template_score(ngram_model_t *base, int32 wid,
+                    int32 *history, int32 n_hist,
+                    int32 *n_used)
+{
+    NGRAM_MODEL_TYPE *model = (NGRAM_MODEL_TYPE *)base;
+
+    if (n_hist == 0) {
+        /* Access mode: unigram */
+        *n_used = 1;
+        return model->lm3g.unigrams[wid].prob1.l;
+    }
+    if (n_hist == 1)
+        return lm3g_bg_score(model, history[0], wid, n_used);
+
+    /* Anything else is handled as a trigram for now. */
+    return lm3g_tg_score(model, history[1], history[0], wid, n_used);
+}
+
+/*
+ * Score wid given n_hist history words, with the word insertion penalty
+ * (base->log_wip) and the language weight (base->lw) removed again.
+ * For n_hist == 0 the uniform interpolation is removed as well, except for
+ * "<s>".  history[0] is passed on as the word directly before wid and
+ * history[1] as the word before that.  *n_used receives the N-gram order
+ * that was actually used.
+ */
+static int32
+lm3g_template_raw_score(ngram_model_t *base, int32 wid,
+ int32 *history, int32 n_hist,
+ int32 *n_used)
+{
+ NGRAM_MODEL_TYPE *model = (NGRAM_MODEL_TYPE *)base;
+ int32 score;
+
+ switch (n_hist) {
+ case 0:
+ /* Access mode: unigram */
+ *n_used = 1;
+ /* Undo insertion penalty. */
+ score = model->lm3g.unigrams[wid].prob1.l - base->log_wip;
+ /* Undo language weight. */
+ score = (int32)(score / base->lw);
+ /* Undo unigram interpolation */
+ if (strcmp(base->word_str[wid], "<s>") != 0) { /* FIXME: configurable start_sym */
+ /* This operation is numerically unstable, so try to avoid it
+ * as possible */
+ /* Skipped when the uniform term is not above logmath zero. */
+ if (base->log_uniform + base->log_uniform_weight > logmath_get_zero(base->lmath)) {
+ score = logmath_log(base->lmath,
+ logmath_exp(base->lmath, score)
+ - logmath_exp(base->lmath,
+ base->log_uniform + base->log_uniform_weight));
+ }
+ }
+ /* The unigram case returns here and never reaches the common
+ * un-weighting at the bottom. */
+ return score;
+ case 1:
+ score = lm3g_bg_score(model, history[0], wid, n_used);
+ break;
+ case 2:
+ default:
+ /* Anything greater than 2 is the same as a trigram for now. */
+ score = lm3g_tg_score(model, history[1], history[0], wid, n_used);
+ break;
+ }
+ /* FIXME (maybe): This doesn't undo unigram weighting in backoff cases. */
+ return (int32)((score - base->log_wip) / base->lw);
+}
+
+/*
+ * Template entry point for adding a unigram: hands base and the lm3g part
+ * of the concrete model to lm3g_add_ug() and returns its result.
+ */
+static int32
+lm3g_template_add_ug(ngram_model_t *base,
+                     int32 wid, int32 lweight)
+{
+    lm3g_model_t *lm3g = &((NGRAM_MODEL_TYPE *)base)->lm3g;
+
+    return lm3g_add_ug(base, lm3g, wid, lweight);
+}
+
+/**
+ * Reset the trigram-info cache of this model via lm3g_tginfo_reset().
+ */
+static void
+lm3g_template_flush(ngram_model_t *base)
+{
+    lm3g_tginfo_reset(base, &((NGRAM_MODEL_TYPE *)base)->lm3g);
+}
+
+/**
+ * Iterator state for the lm3g N-gram models.
+ *
+ * The generic ngram_iter_t header is the first member so that the object
+ * can be cast to and from ngram_iter_t *, as the iterator functions do.
+ */
+typedef struct lm3g_iter_s {
+ ngram_iter_t base; /* Generic iterator fields (model, wids, m, successor). */
+ unigram_t *ug; /* Current unigram entry. */
+ bigram_t *bg; /* Current bigram entry; NULL while unset. */
+ trigram_t *tg; /* Current trigram entry; NULL while unset. */
+} lm3g_iter_t;
+
+/**
+ * Build an iterator positioned on one specific N-gram.
+ *
+ * n_hist selects the order: 0 looks up the unigram wid, 1 the bigram
+ * (history[0], wid) and 2 the trigram (history[1], history[0], wid).
+ *
+ * Returns NULL, after freeing the iterator, when the requested bigram or
+ * trigram does not exist or when n_hist is not 0, 1 or 2.
+ */
+static ngram_iter_t *
+lm3g_template_iter(ngram_model_t *base, int32 wid,
+ int32 *history, int32 n_hist)
+{
+ NGRAM_MODEL_TYPE *model = (NGRAM_MODEL_TYPE *)base;
+ lm3g_iter_t *itor = (lm3g_iter_t *)ckd_calloc(1, sizeof(*itor));
+
+ ngram_iter_init((ngram_iter_t *)itor, base, n_hist, FALSE);
+
+ if (n_hist == 0) {
+ /* Unigram is the easiest. */
+ itor->ug = model->lm3g.unigrams + wid;
+ return (ngram_iter_t *)itor;
+ }
+ else if (n_hist == 1) {
+ int32 i, n, b;
+ /* Find the bigram, as in bg_score above (duplicate code...) */
+ itor->ug = model->lm3g.unigrams + history[0];
+ /* Bigrams of history[0] occupy [b, b + n) in the bigram array. */
+ b = FIRST_BG(model, history[0]);
+ n = FIRST_BG(model, history[0] + 1) - b;
+ itor->bg = model->lm3g.bigrams + b;
+ /* If no such bigram exists then fail. */
+ if ((i = find_bg(itor->bg, n, wid)) < 0) {
+ ngram_iter_free((ngram_iter_t *)itor);
+ return NULL;
+ }
+ itor->bg += i;
+ return (ngram_iter_t *)itor;
+ }
+ else if (n_hist == 2) {
+ int32 i, n;
+ tginfo_t *tginfo, *prev_tginfo;
+ /* Find the trigram, as in tg_score above (duplicate code...) */
+ itor->ug = model->lm3g.unigrams + history[1];
+ /* Search the cached tginfo list of history[0] for an entry whose
+ * first word is history[1]. */
+ prev_tginfo = NULL;
+ for (tginfo = model->lm3g.tginfo[history[0]];
+ tginfo; tginfo = tginfo->next) {
+ if (tginfo->w1 == history[1])
+ break;
+ prev_tginfo = tginfo;
+ }
+
+ if (!tginfo) {
+ /* Not cached: load it, then take the head of the list. */
+ load_tginfo(model, history[1], history[0]);
+ tginfo = model->lm3g.tginfo[history[0]];
+ }
+ else if (prev_tginfo) {
+ /* Cached but not first: move the entry to the head of the list. */
+ prev_tginfo->next = tginfo->next;
+ tginfo->next = model->lm3g.tginfo[history[0]];
+ model->lm3g.tginfo[history[0]] = tginfo;
+ }
+
+ tginfo->used = 1;
+
+ /* Trigrams for w1,w2 now pointed to by tginfo */
+ n = tginfo->n_tg;
+ itor->tg = tginfo->tg;
+ if ((i = find_tg(itor->tg, n, wid)) >= 0) {
+ itor->tg += i;
+ /* Now advance the bigram pointer accordingly. FIXME:
+ * Note that we actually already found the relevant bigram
+ * in load_tginfo. */
+ itor->bg = model->lm3g.bigrams;
+ while (FIRST_TG(model, (itor->bg - model->lm3g.bigrams + 1))
+ <= (itor->tg - model->lm3g.trigrams))
+ ++itor->bg;
+ return (ngram_iter_t *)itor;
+ }
+ else {
+ ngram_iter_free((ngram_iter_t *)itor);
+ return (ngram_iter_t *)NULL;
+ }
+ }
+ else {
+ /* Should not happen. */
+ assert(n_hist == 0); /* Guaranteed to fail. */
+ ngram_iter_free((ngram_iter_t *)itor);
+ return NULL;
+ }
+}
+
+/**
+ * Create a non-successor iterator of order index m (0 = unigrams,
+ * 1 = bigrams, 2 = trigrams) positioned at the start of the model's
+ * unigram, bigram and trigram arrays.
+ *
+ * For m > 1 the bigram pointer is advanced to the parent of the first
+ * trigram, and for m > 0 the unigram pointer is advanced to the parent
+ * of the current bigram, so that the lower-order pointers name the
+ * prefix of the first entry.
+ */
+static ngram_iter_t *
+lm3g_template_mgrams(ngram_model_t *base, int m)
+{
+ NGRAM_MODEL_TYPE *model = (NGRAM_MODEL_TYPE *)base;
+ lm3g_iter_t *itor = (lm3g_iter_t *)ckd_calloc(1, sizeof(*itor));
+ ngram_iter_init((ngram_iter_t *)itor, base, m, FALSE);
+
+ itor->ug = model->lm3g.unigrams;
+ itor->bg = model->lm3g.bigrams;
+ itor->tg = model->lm3g.trigrams;
+
+ /* Advance bigram pointer to match first trigram. */
+ if (m > 1 && base->n_counts[1] > 1) {
+ /* Skip bigrams whose successor already starts at or before the
+ * current trigram, i.e. bigrams that own no trigrams here. */
+ while (FIRST_TG(model, (itor->bg - model->lm3g.bigrams + 1))
+ <= (itor->tg - model->lm3g.trigrams))
+ ++itor->bg;
+ }
+
+ /* Advance unigram pointer to match first bigram. */
+ if (m > 0 && base->n_counts[0] > 1) {
+ while (itor->ug[1].bigrams <= (itor->bg - model->lm3g.bigrams))
+ ++itor->ug;
+ }
+
+ return (ngram_iter_t *)itor;
+}
+
+/**
+ * Create an iterator over the (m+1)-grams that extend the M-gram the
+ * given iterator currently points at.
+ *
+ * Returns NULL when that M-gram has no successors, or when the given
+ * iterator is not of order index 0 or 1.  The new iterator is marked as
+ * a successor iterator.
+ */
+static ngram_iter_t *
+lm3g_template_successors(ngram_iter_t *bitor)
+{
+ NGRAM_MODEL_TYPE *model = (NGRAM_MODEL_TYPE *)bitor->model;
+ lm3g_iter_t *from = (lm3g_iter_t *)bitor;
+ lm3g_iter_t *itor = (lm3g_iter_t *)ckd_calloc(1, sizeof(*itor));
+
+ itor->ug = from->ug;
+ switch (bitor->m) {
+ case 0:
+ /* Next itor bigrams is the same as this itor bigram or
+ itor bigrams is more than total count. This means no successors */
+ if (((itor->ug + 1) - model->lm3g.unigrams < bitor->model->n_counts[0] &&
+ itor->ug->bigrams == (itor->ug + 1)->bigrams) ||
+ itor->ug->bigrams == bitor->model->n_counts[1])
+ goto done;
+
+ /* Start iterating from first bigram successor of from->ug. */
+ itor->bg = model->lm3g.bigrams + itor->ug->bigrams;
+ break;
+ case 1:
+ itor->bg = from->bg;
+
+ /* This indicates no successors */
+ if (((itor->bg + 1) - model->lm3g.bigrams < bitor->model->n_counts[1] &&
+ FIRST_TG (model, itor->bg - model->lm3g.bigrams) ==
+ FIRST_TG (model, (itor->bg + 1) - model->lm3g.bigrams)) ||
+ FIRST_TG (model, itor->bg - model->lm3g.bigrams) == bitor->model->n_counts[2])
+ goto done;
+
+ /* Start iterating from first trigram successor of from->bg. */
+ itor->tg = (model->lm3g.trigrams
+ + FIRST_TG(model, (itor->bg - model->lm3g.bigrams)));
+#if 0
+ printf("%s %s => %d (%s)\n",
+ model->base.word_str[itor->ug - model->lm3g.unigrams],
+ model->base.word_str[itor->bg->wid],
+ FIRST_TG(model, (itor->bg - model->lm3g.bigrams)),
+ model->base.word_str[itor->tg->wid]);
+#endif
+ break;
+ case 2:
+ default:
+ /* All invalid! */
+ goto done;
+ }
+
+ ngram_iter_init((ngram_iter_t *)itor, bitor->model, bitor->m + 1, TRUE);
+ return (ngram_iter_t *)itor;
+ done:
+ /* ngram_iter_init() has not run yet on this path, so there is no wids
+ * array to release and a plain ckd_free() is enough. */
+ ckd_free(itor);
+ return NULL;
+}
+
+/**
+ * Report the word IDs, score and backoff weight of the current M-gram.
+ *
+ * The word IDs are written into the iterator's own wids array, which is
+ * what gets returned.  Trigrams have no backoff weight, and neither do
+ * bigrams when the model carries no bigram backoff table; *out_bowt is
+ * set to 0 in those cases.  NULL is returned for an unexpected order.
+ */
+static int32 const *
+lm3g_template_iter_get(ngram_iter_t *base,
+                       int32 *out_score, int32 *out_bowt)
+{
+    lm3g_iter_t *pos = (lm3g_iter_t *)base;
+    NGRAM_MODEL_TYPE *model = (NGRAM_MODEL_TYPE *)base->model;
+
+    /* The unigram ID is its index; higher orders store the ID inline. */
+    base->wids[0] = pos->ug - model->lm3g.unigrams;
+    if (pos->bg != NULL)
+        base->wids[1] = pos->bg->wid;
+    if (pos->tg != NULL)
+        base->wids[2] = pos->tg->wid;
+
+    if (base->m == 0) {
+        *out_score = pos->ug->prob1.l;
+        *out_bowt = pos->ug->bo_wt1.l;
+    }
+    else if (base->m == 1) {
+        *out_score = model->lm3g.prob2[pos->bg->prob2].l;
+        *out_bowt = model->lm3g.bo_wt2
+            ? model->lm3g.bo_wt2[pos->bg->bo_wt2].l
+            : 0;
+    }
+    else if (base->m == 2) {
+        *out_score = model->lm3g.prob3[pos->tg->prob3].l;
+        *out_bowt = 0;
+    }
+    else {
+        /* Should not happen. */
+        return NULL;
+    }
+    return base->wids;
+}
+
+/**
+ * Advance an iterator to the next M-gram of its order.
+ *
+ * The lower-order pointers (unigram for bigram iterators; bigram and
+ * unigram for trigram iterators) are moved forward as needed so they keep
+ * naming the parents of the current entry.  A successor iterator ends as
+ * soon as the parent would have to change.
+ *
+ * @param base Iterator to advance.
+ * @return The same iterator, or NULL when iteration has ended or the
+ *         model is inconsistent; in that case the iterator has already
+ *         been freed and must not be used again.
+ */
+static ngram_iter_t *
+lm3g_template_iter_next(ngram_iter_t *base)
+{
+    NGRAM_MODEL_TYPE *model = (NGRAM_MODEL_TYPE *)base->model;
+    lm3g_iter_t *itor = (lm3g_iter_t *)base;
+
+    switch (base->m) {
+    case 0:
+        ++itor->ug;
+        /* Check for end condition. */
+        if (itor->ug - model->lm3g.unigrams >= base->model->n_counts[0])
+            goto done;
+        break;
+    case 1:
+        ++itor->bg;
+        /* Check for end condition. */
+        if (itor->bg - model->lm3g.bigrams >= base->model->n_counts[1])
+            goto done;
+        /* Advance unigram pointer if necessary in order to get one
+         * that points to this bigram. */
+        while (itor->bg - model->lm3g.bigrams >= itor->ug[1].bigrams) {
+            /* Stop if this is a successor iterator, since we don't
+             * want a new unigram. */
+            if (base->successor)
+                goto done;
+            ++itor->ug;
+            if (itor->ug == model->lm3g.unigrams + base->model->n_counts[0]) {
+                /* The index is a pointer difference; cast it so that it
+                 * matches the %d conversion. */
+                E_ERROR("Bigram %d has no valid unigram parent\n",
+                        (int)(itor->bg - model->lm3g.bigrams));
+                goto done;
+            }
+        }
+        break;
+    case 2:
+        ++itor->tg;
+        /* Check for end condition. */
+        if (itor->tg - model->lm3g.trigrams >= base->model->n_counts[2])
+            goto done;
+        /* Advance bigram pointer if necessary. */
+        while (itor->tg - model->lm3g.trigrams >=
+               FIRST_TG(model, (itor->bg - model->lm3g.bigrams + 1))) {
+            /* A successor iterator must stay under the same bigram. */
+            if (base->successor)
+                goto done;
+            ++itor->bg;
+            if (itor->bg == model->lm3g.bigrams + base->model->n_counts[1]) {
+                E_ERROR("Trigram %d has no valid bigram parent\n",
+                        (int)(itor->tg - model->lm3g.trigrams));
+                goto done;
+            }
+        }
+        /* Advance unigram pointer if necessary. */
+        while (itor->bg - model->lm3g.bigrams >= itor->ug[1].bigrams) {
+            ++itor->ug;
+            if (itor->ug == model->lm3g.unigrams + base->model->n_counts[0]) {
+                E_ERROR("Trigram %d has no valid unigram parent\n",
+                        (int)(itor->tg - model->lm3g.trigrams));
+                goto done;
+            }
+        }
+        break;
+    default: /* Should not happen. */
+        goto done;
+    }
+
+    return (ngram_iter_t *)itor;
+done:
+    ngram_iter_free(base);
+    return NULL;
+}
+
+/**
+ * Release the iterator object itself; it owns no other storage here.
+ */
+static void
+lm3g_template_iter_free(ngram_iter_t *base)
+{
+    ckd_free((void *)base);
+}
diff --git a/media/sphinxbase/src/libsphinxbase/lm/ngram_model.c b/media/sphinxbase/src/libsphinxbase/lm/ngram_model.c
new file mode 100644
index 000000000..02af4151b
--- /dev/null
+++ b/media/sphinxbase/src/libsphinxbase/lm/ngram_model.c
@@ -0,0 +1,1129 @@
+/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
+/* ====================================================================
+ * Copyright (c) 1999-2007 Carnegie Mellon University. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * This work was supported in part by funding from the Defense Advanced
+ * Research Projects Agency and the National Science Foundation of the
+ * United States of America, and the CMU Sphinx Speech Consortium.
+ *
+ * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
+ * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
+ * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ====================================================================
+ *
+ */
+/*
+ * \file ngram_model.c N-Gram language models.
+ *
+ * Author: David Huggins-Daines, much code taken from sphinx3/src/libs3decoder/liblm
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <string.h>
+#include <assert.h>
+
+#include "sphinxbase/ngram_model.h"
+#include "sphinxbase/ckd_alloc.h"
+#include "sphinxbase/filename.h"
+#include "sphinxbase/pio.h"
+#include "sphinxbase/err.h"
+#include "sphinxbase/logmath.h"
+#include "sphinxbase/strfuncs.h"
+#include "sphinxbase/case.h"
+
+#include "ngram_model_internal.h"
+
+/**
+ * Guess the language model file type from a file name.
+ *
+ * The extension is compared case-insensitively.  A trailing ".gz" or
+ * ".bz2" is skipped so that the extension in front of it decides the
+ * type, e.g. "model.arpa.gz" is an ARPA file.
+ *
+ * @param file_name File name to inspect (must not be NULL).
+ * @return NGRAM_ARPA, NGRAM_DMP, or NGRAM_INVALID when there is no
+ *         recognized extension.
+ */
+ngram_file_type_t
+ngram_file_name_to_type(const char *file_name)
+{
+    const char *ext;
+
+    ext = strrchr(file_name, '.');
+    if (ext == NULL) {
+        return NGRAM_INVALID;
+    }
+    if (0 == strcmp_nocase(ext, ".gz") || 0 == strcmp_nocase(ext, ".bz2")) {
+        /* Walk back to the previous '.', looking only at characters
+         * inside the string so no pointer before file_name is formed. */
+        const char *scan = ext;
+
+        while (scan > file_name && scan[-1] != '.')
+            --scan;
+        if (scan == file_name) {
+            /* Nothing but the compression suffix. */
+            return NGRAM_INVALID;
+        }
+        ext = scan - 1;
+    }
+    /* We use strncmp because there might be a .gz on the end. */
+    if (0 == strncmp_nocase(ext, ".ARPA", 5))
+        return NGRAM_ARPA;
+    if (0 == strncmp_nocase(ext, ".DMP", 4))
+        return NGRAM_DMP;
+    return NGRAM_INVALID;
+}
+
+/**
+ * Map a type name ("arpa" or "dmp", any letter case) to its file type;
+ * any other string yields NGRAM_INVALID.
+ */
+ngram_file_type_t
+ngram_str_to_type(const char *str_name)
+{
+    ngram_file_type_t kind = NGRAM_INVALID;
+
+    if (strcmp_nocase(str_name, "arpa") == 0)
+        kind = NGRAM_ARPA;
+    else if (strcmp_nocase(str_name, "dmp") == 0)
+        kind = NGRAM_DMP;
+    return kind;
+}
+
+/**
+ * Name of a file type: "arpa" or "dmp", or NULL for any other value.
+ */
+char const *
+ngram_type_to_str(int type)
+{
+    if (type == NGRAM_ARPA)
+        return "arpa";
+    if (type == NGRAM_DMP)
+        return "dmp";
+    return NULL;
+}
+
+
+/**
+ * Read an N-gram model from a file.
+ *
+ * With NGRAM_AUTO the ARPA reader is tried first and the DMP reader
+ * second.  When config is given, the "-lw", "-wip" and "-uw" options
+ * (each defaulting to 1.0 when absent) are applied to the loaded model.
+ *
+ * @param config Optional configuration; may be NULL.
+ * @param file_name File to read.
+ * @param file_type Format to expect, or NGRAM_AUTO.
+ * @param lmath Log-math object handed to the reader.
+ * @return The model, or NULL if the type is unsupported or the file
+ *         could not be read.
+ */
+ngram_model_t *
+ngram_model_read(cmd_ln_t *config,
+                 const char *file_name,
+                 ngram_file_type_t file_type,
+                 logmath_t *lmath)
+{
+    ngram_model_t *model = NULL;
+
+    switch (file_type) {
+    case NGRAM_AUTO:
+        model = ngram_model_arpa_read(config, file_name, lmath);
+        if (model == NULL)
+            model = ngram_model_dmp_read(config, file_name, lmath);
+        break;
+    case NGRAM_ARPA:
+        model = ngram_model_arpa_read(config, file_name, lmath);
+        break;
+    case NGRAM_DMP:
+        model = ngram_model_dmp_read(config, file_name, lmath);
+        break;
+    default:
+        E_ERROR("language model file type not supported\n");
+        return NULL;
+    }
+
+    /* A failed read must not reach ngram_model_apply_weights(), which
+     * dereferences the model. */
+    if (model == NULL)
+        return NULL;
+
+    /* Now set weights based on config if present. */
+    if (config) {
+        float32 lw = 1.0;
+        float32 wip = 1.0;
+        float32 uw = 1.0;
+
+        if (cmd_ln_exists_r(config, "-lw"))
+            lw = cmd_ln_float32_r(config, "-lw");
+        if (cmd_ln_exists_r(config, "-wip"))
+            wip = cmd_ln_float32_r(config, "-wip");
+        if (cmd_ln_exists_r(config, "-uw"))
+            uw = cmd_ln_float32_r(config, "-uw");
+
+        ngram_model_apply_weights(model, lw, wip, uw);
+    }
+
+    return model;
+}
+
+/**
+ * Write an N-gram model to a file.
+ *
+ * With NGRAM_AUTO the format is taken from the file name; names with no
+ * recognized extension (".lm" and the like) are written as ARPA.
+ *
+ * @return The result of the format-specific writer, or -1 for an
+ *         unsupported file type.
+ */
+int
+ngram_model_write(ngram_model_t *model, const char *file_name,
+                  ngram_file_type_t file_type)
+{
+    if (file_type == NGRAM_AUTO) {
+        file_type = ngram_file_name_to_type(file_name);
+        /* Default to ARPA (catches .lm and other things) */
+        if (file_type == NGRAM_INVALID)
+            file_type = NGRAM_ARPA;
+    }
+
+    switch (file_type) {
+    case NGRAM_ARPA:
+        return ngram_model_arpa_write(model, file_name);
+    case NGRAM_DMP:
+        return ngram_model_dmp_write(model, file_name);
+    default:
+        E_ERROR("language model file type not supported\n");
+        return -1;
+    }
+}
+
+ /**
+ * Initialize, or re-initialize, the base part of an N-gram model.
+ *
+ * Sets the reference count to 1, stores the function table and the
+ * order n, and sizes the word-string table and the word-to-ID hash for
+ * n_unigram words.  Weights are reset to their defaults only when the
+ * logmath object differs from the one already stored.  Always returns 0.
+ */
+ int32
+ ngram_model_init(ngram_model_t *base,
+ ngram_funcs_t *funcs,
+ logmath_t *lmath,
+ int32 n, int32 n_unigram)
+ {
+ base->refcount = 1;
+ base->funcs = funcs;
+ base->n = n;
+ /* Allocate the count array unless an earlier initialization
+ * already did so. */
+ if (base->n_counts == NULL)
+ base->n_counts = ckd_calloc(3, sizeof(*base->n_counts));
+ /* Don't reset weights if logmath object hasn't changed. */
+ if (base->lmath != lmath) {
+ /* Set default values for weights. */
+ base->lw = 1.0;
+ base->log_wip = 0; /* i.e. 1.0 */
+ base->log_uw = 0; /* i.e. 1.0 */
+ base->log_uniform = logmath_log(lmath, 1.0 / n_unigram);
+ base->log_uniform_weight = logmath_get_zero(lmath);
+ base->log_zero = logmath_get_zero(lmath);
+ base->lmath = lmath;
+ }
+ /* Allocate or reallocate space for word strings. */
+ if (base->word_str) {
+ /* Free all previous word strings if they were allocated. */
+ if (base->writable) {
+ int32 i;
+ for (i = 0; i < base->n_words; ++i) {
+ ckd_free(base->word_str[i]);
+ base->word_str[i] = NULL;
+ }
+ }
+ /* NOTE(review): entries gained by this resize are not cleared
+ * here, unlike in the ckd_calloc() branch below. */
+ base->word_str = ckd_realloc(base->word_str, n_unigram * sizeof(char *));
+ }
+ else
+ base->word_str = ckd_calloc(n_unigram, sizeof(char *));
+ /* NOTE: They are no longer case-insensitive since we are allowing
+ * other encodings for word strings. Beware. */
+ if (base->wid)
+ hash_table_empty(base->wid);
+ else
+ base->wid = hash_table_new(n_unigram, FALSE);
+ base->n_counts[0] = base->n_1g_alloc = base->n_words = n_unigram;
+
+ return 0;
+}
+
+/**
+ * Take an additional reference on the model and hand the model back.
+ */
+ngram_model_t *
+ngram_model_retain(ngram_model_t *model)
+{
+    model->refcount += 1;
+    return model;
+}
+
+
+/**
+ * Invoke the model's flush hook, if the model provides one.
+ */
+void
+ngram_model_flush(ngram_model_t *model)
+{
+    if (!model->funcs || !model->funcs->flush)
+        return;
+    model->funcs->flush(model);
+}
+
+/**
+ * Drop one reference to a model and destroy it when none remain.
+ *
+ * Returns the remaining reference count, or 0 when the model was NULL
+ * or has been destroyed.
+ */
+int
+ngram_model_free(ngram_model_t *model)
+{
+ int i;
+
+ if (model == NULL)
+ return 0;
+ if (--model->refcount > 0)
+ return model->refcount;
+ /* Let the concrete model type release its own storage first. */
+ if (model->funcs && model->funcs->free)
+ (*model->funcs->free)(model);
+ if (model->writable) {
+ /* Free all words. */
+ for (i = 0; i < model->n_words; ++i) {
+ ckd_free(model->word_str[i]);
+ }
+ }
+ else {
+ /* Free all class words. */
+ for (i = 0; i < model->n_classes; ++i) {
+ ngram_class_t *lmclass;
+ int32 j;
+
+ lmclass = model->classes[i];
+ /* Words in the class's contiguous ID range. */
+ for (j = 0; j < lmclass->n_words; ++j) {
+ ckd_free(model->word_str[lmclass->start_wid + j]);
+ }
+ /* Words recorded in the class hash table; -1 marks an empty bucket. */
+ for (j = 0; j < lmclass->n_hash; ++j) {
+ if (lmclass->nword_hash[j].wid != -1) {
+ ckd_free(model->word_str[lmclass->nword_hash[j].wid]);
+ }
+ }
+ }
+ }
+ for (i = 0; i < model->n_classes; ++i) {
+ ngram_class_free(model->classes[i]);
+ }
+ ckd_free(model->classes);
+ hash_table_free(model->wid);
+ ckd_free(model->word_str);
+ ckd_free(model->n_counts);
+ ckd_free(model);
+ return 0;
+}
+
+/**
+ * Convert every word string of the model to upper or lower case and
+ * rebuild the word-to-ID hash table to match.
+ *
+ * Words starting with '<' or '[' (tags and classes) are left untouched.
+ * If the model did not own its word strings yet, each one is copied
+ * first, and the model is marked writable.  Words that become identical
+ * after folding are reported with a warning.  Always returns 0.
+ */
+int
+ngram_model_casefold(ngram_model_t *model, int kase)
+{
+    hash_table_t *fresh_map;
+    int owned_before, idx;
+
+    /* Remember whether the strings were ours; from now on they are. */
+    owned_before = model->writable;
+    model->writable = TRUE;
+
+    fresh_map = hash_table_new(model->n_words, FALSE);
+    for (idx = 0; idx < model->n_words; ++idx) {
+        char *text;
+
+        if (owned_before)
+            text = model->word_str[idx];
+        else
+            text = ckd_salloc(model->word_str[idx]);
+
+        /* Don't case-fold <tags> or [classes] */
+        if (text[0] != '<' && text[0] != '[') {
+            if (kase == NGRAM_UPPER)
+                ucase(text);
+            else if (kase == NGRAM_LOWER)
+                lcase(text);
+        }
+        model->word_str[idx] = text;
+
+        /* Folding can merge distinct words; warn when that happens. */
+        if (hash_table_enter_int32(fresh_map, model->word_str[idx], idx) != idx) {
+            E_WARN("Duplicate word in dictionary after conversion: %s\n",
+                   model->word_str[idx]);
+        }
+    }
+
+    /* Replace the old mapping with the rebuilt one. */
+    hash_table_free(model->wid);
+    model->wid = fresh_map;
+    return 0;
+}
+
+/**
+ * Dispatch to the model type's apply_weights implementation.
+ */
+int
+ngram_model_apply_weights(ngram_model_t *model,
+                          float32 lw, float32 wip, float32 uw)
+{
+    return model->funcs->apply_weights(model, lw, wip, uw);
+}
+
+/**
+ * Return the language weight; optionally also report the log insertion
+ * penalty and the log unigram weight through the non-NULL out pointers.
+ */
+float32
+ngram_model_get_weights(ngram_model_t *model, int32 *out_log_wip,
+                        int32 *out_log_uw)
+{
+    if (out_log_wip != NULL) {
+        *out_log_wip = model->log_wip;
+    }
+    if (out_log_uw != NULL) {
+        *out_log_uw = model->log_uw;
+    }
+    return model->lw;
+}
+
+
+/**
+ * Weighted score of wid given a history of word IDs.
+ *
+ * Class words are replaced by their class tag before the model-specific
+ * scorer is called (the history array is rewritten in place), and the
+ * in-class log probability of wid is added to the result.  An invalid
+ * wid, or a class word missing from its class, scores log_zero.
+ */
+int32
+ngram_ng_score(ngram_model_t *model, int32 wid, int32 *history,
+               int32 n_hist, int32 *n_used)
+{
+    int32 in_class = 0;
+    int32 ngram_part;
+    int k;
+
+    /* Closed vocabulary, OOV word probability is zero */
+    if (wid == NGRAM_INVALID_WID)
+        return model->log_zero;
+
+    /* Map a class word to its tag, keeping the in-class weight. */
+    if (NGRAM_IS_CLASSWID(wid)) {
+        ngram_class_t *cls = model->classes[NGRAM_CLASSID(wid)];
+
+        in_class = ngram_class_prob(cls, wid);
+        if (in_class == 1)      /* Meaning, not found in class. */
+            return model->log_zero;
+        wid = cls->tag_wid;
+    }
+
+    /* Do the same mapping for every class word in the history. */
+    k = 0;
+    while (k < n_hist) {
+        if (history[k] != NGRAM_INVALID_WID && NGRAM_IS_CLASSWID(history[k]))
+            history[k] = model->classes[NGRAM_CLASSID(history[k])]->tag_wid;
+        ++k;
+    }
+
+    ngram_part = model->funcs->score(model, wid, history, n_hist, n_used);
+
+    /* Multiply by unigram in-class weight. */
+    return ngram_part + in_class;
+}
+
+/**
+ * Weighted score of a word given a NULL-terminated variable list of
+ * history words (most recent first, as passed on to ngram_ng_score()).
+ */
+int32
+ngram_score(ngram_model_t *model, const char *word, ...)
+{
+    va_list history;
+    const char *hword;
+    int32 *histid;
+    int32 n_hist;
+    int32 n_used;
+    int32 prob;
+
+    /* First pass: count the history words. */
+    va_start(history, word);
+    for (n_hist = 0; (hword = va_arg(history, const char *)) != NULL; ++n_hist)
+        ;
+    va_end(history);
+
+    /* Second pass: translate them to word IDs. */
+    histid = ckd_calloc(n_hist, sizeof(*histid));
+    va_start(history, word);
+    for (n_hist = 0; (hword = va_arg(history, const char *)) != NULL; ++n_hist)
+        histid[n_hist] = ngram_wid(model, hword);
+    va_end(history);
+
+    prob = ngram_ng_score(model, ngram_wid(model, word),
+                          histid, n_hist, &n_used);
+    ckd_free(histid);
+    return prob;
+}
+
+/**
+ * Trigram convenience wrapper: score w3 given the history (w2, w1),
+ * with w2 as the most recent word.
+ */
+int32
+ngram_tg_score(ngram_model_t *model, int32 w3, int32 w2, int32 w1, int32 *n_used)
+{
+    int32 context[2];
+
+    context[1] = w1;
+    context[0] = w2;
+    return ngram_ng_score(model, w3, context, 2, n_used);
+}
+
+/**
+ * Bigram convenience wrapper: score w2 given the single history word w1.
+ */
+int32
+ngram_bg_score(ngram_model_t *model, int32 w2, int32 w1, int32 *n_used)
+{
+    int32 *context = &w1;
+
+    return ngram_ng_score(model, w2, context, 1, n_used);
+}
+
+/**
+ * "Raw" log probability of wid given a history of word IDs, i.e. the
+ * value produced by the model's raw_score function plus the in-class
+ * log probability for class words.
+ *
+ * Class words in wid and in the history are replaced by their class tag
+ * first; the history array is rewritten in place.
+ *
+ * @return The log probability, or the model's log_zero when wid is
+ *         invalid or is a class word that is not found in its class.
+ */
+int32
+ngram_ng_prob(ngram_model_t *model, int32 wid, int32 *history,
+              int32 n_hist, int32 *n_used)
+{
+    int32 prob, class_weight = 0;
+    int i;
+
+    /* Closed vocabulary, OOV word probability is zero */
+    if (wid == NGRAM_INVALID_WID)
+        return model->log_zero;
+
+    /* "Declassify" wid and history */
+    if (NGRAM_IS_CLASSWID(wid)) {
+        ngram_class_t *lmclass = model->classes[NGRAM_CLASSID(wid)];
+
+        class_weight = ngram_class_prob(lmclass, wid);
+        /* 1 is the "not found in class" marker, not a log probability;
+         * report zero probability exactly as ngram_ng_score() does. */
+        if (class_weight == 1)
+            return model->log_zero;
+        wid = lmclass->tag_wid;
+    }
+    for (i = 0; i < n_hist; ++i) {
+        if (history[i] != NGRAM_INVALID_WID && NGRAM_IS_CLASSWID(history[i]))
+            history[i] = model->classes[NGRAM_CLASSID(history[i])]->tag_wid;
+    }
+    prob = (*model->funcs->raw_score)(model, wid, history,
+                                      n_hist, n_used);
+    /* Multiply by unigram in-class weight. */
+    return prob + class_weight;
+}
+
+/**
+ * Raw log probability of a word given a NULL-terminated variable list
+ * of history words, passed on to ngram_ng_prob() in the order given.
+ */
+int32
+ngram_probv(ngram_model_t *model, const char *word, ...)
+{
+    va_list history;
+    const char *hword;
+    int32 *histid;
+    int32 n_hist;
+    int32 n_used;
+    int32 prob;
+
+    /* Count the history words so the ID array can be sized. */
+    va_start(history, word);
+    for (n_hist = 0; (hword = va_arg(history, const char *)) != NULL; ++n_hist)
+        ;
+    va_end(history);
+
+    histid = ckd_calloc(n_hist, sizeof(*histid));
+
+    /* Walk the list again, looking up each word's ID. */
+    va_start(history, word);
+    for (n_hist = 0; (hword = va_arg(history, const char *)) != NULL; ++n_hist)
+        histid[n_hist] = ngram_wid(model, hword);
+    va_end(history);
+
+    prob = ngram_ng_prob(model, ngram_wid(model, word),
+                         histid, n_hist, &n_used);
+    ckd_free(histid);
+    return prob;
+}
+
+/**
+ * Raw log probability of words[0] given the history words[1..n-1].
+ *
+ * @param model Language model.
+ * @param words Array of n word strings: the target word first, then
+ *              its history.
+ * @param n Number of entries in words; must be at least 1.
+ * @return The log probability from ngram_ng_prob(), or the model's
+ *         log_zero when words is NULL or n is less than 1.
+ */
+int32
+ngram_prob(ngram_model_t *model, const char *const *words, int32 n)
+{
+    int32 *ctx_id;
+    int32 nused;
+    int32 prob;
+    int32 wid;
+    int32 i;
+
+    /* Without a target word there is nothing to score, and n - 1 would
+     * be a negative element count. */
+    if (words == NULL || n < 1)
+        return model->log_zero;
+
+    ctx_id = (int32 *)ckd_calloc(n - 1, sizeof(*ctx_id));
+    /* Signed index: comparing an unsigned one against n would convert a
+     * negative n to a huge bound. */
+    for (i = 1; i < n; ++i)
+        ctx_id[i - 1] = ngram_wid(model, words[i]);
+
+    wid = ngram_wid(model, *words);
+    prob = ngram_ng_prob(model, wid, ctx_id, n - 1, &nused);
+    ckd_free(ctx_id);
+
+    return prob;
+}
+
+/**
+ * Convert a weighted score back to a log probability by removing the
+ * insertion penalty and then dividing out the language weight.
+ */
+int32
+ngram_score_to_prob(ngram_model_t *base, int32 score)
+{
+    int32 unpenalized = score - base->log_wip;
+
+    return (int32)(unpenalized / base->lw);
+}
+
+/**
+ * Word ID of "<UNK>", or NGRAM_INVALID_WID when the model has no such
+ * word.
+ */
+int32
+ngram_unknown_wid(ngram_model_t *model)
+{
+    int32 unk_id;
+    int found;
+
+    /* FIXME: This could be memoized for speed if necessary. */
+    found = (hash_table_lookup_int32(model->wid, "<UNK>", &unk_id) != -1);
+    return found ? unk_id : NGRAM_INVALID_WID;
+}
+
+/**
+ * The model's representation of log(0).
+ */
+int32
+ngram_zero(ngram_model_t *model)
+{
+    int32 zero = model->log_zero;
+
+    return zero;
+}
+
+/**
+ * Order N of the model, or 0 for a NULL model.
+ */
+int32
+ngram_model_get_size(ngram_model_t *model)
+{
+    return (model != NULL) ? model->n : 0;
+}
+
+/**
+ * The model's per-order N-gram count array, or NULL for a NULL model.
+ */
+int32 const *
+ngram_model_get_counts(ngram_model_t *model)
+{
+    if (model == NULL)
+        return NULL;
+    return model->n_counts;
+}
+
+/**
+ * Fill in the generic part of an iterator: the owning model, the order
+ * index m, the successor flag, and a zeroed word-ID buffer with one
+ * slot per model order.
+ */
+void
+ngram_iter_init(ngram_iter_t *itor, ngram_model_t *model,
+                int m, int successor)
+{
+    itor->m = m;
+    itor->successor = successor;
+    itor->model = model;
+    itor->wids = ckd_calloc(model->n, sizeof(*itor->wids));
+}
+
+/**
+ * Iterate over all M-grams of order index m (0 = unigrams).
+ *
+ * Returns NULL when m is not below the model order or when the model
+ * type does not implement this kind of iteration.
+ */
+ngram_iter_t *
+ngram_model_mgrams(ngram_model_t *model, int m)
+{
+    /* m is the order minus one, which is easy to get wrong; reject
+     * anything the model cannot hold. */
+    if (m >= model->n || model->funcs->mgrams == NULL)
+        return NULL;
+    return model->funcs->mgrams(model, m);
+}
+
+/**
+ * Get an iterator for one N-gram, given as a word followed by a
+ * NULL-terminated variable list of history words.
+ */
+ngram_iter_t *
+ngram_iter(ngram_model_t *model, const char *word, ...)
+{
+    va_list history;
+    const char *hword;
+    int32 *histid;
+    int32 n_hist;
+    ngram_iter_t *itor;
+
+    /* Pass one: how many history words were supplied? */
+    va_start(history, word);
+    for (n_hist = 0; (hword = va_arg(history, const char *)) != NULL; ++n_hist)
+        ;
+    va_end(history);
+
+    /* Pass two: convert each of them to a word ID. */
+    histid = ckd_calloc(n_hist, sizeof(*histid));
+    va_start(history, word);
+    for (n_hist = 0; (hword = va_arg(history, const char *)) != NULL; ++n_hist)
+        histid[n_hist] = ngram_wid(model, hword);
+    va_end(history);
+
+    itor = ngram_ng_iter(model, ngram_wid(model, word), histid, n_hist);
+    ckd_free(histid);
+    return itor;
+}
+
+/**
+ * Get an iterator for the N-gram made of wid and its history.
+ *
+ * Returns NULL when the history is too long for the model order or the
+ * model type has no iter implementation.
+ */
+ngram_iter_t *
+ngram_ng_iter(ngram_model_t *model, int32 wid, int32 *history, int32 n_hist)
+{
+    if (n_hist >= model->n || model->funcs->iter == NULL)
+        return NULL;
+    return model->funcs->iter(model, wid, history, n_hist);
+}
+
+/**
+ * Iterate over the successors of the M-gram an iterator points at.
+ * Returns NULL at once for iterators of the model's highest order.
+ */
+ngram_iter_t *
+ngram_iter_successors(ngram_iter_t *itor)
+{
+    ngram_model_t *owner = itor->model;
+
+    /* The highest-order N-grams have nothing following them. */
+    if (itor->m == owner->n - 1)
+        return NULL;
+    return owner->funcs->successors(itor);
+}
+
+/**
+ * Fetch word IDs, score and backoff weight of the current M-gram from
+ * the model type's iter_get implementation.
+ */
+int32 const *
+ngram_iter_get(ngram_iter_t *itor,
+               int32 *out_score,
+               int32 *out_bowt)
+{
+    return itor->model->funcs->iter_get(itor, out_score, out_bowt);
+}
+
+/**
+ * Advance an iterator through the model type's iter_next implementation.
+ */
+ngram_iter_t *
+ngram_iter_next(ngram_iter_t *itor)
+{
+    return itor->model->funcs->iter_next(itor);
+}
+
+/**
+ * Release an iterator: first its word-ID buffer, then the iterator
+ * object through the model type's iter_free implementation.
+ */
+void
+ngram_iter_free(ngram_iter_t *itor)
+{
+    ckd_free(itor->wids);
+    itor->model->funcs->iter_free(itor);
+}
+
+/**
+ * Look up the ID of a word; words not in the model map to whatever
+ * ngram_unknown_wid() returns.
+ */
+int32
+ngram_wid(ngram_model_t *model, const char *word)
+{
+    int32 id;
+
+    if (hash_table_lookup_int32(model->wid, word, &id) != -1)
+        return id;
+    return ngram_unknown_wid(model);
+}
+
+/**
+ * String for a word ID, with any class tag stripped from the ID first.
+ * Returns NULL when the ID is not below the number of words.
+ */
+const char *
+ngram_word(ngram_model_t *model, int32 wid)
+{
+    int32 plain = NGRAM_BASEWID(wid);
+
+    return (plain >= model->n_words) ? NULL : model->word_str[plain];
+}
+
+/**
+ * Register a new word string and enter it in the word-to-ID mapping.
+ *
+ * A word that is already known is not added again; its existing ID is
+ * returned after a warning.  When classid is non-negative the returned
+ * ID carries that class tag.
+ */
+int32
+ngram_add_word_internal(ngram_model_t *model,
+                        const char *word,
+                        int32 classid)
+{
+    int32 wid;
+    int32 slot;
+
+    /* Refuse duplicates. */
+    if (hash_table_lookup_int32(model->wid, word, &wid) == 0) {
+        E_WARN("Omit duplicate word '%s'\n", word);
+        return wid;
+    }
+
+    /* The word goes in the next free slot; tag the ID if it is a
+     * class word. */
+    slot = model->n_words;
+    wid = (classid >= 0) ? NGRAM_CLASSWID(slot, classid) : slot;
+
+    /* Grow the string table one step when it is full. */
+    if (slot >= model->n_1g_alloc) {
+        model->n_1g_alloc += UG_ALLOC_STEP;
+        model->word_str = ckd_realloc(model->word_str,
+                                      sizeof(*model->word_str) * model->n_1g_alloc);
+    }
+
+    /* Store a private copy of the string and map it to the ID. */
+    model->word_str[slot] = ckd_salloc(word);
+    if (hash_table_enter_int32(model->wid, model->word_str[slot], wid) != wid) {
+        E_ERROR("Hash insertion failed for word %s => %p (should not happen)\n",
+                model->word_str[slot], (void *)(long)(wid));
+    }
+
+    ++model->n_words;
+    return wid;
+}
+
+/**
+ * Add a word with the given unigram weight to a writable model.
+ *
+ * Returns the word ID, or -1 when the model is read-only or the
+ * model-specific unigram insertion reports 0.  An invalid ID from the
+ * word table is passed through unchanged.
+ */
+int32
+ngram_model_add_word(ngram_model_t *model,
+                     const char *word, float32 weight)
+{
+    int32 new_id;
+    int32 ug_result = model->log_zero;
+
+    /* Read-only models are rejected rather than converted. */
+    if (!model->writable) {
+        E_WARN("Can't add word '%s' to read-only language model. "
+               "Disable mmap with '-mmap no' to make it writable\n", word);
+        return -1;
+    }
+
+    new_id = ngram_add_word_internal(model, word, -1);
+    if (new_id == NGRAM_INVALID_WID)
+        return new_id;
+
+    /* Let the model type create the unigram entry, if it can. */
+    if (model->funcs && model->funcs->add_ug)
+        ug_result = model->funcs->add_ug(model, new_id,
+                                         logmath_log(model->lmath, weight));
+
+    return (ug_result == 0) ? -1 : new_id;
+}
+
+/**
+ * Allocate a word class whose members occupy consecutive word IDs
+ * starting at start_wid, one per entry of classwords.
+ *
+ * classwords holds the members' probabilities; if they do not sum to
+ * within 0.9..1.1 they are normalized in place before being stored as
+ * log probabilities.  The class starts without a hash table.
+ */
+ngram_class_t *
+ngram_class_new(ngram_model_t *model, int32 tag_wid, int32 start_wid, glist_t classwords)
+{
+    ngram_class_t *cls;
+    gnode_t *node;
+    float32 total;
+    int idx;
+
+    cls = ckd_calloc(1, sizeof(*cls));
+    cls->tag_wid = tag_wid;
+    /* Word ID (without class tag) of the first word in the list. */
+    cls->start_wid = start_wid;
+    cls->n_words = glist_count(classwords);
+    cls->prob1 = ckd_calloc(cls->n_words, sizeof(*cls->prob1));
+    cls->nword_hash = NULL;
+    cls->n_hash = 0;
+
+    /* Sum the member probabilities. */
+    total = 0.0;
+    node = classwords;
+    while (node) {
+        total += gnode_float32(node);
+        node = gnode_next(node);
+    }
+
+    /* Rescale them when the sum is noticeably off from 1. */
+    if (total > 1.1 || total < 0.9) {
+        E_INFO("Total class probability is %f, will normalize\n", total);
+        node = classwords;
+        while (node) {
+            node->data.fl /= total;
+            node = gnode_next(node);
+        }
+    }
+
+    /* Store the log probabilities in list order. */
+    idx = 0;
+    node = classwords;
+    while (node) {
+        cls->prob1[idx] = logmath_log(model->lmath, gnode_float32(node));
+        ++idx;
+        node = gnode_next(node);
+    }
+
+    return cls;
+}
+
+/**
+ * Record a word and its log probability in a class's hash table,
+ * creating the table on first use.
+ *
+ * Colliding entries are linked through each bucket's next index; the
+ * table is doubled when every bucket is in use.  Returns the index of
+ * the bucket that received the word.
+ */
+int32
+ngram_class_add_word(ngram_class_t *lmclass, int32 wid, int32 lweight)
+{
+ int32 hash;
+
+ if (lmclass->nword_hash == NULL) {
+ /* Initialize everything in it to -1 */
+ lmclass->nword_hash = ckd_malloc(NGRAM_HASH_SIZE * sizeof(*lmclass->nword_hash));
+ memset(lmclass->nword_hash, 0xff, NGRAM_HASH_SIZE * sizeof(*lmclass->nword_hash));
+ lmclass->n_hash = NGRAM_HASH_SIZE;
+ lmclass->n_hash_inuse = 0;
+ }
+ /* Stupidest possible hash function. This will work pretty well
+ * when this function is called repeatedly with contiguous word
+ * IDs, though... */
+ /* NOTE(review): the mask presumably relies on n_hash being a power of
+ * two -- confirm against NGRAM_HASH_SIZE. */
+ hash = wid & (lmclass->n_hash - 1);
+ if (lmclass->nword_hash[hash].wid == -1) {
+ /* Good, no collision. */
+ lmclass->nword_hash[hash].wid = wid;
+ lmclass->nword_hash[hash].prob1 = lweight;
+ ++lmclass->n_hash_inuse;
+ return hash;
+ }
+ else {
+ int32 next; /**< Next available bucket. */
+ /* Collision... Find the end of the hash chain. */
+ while (lmclass->nword_hash[hash].next != -1)
+ hash = lmclass->nword_hash[hash].next;
+ assert(hash != -1);
+ /* Is there any free bucket left? */
+ if (lmclass->n_hash_inuse == lmclass->n_hash) {
+ /* No: double the table and mark the new half as empty. */
+ lmclass->nword_hash = ckd_realloc(lmclass->nword_hash,
+ lmclass->n_hash * 2 * sizeof(*lmclass->nword_hash));
+ memset(lmclass->nword_hash + lmclass->n_hash,
+ 0xff, lmclass->n_hash * sizeof(*lmclass->nword_hash));
+ /* Just use the next allocated one (easy) */
+ next = lmclass->n_hash;
+ lmclass->n_hash *= 2;
+ }
+ else {
+ /* Look for any available bucket. We hope this doesn't happen. */
+ for (next = 0; next < lmclass->n_hash; ++next)
+ if (lmclass->nword_hash[next].wid == -1)
+ break;
+ /* This should absolutely not happen. */
+ assert(next != lmclass->n_hash);
+ }
+ /* Fill the chosen bucket and link it to the end of the chain. */
+ lmclass->nword_hash[next].wid = wid;
+ lmclass->nword_hash[next].prob1 = lweight;
+ lmclass->nword_hash[hash].next = next;
+ ++lmclass->n_hash_inuse;
+ return next;
+ }
+}
+
+/**
+ * Free a word class together with its probability array and its hash
+ * table.
+ */
+void
+ngram_class_free(ngram_class_t *lmclass)
+{
+    ckd_free(lmclass->prob1);
+    ckd_free(lmclass->nword_hash);
+    ckd_free(lmclass);
+}
+
+/**
+ * Add a word to an existing class of the model.
+ *
+ * The new word receives the probability weight / (class size + 1); all
+ * words already in the class are scaled by one minus that value to make
+ * room.  The return value is that of ngram_class_add_word() on success;
+ * an unknown class name or a failed word insertion yields
+ * NGRAM_INVALID_WID.
+ */
+int32
+ngram_model_add_class_word(ngram_model_t *model,
+                           const char *classname,
+                           const char *word,
+                           float32 weight)
+{
+    ngram_class_t *cls;
+    int32 cid, tag, new_wid, k, rescale;
+    float32 share;
+
+    /* Locate the class by its tag word.  A linear scan is fine: there
+     * are few classes and this is not a hot path. */
+    tag = ngram_wid(model, classname);
+    if (tag == NGRAM_INVALID_WID) {
+        E_ERROR("No such word or class tag: %s\n", classname);
+        return tag;
+    }
+    cid = 0;
+    while (cid < model->n_classes && model->classes[cid]->tag_wid != tag)
+        ++cid;
+    /* Creating a class implicitly is probably not a good idea. */
+    if (cid == model->n_classes) {
+        E_ERROR("Word %s is not a class tag (call ngram_model_add_class() first)\n", classname);
+        return NGRAM_INVALID_WID;
+    }
+    cls = model->classes[cid];
+
+    /* Enter the word in the model's vocabulary. */
+    new_wid = ngram_add_word_internal(model, word, cid);
+    if (new_wid == NGRAM_INVALID_WID)
+        return new_wid;
+
+    /* Fixed probability assigned to the new word. */
+    share = weight * 1.0f / (cls->n_words + cls->n_hash_inuse + 1);
+
+    /* Shrink every existing member by (1 - share), in the log domain. */
+    rescale = logmath_log(model->lmath, 1.0 - share);
+    for (k = 0; k < cls->n_words; ++k)
+        cls->prob1[k] += rescale;
+    for (k = 0; k < cls->n_hash; ++k) {
+        if (cls->nword_hash[k].wid != -1)
+            cls->nword_hash[k].prob1 += rescale;
+    }
+
+    /* Finally record the word in the class hash table. */
+    return ngram_class_add_word(cls, new_wid, logmath_log(model->lmath, share));
+}
+
+/**
+ * Add a new word class to the model.
+ *
+ * The class tag is added as a word (with weight classweight) if the
+ * model does not know it yet, then each of the n_words words is added
+ * as a member with the matching entry of weights.
+ *
+ * @param model Model to extend.
+ * @param classname Tag word naming the class.
+ * @param classweight Unigram weight used if the tag has to be added.
+ * @param words Member words.
+ * @param weights In-class probabilities, one per member word.
+ * @param n_words Number of entries in words and weights.
+ * @return The new class ID, or -1 on failure (tag or member could not
+ *         be added, or the limit of 128 classes has been reached).
+ */
+int32
+ngram_model_add_class(ngram_model_t *model,
+                      const char *classname,
+                      float32 classweight,
+                      char **words,
+                      const float32 *weights,
+                      int32 n_words)
+{
+    ngram_class_t *lmclass;
+    glist_t classwords = NULL;
+    int32 i, start_wid = -1;
+    int32 classid, tag_wid;
+
+    /* Check if classname already exists in model.  If not, add it.*/
+    if ((tag_wid = ngram_wid(model, classname)) == ngram_unknown_wid(model)) {
+        tag_wid = ngram_model_add_word(model, classname, classweight);
+        if (tag_wid == NGRAM_INVALID_WID)
+            return -1;
+    }
+
+    if (model->n_classes == 128) {
+        E_ERROR("Number of classes cannot exceed 128 (sorry)\n");
+        return -1;
+    }
+    classid = model->n_classes;
+    for (i = 0; i < n_words; ++i) {
+        int32 wid;
+
+        wid = ngram_add_word_internal(model, words[i], classid);
+        if (wid == NGRAM_INVALID_WID) {
+            /* Release the weights collected so far instead of leaking
+             * the list. */
+            glist_free(classwords);
+            return -1;
+        }
+        if (start_wid == -1)
+            start_wid = NGRAM_BASEWID(wid);
+        classwords = glist_add_float32(classwords, weights[i]);
+    }
+    /* glist_add_float32() built the list back to front. */
+    classwords = glist_reverse(classwords);
+    lmclass = ngram_class_new(model, tag_wid, start_wid, classwords);
+    glist_free(classwords);
+    if (lmclass == NULL)
+        return -1;
+
+    ++model->n_classes;
+    if (model->classes == NULL)
+        model->classes = ckd_calloc(1, sizeof(*model->classes));
+    else
+        model->classes = ckd_realloc(model->classes,
+                                     model->n_classes * sizeof(*model->classes));
+    model->classes[classid] = lmclass;
+    return classid;
+}
+
+/*
+ * Look up the in-class log probability of a word.
+ *
+ * Words whose base ID falls in [start_wid, start_wid + n_words) are
+ * read directly from prob1[]; any other word is searched for in the
+ * class hash table by following the 'next' chain.
+ *
+ * Returns the stored probability, or 1 if the word is not in the class.
+ */
+int32
+ngram_class_prob(ngram_class_t *lmclass, int32 wid)
+{
+    int32 base_wid = NGRAM_BASEWID(wid);
+
+    /* prob1[] holds exactly n_words entries, so the upper bound is
+     * exclusive: start_wid + n_words itself is outside the array. */
+    if (base_wid < lmclass->start_wid
+        || base_wid >= lmclass->start_wid + lmclass->n_words) {
+        int32 hash;
+
+        /* No hash table yet means no extra words: not in this class. */
+        if (lmclass->nword_hash == NULL || lmclass->n_hash <= 0)
+            return 1;
+
+        /* Look it up in the hash table. */
+        hash = wid & (lmclass->n_hash - 1);
+        while (hash != -1 && lmclass->nword_hash[hash].wid != wid)
+            hash = lmclass->nword_hash[hash].next;
+        if (hash == -1)
+            return 1;
+        return lmclass->nword_hash[hash].prob1;
+    }
+
+    return lmclass->prob1[base_wid - lmclass->start_wid];
+}
+
+/*
+ * Parse a class definition file into a hash table of classdef_t.
+ *
+ * The file consists of blocks of the form
+ *
+ *     LMCLASS <classname>
+ *     <word> [<weight>]
+ *     ...
+ *     END <classname>
+ *
+ * A word with no weight gets weight 1.0.  Lines outside a block are
+ * ignored.  Each completed block is entered into 'classes' keyed by
+ * the class name; the key string and the classdef_t are then owned by
+ * the hash table's user.
+ *
+ * Returns 0 on success, -1 on failure (file not found, mismatched END
+ * marker, or a class name that could not be entered in the table).
+ */
+int32
+read_classdef_file(hash_table_t *classes, const char *file_name)
+{
+    FILE *fp;
+    int32 is_pipe;
+    int inclass;  /**< Are we currently reading a list of class words? */
+    int32 rv = -1;
+    gnode_t *gn;
+    glist_t classwords = NULL;
+    glist_t classprobs = NULL;
+    char *classname = NULL;
+
+    if ((fp = fopen_comp(file_name, "r", &is_pipe)) == NULL) {
+        E_ERROR("File %s not found\n", file_name);
+        return -1;
+    }
+
+    inclass = FALSE;
+    while (!feof(fp)) {
+        char line[512];
+        char *wptr[2];
+        int n_words;
+
+        if (fgets(line, sizeof(line), fp) == NULL)
+            break;
+
+        n_words = str2words(line, wptr, 2);
+        if (n_words <= 0)
+            continue;
+
+        if (inclass) {
+            /* Look for an end of class marker. */
+            if (n_words == 2 && 0 == strcmp(wptr[0], "END")) {
+                classdef_t *classdef;
+                gnode_t *word, *weight;
+                int32 i;
+
+                if (classname == NULL || 0 != strcmp(wptr[1], classname))
+                    goto error_out;
+                inclass = FALSE;
+
+                /* Construct a class from the list of words collected. */
+                classdef = ckd_calloc(1, sizeof(*classdef));
+                classwords = glist_reverse(classwords);
+                classprobs = glist_reverse(classprobs);
+                classdef->n_words = glist_count(classwords);
+                classdef->words = ckd_calloc(classdef->n_words,
+                                             sizeof(*classdef->words));
+                classdef->weights = ckd_calloc(classdef->n_words,
+                                               sizeof(*classdef->weights));
+                word = classwords;
+                weight = classprobs;
+                for (i = 0; i < classdef->n_words; ++i) {
+                    /* The string pointer moves into the classdef. */
+                    classdef->words[i] = gnode_ptr(word);
+                    classdef->weights[i] = gnode_float32(weight);
+                    word = gnode_next(word);
+                    weight = gnode_next(weight);
+                }
+
+                /* Add this class to the hash table. */
+                if (hash_table_enter(classes, classname, classdef) != classdef) {
+                    /* classdef_free() releases the word strings, which
+                     * are the same pointers held by classwords.  Drop
+                     * the list nodes now so that error_out does not
+                     * free those strings a second time. */
+                    classdef_free(classdef);
+                    glist_free(classwords);
+                    classwords = NULL;
+                    goto error_out;
+                }
+
+                /* Reset everything.  The strings now belong to the
+                 * classdef and classname to the table, so only the
+                 * list nodes are released here. */
+                glist_free(classwords);
+                glist_free(classprobs);
+                classwords = NULL;
+                classprobs = NULL;
+                classname = NULL;
+            }
+            else {
+                float32 fprob;
+
+                if (n_words == 2)
+                    fprob = (float32)atof_c(wptr[1]);
+                else
+                    fprob = 1.0f;
+                /* Add it to the list of words for this class. */
+                classwords = glist_add_ptr(classwords, ckd_salloc(wptr[0]));
+                classprobs = glist_add_float32(classprobs, fprob);
+            }
+        }
+        else {
+            /* Start a new LM class if the LMCLASS marker is seen */
+            if (n_words == 2 && 0 == strcmp(wptr[0], "LMCLASS")) {
+                inclass = TRUE;
+                classname = ckd_salloc(wptr[1]);
+            }
+            /* Otherwise, just ignore whatever junk we got */
+        }
+    }
+    rv = 0; /* Success. */
+
+error_out:
+    /* Free all the stuff we might have allocated. */
+    fclose_comp(fp, is_pipe);
+    for (gn = classwords; gn; gn = gnode_next(gn))
+        ckd_free(gnode_ptr(gn));
+    glist_free(classwords);
+    glist_free(classprobs);
+    ckd_free(classname);
+
+    return rv;
+}
+
+/*
+ * Release a class definition: every word string, the word and weight
+ * arrays, and finally the structure itself.
+ */
+void
+classdef_free(classdef_t *classdef)
+{
+    int32 remaining = classdef->n_words;
+
+    /* Free the word strings, last to first. */
+    while (remaining-- > 0)
+        ckd_free(classdef->words[remaining]);
+
+    ckd_free(classdef->words);
+    ckd_free(classdef->weights);
+    ckd_free(classdef);
+}
+
+
+/*
+ * Read a class definition file and add every class it defines to the
+ * language model, each with class weight 1.0.
+ *
+ * Returns 0 on success, -1 if the file could not be parsed or a class
+ * could not be added.  All temporary class definitions are released
+ * in either case.
+ */
+int32
+ngram_model_read_classdef(ngram_model_t *model,
+                          const char *file_name)
+{
+    hash_table_t *classes;
+    glist_t hl = NULL;
+    gnode_t *gn;
+    int32 rv = -1;
+    int32 read_rv;
+
+    classes = hash_table_new(0, FALSE);
+    read_rv = read_classdef_file(classes, file_name);
+
+    /* List the entries even if parsing failed: classes completed before
+     * the error are already in the table and must be freed below. */
+    hl = hash_table_tolist(classes, NULL);
+    if (read_rv < 0)
+        goto error_out;
+
+    /* Create a new class in the language model for each classdef. */
+    for (gn = hl; gn; gn = gnode_next(gn)) {
+        hash_entry_t *he = gnode_ptr(gn);
+        classdef_t *classdef = he->val;
+
+        if (ngram_model_add_class(model, he->key, 1.0,
+                                  classdef->words,
+                                  classdef->weights,
+                                  classdef->n_words) < 0)
+            goto error_out;
+    }
+    rv = 0;
+
+error_out:
+    /* The keys and classdefs were allocated by read_classdef_file(). */
+    for (gn = hl; gn; gn = gnode_next(gn)) {
+        hash_entry_t *he = gnode_ptr(gn);
+        ckd_free((char *)he->key);
+        classdef_free(he->val);
+    }
+    glist_free(hl);
+    hash_table_free(classes);
+    return rv;
+}
diff --git a/media/sphinxbase/src/libsphinxbase/lm/ngram_model_arpa.c b/media/sphinxbase/src/libsphinxbase/lm/ngram_model_arpa.c
new file mode 100644
index 000000000..a4b72cb00
--- /dev/null
+++ b/media/sphinxbase/src/libsphinxbase/lm/ngram_model_arpa.c
@@ -0,0 +1,660 @@
+/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
+/* ====================================================================
+ * Copyright (c) 1999-2007 Carnegie Mellon University. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * This work was supported in part by funding from the Defense Advanced
+ * Research Projects Agency and the National Science Foundation of the
+ * United States of America, and the CMU Sphinx Speech Consortium.
+ *
+ * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
+ * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
+ * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ====================================================================
+ *
+ */
+/*
+ * \file ngram_model_arpa.c ARPA format language models
+ *
+ * Author: David Huggins-Daines <dhuggins@cs.cmu.edu>
+ */
+
+#include "sphinxbase/ckd_alloc.h"
+#include <string.h>
+#include <limits.h>
+#include <assert.h>
+
+#include "sphinxbase/err.h"
+#include "sphinxbase/pio.h"
+#include "sphinxbase/listelem_alloc.h"
+#include "sphinxbase/strfuncs.h"
+
+#include "ngram_model_arpa.h"
+
+static ngram_funcs_t ngram_model_arpa_funcs;
+
+/* Entry of tseg_base[] for the bigram segment that contains bigram index b
+ * (segments are selected by b >> LOG_BG_SEG_SZ). */
+#define TSEG_BASE(m,b) ((m)->lm3g.tseg_base[(b)>>LOG_BG_SEG_SZ])
+/* The 'bigrams' field stored for unigram u (index of its first bigram). */
+#define FIRST_BG(m,u) ((m)->lm3g.unigrams[u].bigrams)
+/* Absolute index of the first trigram of bigram b: the segment base plus
+ * the segment-relative offset stored in the bigram entry. */
+#define FIRST_TG(m,b) (TSEG_BASE((m),(b))+((m)->lm3g.bigrams[b].trigrams))
+
+/*
+ * Read and return #unigrams, #bigrams, #trigrams as stated in input file.
+ */
+static int
+ReadNgramCounts(lineiter_t **li, int32 * n_ug, int32 * n_bg, int32 * n_tg)
+{
+    int32 order, count;
+
+    /* Advance through the file until the line that is exactly '\data\'. */
+    for (; *li != NULL; *li = lineiter_next(*li)) {
+        string_trim((*li)->buf, STRING_BOTH);
+        if (0 == strcmp((*li)->buf, "\\data\\"))
+            break;
+    }
+    if (*li == NULL) {
+        /* Ran off the end of the input without seeing the marker. */
+        E_INFO("No \\data\\ mark in LM file\n");
+        return -1;
+    }
+
+    /* Consume consecutive "ngram N=COUNT" lines; stop at the first line
+     * that does not match that pattern. */
+    *n_ug = 0;
+    *n_bg = 0;
+    *n_tg = 0;
+    for (;;) {
+        *li = lineiter_next(*li);
+        if (*li == NULL)
+            break;
+        if (2 != sscanf((*li)->buf, "ngram %d=%d", &order, &count))
+            break;
+
+        if (order == 1) {
+            *n_ug = count;
+        }
+        else if (order == 2) {
+            *n_bg = count;
+        }
+        else if (order == 3) {
+            *n_tg = count;
+        }
+        else {
+            E_ERROR("Unknown ngram (%d)\n", order);
+            return -1;
+        }
+    }
+    if (*li == NULL) {
+        E_ERROR("EOF while reading ngram counts\n");
+        return -1;
+    }
+
+    /* Leave the iterator on the unigram section header '\1-grams:'. */
+    for (;;) {
+        *li = lineiter_next(*li);
+        if (*li == NULL)
+            break;
+        string_trim((*li)->buf, STRING_BOTH);
+        if (0 == strcmp((*li)->buf, "\\1-grams:"))
+            break;
+    }
+    if (*li == NULL) {
+        E_ERROR_SYSTEM("Failed to read \\1-grams: mark");
+        return -1;
+    }
+
+    /* At least one unigram is required; the other counts may be zero. */
+    if (*n_ug <= 0 || *n_bg < 0 || *n_tg < 0) {
+        E_ERROR("Bad or missing ngram count\n");
+        return -1;
+    }
+    return 0;
+}
+
+/*
+ * Read in the unigrams from given file into the LM structure model.
+ * On entry to this procedure, the iterator is positioned to the
+ * header line '\1-grams:'.
+ */
+static int
+ReadUnigrams(lineiter_t **li, ngram_model_arpa_t * model)
+{
+    ngram_model_t *base = &model->base;
+    int32 n_read = 0;
+
+    E_INFO("Reading unigrams\n");
+
+    for (*li = lineiter_next(*li); *li != NULL; *li = lineiter_next(*li)) {
+        char *fields[3];
+        char *word;
+        float prob;
+        float32 backoff = 0.0f;
+        int n_fields;
+
+        /* The unigram section ends at the next section header. */
+        string_trim((*li)->buf, STRING_BOTH);
+        if (0 == strcmp((*li)->buf, "\\2-grams:")
+            || 0 == strcmp((*li)->buf, "\\end\\"))
+            break;
+
+        /* Expected fields: <log10 prob> <word> [<log10 backoff>] */
+        n_fields = str2words((*li)->buf, fields, 3);
+        if (n_fields < 2) {
+            /* Blank lines are skipped silently, anything else is noted. */
+            if ((*li)->buf[0] != '\0')
+                E_WARN("Format error; unigram ignored: %s\n", (*li)->buf);
+            continue;
+        }
+        prob = (float)atof_c(fields[0]);
+        word = fields[1];
+        if (n_fields == 3)
+            backoff = (float)atof_c(fields[2]);
+
+        /* Never write past the number of unigrams the header declared. */
+        if (n_read >= base->n_counts[0]) {
+            E_ERROR("Too many unigrams\n");
+            return -1;
+        }
+
+        /* Associate name with word id */
+        base->word_str[n_read] = ckd_salloc(word);
+        if (hash_table_enter(base->wid, base->word_str[n_read],
+                             (void *)(long)n_read) != (void *)(long)n_read) {
+            E_WARN("Duplicate word in dictionary: %s\n", base->word_str[n_read]);
+        }
+        model->lm3g.unigrams[n_read].prob1.l =
+            logmath_log10_to_log(base->lmath, prob);
+        model->lm3g.unigrams[n_read].bo_wt1.l =
+            logmath_log10_to_log(base->lmath, backoff);
+        ++n_read;
+    }
+
+    /* If fewer unigrams were read than declared, adopt the real count. */
+    if (base->n_counts[0] != n_read) {
+        E_WARN("lm_t.ucount(%d) != #unigrams read(%d)\n",
+               base->n_counts[0], n_read);
+        base->n_counts[0] = n_read;
+        base->n_words = n_read;
+    }
+    return 0;
+}
+
+/*
+ * Read bigrams from given file into given model structure.
+ */
+/*
+ * Reads lines of the form "<log10 prob> <w1> <w2> [<log10 backoff>]"
+ * until a non-blank line with fewer than three fields is found, which
+ * must be '\3-grams:' or '\end\'.  Bigrams must be grouped by first
+ * word in ascending unigram order.
+ *
+ * Returns 0 on success, -1 on error (too many bigrams, bigrams out of
+ * order, unexpected end of file, or an unrecognized terminating line).
+ */
+static int
+ReadBigrams(lineiter_t **li, ngram_model_arpa_t * model)
+{
+    ngram_model_t *base = &model->base;
+    int32 w1, w2, prev_w1, bgcount;
+    bigram_t *bgptr;
+
+    E_INFO("Reading bigrams\n");
+
+    bgcount = 0;
+    bgptr = model->lm3g.bigrams;
+    prev_w1 = -1;
+
+    while ((*li = lineiter_next(*li))) {
+        float32 p, bo_wt = 0.0f;
+        int32 p2, bo_wt2;
+        char *wptr[4], *word1, *word2;
+        int n;
+
+        string_trim((*li)->buf, STRING_BOTH);
+        /* Pre-clear the optional 4th field so its presence can be tested. */
+        wptr[3] = NULL;
+        if ((n = str2words((*li)->buf, wptr, 4)) < 3) {
+            /* A non-blank short line ends the section; blanks are skipped. */
+            if ((*li)->buf[0] != '\0')
+                break;
+            continue;
+        }
+        else {
+            p = (float32)atof_c(wptr[0]);
+            word1 = wptr[1];
+            word2 = wptr[2];
+            if (wptr[3])
+                bo_wt = (float32)atof_c(wptr[3]);
+        }
+
+        if ((w1 = ngram_wid(base, word1)) == NGRAM_INVALID_WID) {
+            E_ERROR("Unknown word: %s, skipping bigram (%s %s)\n",
+                    word1, word1, word2);
+            continue;
+        }
+        if ((w2 = ngram_wid(base, word2)) == NGRAM_INVALID_WID) {
+            E_ERROR("Unknown word: %s, skipping bigram (%s %s)\n",
+                    word2, word1, word2);
+            continue;
+        }
+
+        /* FIXME: Should use logmath_t quantization here. */
+        /* HACK!! to quantize probs to 4 decimal digits */
+        p = (float32)((int32)(p * 10000)) / 10000;
+        bo_wt = (float32)((int32)(bo_wt * 10000)) / 10000;
+
+        p2 = logmath_log10_to_log(base->lmath, p);
+        bo_wt2 = logmath_log10_to_log(base->lmath, bo_wt);
+
+        if (bgcount >= base->n_counts[1]) {
+            E_ERROR("Too many bigrams\n");
+            return -1;
+        }
+
+        bgptr->wid = w2;
+        bgptr->prob2 = sorted_id(&model->sorted_prob2, &p2);
+        /* Backoff weights are only recorded when trigrams exist. */
+        if (base->n_counts[2] > 0)
+            bgptr->bo_wt2 = sorted_id(&model->sorted_bo_wt2, &bo_wt2);
+
+        if (w1 != prev_w1) {
+            if (w1 < prev_w1) {
+                E_ERROR("Bigram %s %s not in unigram order word id: %d prev word id: %d\n", word1, word2, w1, prev_w1);
+                return -1;
+            }
+
+            /* Every unigram after the previous one, up to and including
+             * w1, has its first bigram at the current position. */
+            for (prev_w1++; prev_w1 <= w1; prev_w1++)
+                model->lm3g.unigrams[prev_w1].bigrams = bgcount;
+            prev_w1 = w1;
+        }
+        bgcount++;
+        bgptr++;
+
+        if ((bgcount & 0x0000ffff) == 0) {
+            E_INFOCONT(".");
+        }
+    }
+    /* The loop also ends at end of file, in which case there is no
+     * current line to inspect or print. */
+    if (*li == NULL) {
+        E_ERROR("EOF while reading bigrams\n");
+        return -1;
+    }
+    if ((strcmp((*li)->buf, "\\end\\") != 0)
+        && (strcmp((*li)->buf, "\\3-grams:") != 0)) {
+        E_ERROR("Bad bigram: %s\n", (*li)->buf);
+        return -1;
+    }
+
+    /* Point the remaining unigrams, including the sentinel entry at
+     * index n_counts[0], at the end of the bigram table. */
+    for (prev_w1++; prev_w1 <= base->n_counts[0]; prev_w1++)
+        model->lm3g.unigrams[prev_w1].bigrams = bgcount;
+
+    return 0;
+}
+
+/*
+ * Very similar to ReadBigrams.
+ */
+/*
+ * Reads lines of the form "<log10 prob> <w1> <w2> <w3>" until a
+ * non-blank line that does not have exactly four fields, which must be
+ * '\end\'.  Trigrams must appear in the same order as their bigrams.
+ * For each bigram, the offset of its first trigram is stored relative
+ * to tseg_base[] of the bigram's segment.
+ *
+ * Returns 0 on success, -1 on error (too many trigrams, trigrams out
+ * of order, missing bigram, oversized segment, unexpected end of file,
+ * or an unrecognized terminating line).
+ */
+static int
+ReadTrigrams(lineiter_t **li, ngram_model_arpa_t * model)
+{
+    ngram_model_t *base = &model->base;
+    int32 i, w1, w2, w3, prev_w1, prev_w2, tgcount, prev_bg, bg, endbg;
+    int32 seg, prev_seg, prev_seg_lastbg;
+    trigram_t *tgptr;
+    bigram_t *bgptr;
+
+    E_INFO("Reading trigrams\n");
+
+    tgcount = 0;
+    tgptr = model->lm3g.trigrams;
+    prev_w1 = -1;
+    prev_w2 = -1;
+    prev_bg = -1;
+    prev_seg = -1;
+
+    while ((*li = lineiter_next(*li))) {
+        float32 p;
+        int32 p3;
+        char *wptr[4], *word1, *word2, *word3;
+
+        string_trim((*li)->buf, STRING_BOTH);
+        if (str2words((*li)->buf, wptr, 4) != 4) {
+            /* A non-blank malformed line ends the section. */
+            if ((*li)->buf[0] != '\0')
+                break;
+            continue;
+        }
+        else {
+            p = (float32)atof_c(wptr[0]);
+            word1 = wptr[1];
+            word2 = wptr[2];
+            word3 = wptr[3];
+        }
+
+        if ((w1 = ngram_wid(base, word1)) == NGRAM_INVALID_WID) {
+            E_ERROR("Unknown word: %s, skipping trigram (%s %s %s)\n",
+                    word1, word1, word2, word3);
+            continue;
+        }
+        if ((w2 = ngram_wid(base, word2)) == NGRAM_INVALID_WID) {
+            E_ERROR("Unknown word: %s, skipping trigram (%s %s %s)\n",
+                    word2, word1, word2, word3);
+            continue;
+        }
+        if ((w3 = ngram_wid(base, word3)) == NGRAM_INVALID_WID) {
+            E_ERROR("Unknown word: %s, skipping trigram (%s %s %s)\n",
+                    word3, word1, word2, word3);
+            continue;
+        }
+
+        /* FIXME: Should use logmath_t quantization here. */
+        /* HACK!! to quantize probs to 4 decimal digits */
+        p = (float32)((int32)(p * 10000)) / 10000;
+        p3 = logmath_log10_to_log(base->lmath, p);
+
+        if (tgcount >= base->n_counts[2]) {
+            E_ERROR("Too many trigrams\n");
+            return -1;
+        }
+
+        tgptr->wid = w3;
+        tgptr->prob3 = sorted_id(&model->sorted_prob3, &p3);
+
+        if ((w1 != prev_w1) || (w2 != prev_w2)) {
+            /* Trigram for a new bigram; update tg info for all previous bigrams */
+            if ((w1 < prev_w1) || ((w1 == prev_w1) && (w2 < prev_w2))) {
+                E_ERROR("Trigrams not in bigram order\n");
+                return -1;
+            }
+
+            /* Search the bigrams of w1 for the one ending in w2; when w1
+             * is unchanged, resume after the previously matched bigram. */
+            bg = (w1 !=
+                  prev_w1) ? model->lm3g.unigrams[w1].bigrams : prev_bg + 1;
+            endbg = model->lm3g.unigrams[w1 + 1].bigrams;
+            bgptr = model->lm3g.bigrams + bg;
+            for (; (bg < endbg) && (bgptr->wid != w2); bg++, bgptr++);
+            if (bg >= endbg) {
+                E_ERROR("Missing bigram for trigram: %s", (*li)->buf);
+                return -1;
+            }
+
+            /* bg = bigram entry index for <w1,w2>.  Update tseg_base */
+            seg = bg >> LOG_BG_SEG_SZ;
+            for (i = prev_seg + 1; i <= seg; i++)
+                model->lm3g.tseg_base[i] = tgcount;
+
+            /* Update trigrams pointers for all bigrams until bg */
+            if (prev_seg < seg) {
+                int32 tgoff = 0;
+
+                if (prev_seg >= 0) {
+                    tgoff = tgcount - model->lm3g.tseg_base[prev_seg];
+                    if (tgoff > 65535) {
+                        E_ERROR("Size of trigram segment is bigger than 65535, such a big language models are not supported, use smaller vocabulary\n");
+                        return -1;
+                    }
+                }
+
+                /* Bigrams left in the previous segment get the offset
+                 * relative to that segment's base ... */
+                prev_seg_lastbg = ((prev_seg + 1) << LOG_BG_SEG_SZ) - 1;
+                bgptr = model->lm3g.bigrams + prev_bg;
+                for (++prev_bg, ++bgptr; prev_bg <= prev_seg_lastbg;
+                     prev_bg++, bgptr++)
+                    bgptr->trigrams = tgoff;
+
+                /* ... and bigrams in later segments up to bg get offset
+                 * 0, since their segment bases were just set to tgcount. */
+                for (; prev_bg <= bg; prev_bg++, bgptr++)
+                    bgptr->trigrams = 0;
+            }
+            else {
+                int32 tgoff;
+
+                /* Same segment as before: one offset for every bigram
+                 * from the one after prev_bg through bg. */
+                tgoff = tgcount - model->lm3g.tseg_base[prev_seg];
+                if (tgoff > 65535) {
+                    E_ERROR("Size of trigram segment is bigger than 65535, such a big language models are not supported, use smaller vocabulary\n");
+                    return -1;
+                }
+
+                bgptr = model->lm3g.bigrams + prev_bg;
+                for (++prev_bg, ++bgptr; prev_bg <= bg; prev_bg++, bgptr++)
+                    bgptr->trigrams = tgoff;
+            }
+
+            prev_w1 = w1;
+            prev_w2 = w2;
+            prev_bg = bg;
+            prev_seg = seg;
+        }
+
+        tgcount++;
+        tgptr++;
+
+        if ((tgcount & 0x0000ffff) == 0) {
+            E_INFOCONT(".");
+        }
+    }
+    /* The loop also ends at end of file, in which case there is no
+     * current line to inspect or print. */
+    if (*li == NULL) {
+        E_ERROR("EOF while reading trigrams\n");
+        return -1;
+    }
+    if (strcmp((*li)->buf, "\\end\\") != 0) {
+        E_ERROR("Bad trigram: %s\n", (*li)->buf);
+        return -1;
+    }
+
+    /* Fill in the remaining bigrams, through the sentinel entry at index
+     * n_counts[1], opening a new segment base at each segment boundary. */
+    for (prev_bg++; prev_bg <= base->n_counts[1]; prev_bg++) {
+        if ((prev_bg & (BG_SEG_SZ - 1)) == 0)
+            model->lm3g.tseg_base[prev_bg >> LOG_BG_SEG_SZ] = tgcount;
+        if ((tgcount - model->lm3g.tseg_base[prev_bg >> LOG_BG_SEG_SZ]) > 65535) {
+            E_ERROR("Size of trigram segment is bigger than 65535, such a big language models are not supported, use smaller vocabulary\n");
+            return -1;
+        }
+        model->lm3g.bigrams[prev_bg].trigrams =
+            tgcount - model->lm3g.tseg_base[prev_bg >> LOG_BG_SEG_SZ];
+    }
+    return 0;
+}
+
+/*
+ * Allocate a table of n_ug unigram entries with every probability and
+ * backoff weight preset to INT_MIN.
+ */
+static unigram_t *
+new_unigram_table(int32 n_ug)
+{
+    unigram_t *table = ckd_calloc(n_ug, sizeof(unigram_t));
+    int32 idx = n_ug;
+
+    /* Fill from the last entry down to the first. */
+    while (idx-- > 0) {
+        table[idx].bo_wt1.l = INT_MIN;
+        table[idx].prob1.l = INT_MIN;
+    }
+    return table;
+}
+
+/*
+ * Read an ARPA-format language model (up to trigrams) from file_name.
+ *
+ * config is not used by this function.  lmath is handed to
+ * ngram_model_init() and used to convert the log10 values in the file.
+ *
+ * Returns the new model, or NULL if the file cannot be opened or any
+ * section fails to parse.  On failure the file, the line iterator and
+ * the temporary sorted lists are released before returning.
+ */
+ngram_model_t *
+ngram_model_arpa_read(cmd_ln_t *config,
+                      const char *file_name,
+                      logmath_t *lmath)
+{
+    lineiter_t *li;
+    FILE *fp;
+    int32 is_pipe;
+    int32 n_unigram;
+    int32 n_bigram;
+    int32 n_trigram;
+    int32 n;
+    ngram_model_arpa_t *model;
+    ngram_model_t *base;
+
+    if ((fp = fopen_comp(file_name, "r", &is_pipe)) == NULL) {
+        E_ERROR("File %s not found\n", file_name);
+        return NULL;
+    }
+    li = lineiter_start(fp);
+
+    /* Read #unigrams, #bigrams, #trigrams from file */
+    if (ReadNgramCounts(&li, &n_unigram, &n_bigram, &n_trigram) == -1) {
+        lineiter_free(li);
+        fclose_comp(fp, is_pipe);
+        return NULL;
+    }
+    E_INFO("ngrams 1=%d, 2=%d, 3=%d\n", n_unigram, n_bigram, n_trigram);
+
+    /* Allocate space for LM, including initial OOVs and placeholders; initialize it */
+    model = ckd_calloc(1, sizeof(*model));
+    base = &model->base;
+    /* The model order is the highest N with a non-zero count. */
+    if (n_trigram > 0)
+        n = 3;
+    else if (n_bigram > 0)
+        n = 2;
+    else
+        n = 1;
+    /* Initialize base model. */
+    ngram_model_init(base, &ngram_model_arpa_funcs, lmath, n, n_unigram);
+    base->n_counts[0] = n_unigram;
+    base->n_counts[1] = n_bigram;
+    base->n_counts[2] = n_trigram;
+    base->writable = TRUE;
+
+    /*
+     * Allocate one extra unigram and bigram entry: sentinels to terminate
+     * followers (bigrams and trigrams, respectively) of previous entry.
+     */
+    model->lm3g.unigrams = new_unigram_table(n_unigram + 1);
+    model->lm3g.bigrams =
+        ckd_calloc(n_bigram + 1, sizeof(bigram_t));
+    if (n_trigram > 0)
+        model->lm3g.trigrams =
+            ckd_calloc(n_trigram, sizeof(trigram_t));
+
+    if (n_trigram > 0) {
+        model->lm3g.tseg_base =
+            ckd_calloc((n_bigram + 1) / BG_SEG_SZ + 1,
+                       sizeof(int32));
+    }
+    if (ReadUnigrams(&li, model) == -1) {
+        lineiter_free(li);
+        fclose_comp(fp, is_pipe);
+        ngram_model_free(base);
+        return NULL;
+    }
+    E_INFO("%8d = #unigrams created\n", base->n_counts[0]);
+
+    if (base->n_counts[2] > 0)
+        init_sorted_list(&model->sorted_bo_wt2);
+
+    if (base->n_counts[1] > 0) {
+        init_sorted_list(&model->sorted_prob2);
+
+        if (ReadBigrams(&li, model) == -1) {
+            /* Release the temporary sorted lists initialized above;
+             * they are not owned by the lm3g arrays yet. */
+            free_sorted_list(&model->sorted_prob2);
+            if (base->n_counts[2] > 0)
+                free_sorted_list(&model->sorted_bo_wt2);
+            lineiter_free(li);
+            fclose_comp(fp, is_pipe);
+            ngram_model_free(base);
+            return NULL;
+        }
+
+        /* The sentinel unigram records how many bigrams were stored. */
+        base->n_counts[1] = FIRST_BG(model, base->n_counts[0]);
+        model->lm3g.n_prob2 = model->sorted_prob2.free;
+        model->lm3g.prob2 = vals_in_sorted_list(&model->sorted_prob2);
+        free_sorted_list(&model->sorted_prob2);
+        E_INFO("%8d = #bigrams created\n", base->n_counts[1]);
+        E_INFO("%8d = #prob2 entries\n", model->lm3g.n_prob2);
+    }
+
+    if (base->n_counts[2] > 0) {
+        /* Create trigram bo-wts array */
+        model->lm3g.n_bo_wt2 = model->sorted_bo_wt2.free;
+        model->lm3g.bo_wt2 = vals_in_sorted_list(&model->sorted_bo_wt2);
+        free_sorted_list(&model->sorted_bo_wt2);
+        E_INFO("%8d = #bo_wt2 entries\n", model->lm3g.n_bo_wt2);
+
+        init_sorted_list(&model->sorted_prob3);
+
+        if (ReadTrigrams(&li, model) == -1) {
+            free_sorted_list(&model->sorted_prob3);
+            lineiter_free(li);
+            fclose_comp(fp, is_pipe);
+            ngram_model_free(base);
+            return NULL;
+        }
+
+        /* The sentinel bigram records how many trigrams were stored. */
+        base->n_counts[2] = FIRST_TG(model, base->n_counts[1]);
+        model->lm3g.n_prob3 = model->sorted_prob3.free;
+        model->lm3g.prob3 = vals_in_sorted_list(&model->sorted_prob3);
+        E_INFO("%8d = #trigrams created\n", base->n_counts[2]);
+        E_INFO("%8d = #prob3 entries\n", model->lm3g.n_prob3);
+
+        free_sorted_list(&model->sorted_prob3);
+
+        /* Initialize tginfo */
+        model->lm3g.tginfo = ckd_calloc(n_unigram, sizeof(tginfo_t *));
+        model->lm3g.le = listelem_alloc_init(sizeof(tginfo_t));
+    }
+
+    lineiter_free(li);
+    fclose_comp(fp, is_pipe);
+    return base;
+}
+
+/*
+ * Write the model to file_name in ARPA text format.
+ *
+ * Returns -1 if the file cannot be opened, otherwise the result of
+ * fclose() on the output file.
+ */
+int
+ngram_model_arpa_write(ngram_model_t *model,
+                       const char *file_name)
+{
+    FILE *out;
+    int order;
+
+    out = fopen(file_name, "w");
+    if (out == NULL) {
+        E_ERROR_SYSTEM("Failed to open %s for writing", file_name);
+        return -1;
+    }
+    fprintf(out, "This is an ARPA-format language model file, generated by CMU Sphinx\n");
+
+    /* Everything ARPA needs is available through the N-Gram iterators. */
+
+    /* Header: one count line per N-gram order. */
+    fprintf(out, "\\data\\\n");
+    for (order = 1; order <= model->n; ++order)
+        fprintf(out, "ngram %d=%d\n", order, model->n_counts[order - 1]);
+
+    /* Body: one section per order, one line per N-gram. */
+    for (order = 1; order <= model->n; ++order) {
+        ngram_iter_t *it;
+
+        fprintf(out, "\n\\%d-grams:\n", order);
+        it = ngram_model_mgrams(model, order - 1);
+        while (it != NULL) {
+            int32 const *ids;
+            int32 prob, backoff;
+            int k;
+
+            ids = ngram_iter_get(it, &prob, &backoff);
+            fprintf(out, "%.4f ", logmath_log_to_log10(model->lmath, prob));
+            for (k = 0; k < order; ++k) {
+                assert(ids[k] < model->n_counts[0]);
+                fprintf(out, "%s ", model->word_str[ids[k]]);
+            }
+            /* The highest order carries no backoff weight. */
+            if (order < model->n)
+                fprintf(out, "%.4f", logmath_log_to_log10(model->lmath, backoff));
+            fprintf(out, "\n");
+
+            it = ngram_iter_next(it);
+        }
+    }
+    fprintf(out, "\n\\end\\\n");
+    return fclose(out);
+}
+
+/*
+ * apply_weights entry of the function table: forwards to the shared
+ * lm3g implementation on this model's lm3g member.  Always returns 0.
+ */
+static int
+ngram_model_arpa_apply_weights(ngram_model_t *base, float32 lw,
+                               float32 wip, float32 uw)
+{
+    lm3g_model_t *lm3g = &((ngram_model_arpa_t *)base)->lm3g;
+
+    lm3g_apply_weights(base, lm3g, lw, wip, uw);
+    return 0;
+}
+
+/* Lousy "templating" for things that are largely the same in DMP and
+ * ARPA models, except for the bigram and trigram types and some
+ * names. */
+#define NGRAM_MODEL_TYPE ngram_model_arpa_t
+#include "lm3g_templates.c"
+
+/*
+ * free entry of the function table: releases the arrays held in the
+ * lm3g member.  The model structure itself is not freed here.
+ */
+static void
+ngram_model_arpa_free(ngram_model_t *base)
+{
+    lm3g_model_t *lm3g = &((ngram_model_arpa_t *)base)->lm3g;
+
+    /* N-gram tables. */
+    ckd_free(lm3g->unigrams);
+    ckd_free(lm3g->bigrams);
+    ckd_free(lm3g->trigrams);
+
+    /* Probability and backoff-weight value arrays. */
+    ckd_free(lm3g->prob2);
+    ckd_free(lm3g->bo_wt2);
+    ckd_free(lm3g->prob3);
+
+    /* Trigram info, then the segment base table. */
+    lm3g_tginfo_free(base, lm3g);
+    ckd_free(lm3g->tseg_base);
+}
+
+/*
+ * Function table passed to ngram_model_init() for ARPA models.  The
+ * initializer is positional, so entries must stay in the member order
+ * of ngram_funcs_t.  Apart from free and apply_weights, every entry
+ * comes from the included lm3g_templates.c.
+ */
+static ngram_funcs_t ngram_model_arpa_funcs = {
+ ngram_model_arpa_free, /* free */
+ ngram_model_arpa_apply_weights, /* apply_weights */
+ lm3g_template_score, /* score */
+ lm3g_template_raw_score, /* raw_score */
+ lm3g_template_add_ug, /* add_ug */
+ lm3g_template_flush, /* flush */
+ lm3g_template_iter, /* iter */
+ lm3g_template_mgrams, /* mgrams */
+ lm3g_template_successors, /* successors */
+ lm3g_template_iter_get, /* iter_get */
+ lm3g_template_iter_next, /* iter_next */
+ lm3g_template_iter_free /* iter_free */
+};
diff --git a/media/sphinxbase/src/libsphinxbase/lm/ngram_model_arpa.h b/media/sphinxbase/src/libsphinxbase/lm/ngram_model_arpa.h
new file mode 100644
index 000000000..2fd9e427d
--- /dev/null
+++ b/media/sphinxbase/src/libsphinxbase/lm/ngram_model_arpa.h
@@ -0,0 +1,86 @@
+/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
+/* ====================================================================
+ * Copyright (c) 1999-2007 Carnegie Mellon University. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * This work was supported in part by funding from the Defense Advanced
+ * Research Projects Agency and the National Science Foundation of the
+ * United States of America, and the CMU Sphinx Speech Consortium.
+ *
+ * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
+ * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
+ * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ====================================================================
+ *
+ */
+/*
+ * \file ngram_model_arpa.h ARPABO text format for N-Gram models
+ *
+ * Author: David Huggins-Daines <dhuggins@cs.cmu.edu>
+ */
+
+#ifndef __NGRAM_MODEL_ARPA_H__
+#define __NGRAM_MODEL_ARPA_H__
+
+#include "ngram_model_internal.h"
+#include "lm3g_model.h"
+
+/**
+ * Bigram structure.
+ */
+struct bigram_s {
+ uint32 wid; /**< Index of unigram entry for this. (NOT dictionary id.) */
+ uint16 prob2; /**< Index into array of actual bigram probs */
+ uint16 bo_wt2; /**< Index into array of actual bigram backoff wts */
+ uint16 trigrams; /**< Index of 1st entry in lm_t.trigrams[],
+ RELATIVE TO the tseg_base[] entry of this
+ bigram's segment; being 16 bits wide, it
+ cannot exceed 65535 */
+};
+
+/**
+ * Trigram structure.
+ *
+ * As with bigrams, trigram prob info kept in a separate table for conserving
+ * memory space.
+ */
+struct trigram_s {
+ uint32 wid; /**< Index of unigram entry for this. (NOT dictionary id.) */
+ uint16 prob3; /**< Index into array of actual trigram probs (lm3g prob3[]) */
+};
+
+
+/**
+ * Subclass of ngram_model for ARPA file reading.
+ */
+typedef struct ngram_model_arpa_s {
+ ngram_model_t base; /**< Base ngram_model_t structure; kept as the first
+ member because the code casts ngram_model_t * to
+ ngram_model_arpa_t * */
+ lm3g_model_t lm3g; /**< Shared lm3g structure */
+
+ /* Sorted lists of unique bigram probs and bo-wts, and trigram probs.
+ * These are temporary: they are used only while reading a file and
+ * are converted to the lm3g value arrays, then freed. */
+ sorted_list_t sorted_prob2;
+ sorted_list_t sorted_bo_wt2;
+ sorted_list_t sorted_prob3;
+} ngram_model_arpa_t;
+
+#endif /* __NGRAM_MODEL_ARPA_H__ */
diff --git a/media/sphinxbase/src/libsphinxbase/lm/ngram_model_dmp.c b/media/sphinxbase/src/libsphinxbase/lm/ngram_model_dmp.c
new file mode 100644
index 000000000..c6a2d8b85
--- /dev/null
+++ b/media/sphinxbase/src/libsphinxbase/lm/ngram_model_dmp.c
@@ -0,0 +1,969 @@
+/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
+/* ====================================================================
+ * Copyright (c) 1999-2007 Carnegie Mellon University. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * This work was supported in part by funding from the Defense Advanced
+ * Research Projects Agency and the National Science Foundation of the
+ * United States of America, and the CMU Sphinx Speech Consortium.
+ *
+ * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
+ * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
+ * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ====================================================================
+ *
+ */
+/*
+ * \file ngram_model_dmp.c DMP format language models
+ *
+ * Author: David Huggins-Daines <dhuggins@cs.cmu.edu>
+ */
+
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <limits.h>
+
+#include "sphinxbase/ckd_alloc.h"
+#include "sphinxbase/pio.h"
+#include "sphinxbase/err.h"
+#include "sphinxbase/byteorder.h"
+#include "sphinxbase/listelem_alloc.h"
+
+#include "ngram_model_dmp.h"
+
+/* Magic header string that opens every DMP file; it is written (with its
+ * trailing NUL) by the writer and compared against by the reader. */
+static const char darpa_hdr[] = "Darpa Trigram LM";
+/* Forward declaration of the method table, defined at the end of this file. */
+static ngram_funcs_t ngram_model_dmp_funcs;
+
+/* Trigram segment base for bigram index b: bigrams are grouped into
+ * segments of 2^LOG_BG_SEG_SZ entries, each with one tseg_base[] entry. */
+#define TSEG_BASE(m,b) ((m)->lm3g.tseg_base[(b)>>LOG_BG_SEG_SZ])
+/* Index of the first bigram belonging to unigram u. */
+#define FIRST_BG(m,u) ((m)->lm3g.unigrams[u].bigrams)
+/* Absolute index of the first trigram belonging to bigram b: the bigram's
+ * segment-relative offset plus its segment base. */
+#define FIRST_TG(m,b) (TSEG_BASE((m),(b))+((m)->lm3g.bigrams[b].trigrams))
+
+/**
+ * Allocate a unigram table with n_ug entries.
+ *
+ * Every entry gets its probability and backoff weight (float views) set
+ * to -99.0; all other fields are left as ckd_calloc() returned them.
+ *
+ * @param n_ug Number of entries to allocate.
+ * @return Newly allocated table (allocated with ckd_calloc()).
+ */
+static unigram_t *
+new_unigram_table(int32 n_ug)
+{
+    unigram_t *ug_table = ckd_calloc(n_ug, sizeof(unigram_t));
+    int32 idx = 0;
+
+    while (idx < n_ug) {
+        unigram_t *entry = &ug_table[idx];
+
+        entry->prob1.f = -99.0;
+        entry->bo_wt1.f = -99.0;
+        ++idx;
+    }
+    return ug_table;
+}
+
+/**
+ * Read and discard len bytes of a length-prefixed header string.
+ *
+ * The data is consumed in bounded chunks so that a length taken from the
+ * file can never overflow a fixed-size buffer.
+ *
+ * @return 0 on success, -1 if len is negative or the read falls short.
+ */
+static int
+dmp_skip_bytes(FILE *fp, int32 len)
+{
+    char buf[1024];
+
+    if (len < 0)
+        return -1;
+    while (len > 0) {
+        size_t chunk = (size_t) len < sizeof(buf) ? (size_t) len : sizeof(buf);
+
+        if (fread(buf, 1, chunk, fp) != chunk)
+            return -1;
+        len -= (int32) chunk;
+    }
+    return 0;
+}
+
+/**
+ * Read a language model from a binary DMP ("Darpa Trigram LM") file.
+ *
+ * The file is read in this order: magic header, original LM filename,
+ * optional version/timestamp/format description, n-gram counts,
+ * unigrams (+sentinel), bigrams (+sentinel), trigrams, the prob2,
+ * bo_wt2 and prob3 value tables, the trigram segment base table and
+ * finally the NUL-separated word strings.  Byte order is detected from
+ * the header length field and all values are swapped if necessary.
+ *
+ * @param config    Optional configuration; if non-NULL its "-mmap" flag
+ *                  requests memory-mapping of the bigram/trigram data.
+ *                  Mapping is silently disabled for compressed files,
+ *                  byte-swapped files and misaligned data.
+ * @param file_name Path of the dump file (opened through fopen_comp()).
+ * @param lmath     Log-math object used to convert log10 values.
+ * @return The new model, or NULL on any read or format error.
+ */
+ngram_model_t *
+ngram_model_dmp_read(cmd_ln_t *config,
+                     const char *file_name,
+                     logmath_t *lmath)
+{
+    ngram_model_t *base;
+    ngram_model_dmp_t *model;
+    FILE *fp;
+    int do_mmap, do_swap;
+    int32 is_pipe;
+    int32 i, j, k, vn, n, ts;
+    int32 n_unigram;
+    int32 n_bigram;
+    int32 n_trigram;
+    char str[1024];
+    unigram_t *ugptr;
+    bigram_t *bgptr;
+    trigram_t *tgptr;
+    char *tmp_word_str;
+    char *map_base = NULL;
+    size_t offset = 0;
+
+    base = NULL;
+    do_mmap = FALSE;
+    if (config)
+        do_mmap = cmd_ln_boolean_r(config, "-mmap");
+
+    if ((fp = fopen_comp(file_name, "rb", &is_pipe)) == NULL) {
+        E_ERROR("Dump file %s not found\n", file_name);
+        goto error_out;
+    }
+
+    if (is_pipe && do_mmap) {
+        E_WARN("Dump file is compressed, will not use memory-mapped I/O\n");
+        do_mmap = 0;
+    }
+
+    /* The header length doubles as a byte-order probe: if it only matches
+     * after swapping, the whole file needs swapping. */
+    do_swap = FALSE;
+    if (fread(&k, sizeof(k), 1, fp) != 1)
+        goto error_out;
+    if (k != strlen(darpa_hdr)+1) {
+        SWAP_INT32(&k);
+        if (k != strlen(darpa_hdr)+1) {
+            E_ERROR("Wrong magic header size number %x: %s is not a dump file\n", k, file_name);
+            goto error_out;
+        }
+        do_swap = 1;
+    }
+    /* k is now known to equal sizeof(darpa_hdr), so it fits in str. */
+    if (fread(str, 1, k, fp) != (size_t) k) {
+        E_ERROR("Cannot read header\n");
+        goto error_out;
+    }
+    if (strncmp(str, darpa_hdr, k) != 0) {
+        E_ERROR("Wrong header %s: %s is not a dump file\n", darpa_hdr, file_name);
+        goto error_out;
+    }
+
+    if (do_mmap) {
+        if (do_swap) {
+            E_INFO
+                ("Byteswapping required, will not use memory-mapped I/O for LM file\n");
+            do_mmap = 0;
+        }
+        else {
+            E_INFO("Will use memory-mapped I/O for LM file\n");
+#ifdef __ADSPBLACKFIN__ /* This is true for both VisualDSP++ and uClinux. */
+            E_FATAL("memory mapping is not supported at the moment.");
+#else
+#endif
+        }
+    }
+
+    /* Original LM filename: length-prefixed, contents are not used. */
+    if (fread(&k, sizeof(k), 1, fp) != 1)
+        goto error_out;
+    if (do_swap) SWAP_INT32(&k);
+    if (dmp_skip_bytes(fp, k) < 0) {
+        E_ERROR("Cannot read LM filename in header\n");
+        goto error_out;
+    }
+
+    /* read version#, if present (must be <= 0) */
+    if (fread(&vn, sizeof(vn), 1, fp) != 1)
+        goto error_out;
+    if (do_swap) SWAP_INT32(&vn);
+    if (vn <= 0) {
+        /* read and don't compare timestamps (we don't care) */
+        if (fread(&ts, sizeof(ts), 1, fp) != 1)
+            goto error_out;
+        if (do_swap) SWAP_INT32(&ts);
+
+        /* read and skip format description */
+        for (;;) {
+            if (fread(&k, sizeof(k), 1, fp) != 1)
+                goto error_out;
+            if (do_swap) SWAP_INT32(&k);
+            if (k == 0)
+                break;
+            if (dmp_skip_bytes(fp, k) < 0) {
+                E_ERROR("Failed to read word\n");
+                goto error_out;
+            }
+        }
+        /* read model->ucount */
+        if (fread(&n_unigram, sizeof(n_unigram), 1, fp) != 1)
+            goto error_out;
+        if (do_swap) SWAP_INT32(&n_unigram);
+    }
+    else {
+        /* No version field: the value just read is the unigram count. */
+        n_unigram = vn;
+    }
+
+    /* read model->bcount, tcount */
+    if (fread(&n_bigram, sizeof(n_bigram), 1, fp) != 1)
+        goto error_out;
+    if (do_swap) SWAP_INT32(&n_bigram);
+    if (fread(&n_trigram, sizeof(n_trigram), 1, fp) != 1)
+        goto error_out;
+    if (do_swap) SWAP_INT32(&n_trigram);
+    E_INFO("ngrams 1=%d, 2=%d, 3=%d\n", n_unigram, n_bigram, n_trigram);
+
+    /* The counts size every allocation below; refuse nonsensical values
+     * before anything is allocated. */
+    if (n_unigram <= 0 || n_bigram < 0 || n_trigram < 0) {
+        E_ERROR("Invalid n-gram counts in dump file %s\n", file_name);
+        goto error_out;
+    }
+
+    /* Allocate space for LM, including initial OOVs and placeholders; initialize it */
+    model = ckd_calloc(1, sizeof(*model));
+    base = &model->base;
+    if (n_trigram > 0)
+        n = 3;
+    else if (n_bigram > 0)
+        n = 2;
+    else
+        n = 1;
+    ngram_model_init(base, &ngram_model_dmp_funcs, lmath, n, n_unigram);
+    base->n_counts[0] = n_unigram;
+    base->n_counts[1] = n_bigram;
+    base->n_counts[2] = n_trigram;
+
+    /* read unigrams (always in memory, as they contain dictionary
+     * mappings that can't be precomputed, and also could have OOVs added) */
+    model->lm3g.unigrams = new_unigram_table(n_unigram + 1);
+    ugptr = model->lm3g.unigrams;
+    for (i = 0; i <= n_unigram; ++i) {
+        /* Skip over the mapping ID, we don't care about it.  (It is read
+         * into the entry and then overwritten by the next fread.) */
+        if (fread(ugptr, sizeof(int32), 1, fp) != 1) {
+            E_ERROR("Failed to read maping id %d\n", i);
+            goto error_out;
+        }
+        /* Read the actual unigram structure. */
+        if (fread(ugptr, sizeof(unigram_t), 1, fp) != 1) {
+            E_ERROR("Failed to read unigrams data\n");
+            goto error_out;
+        }
+        /* Byte swap if necessary. */
+        if (do_swap) {
+            SWAP_INT32(&ugptr->prob1.l);
+            SWAP_INT32(&ugptr->bo_wt1.l);
+            SWAP_INT32(&ugptr->bigrams);
+        }
+        /* Convert values to log. */
+        ugptr->prob1.l = logmath_log10_to_log(lmath, ugptr->prob1.f);
+        ugptr->bo_wt1.l = logmath_log10_to_log(lmath, ugptr->bo_wt1.f);
+        E_DEBUG(2, ("ug %d: prob %d bo %d bigrams %d\n",
+                    i, ugptr->prob1.l, ugptr->bo_wt1.l, ugptr->bigrams));
+        ++ugptr;
+    }
+    E_INFO("%8d = LM.unigrams(+trailer) read\n", n_unigram);
+
+    /* Now mmap() the file and read in the rest of the (read-only) stuff. */
+    if (do_mmap) {
+        offset = ftell(fp);
+
+        /* Check for improper word alignment. */
+        if (offset & 0x3) {
+            E_WARN("-mmap specified, but trigram index is not word-aligned.  Will not memory-map.\n");
+            do_mmap = FALSE;
+        }
+        else {
+            model->dump_mmap = mmio_file_read(file_name);
+            if (model->dump_mmap == NULL) {
+                do_mmap = FALSE;
+            }
+            else {
+                map_base = mmio_file_ptr(model->dump_mmap);
+            }
+        }
+    }
+
+    if (n_bigram > 0) {
+        /* read bigrams */
+        if (do_mmap) {
+            model->lm3g.bigrams = (bigram_t *) (map_base + offset);
+            offset += (n_bigram + 1) * sizeof(bigram_t);
+        }
+        else {
+            model->lm3g.bigrams =
+                ckd_calloc(n_bigram + 1, sizeof(bigram_t));
+            if (fread(model->lm3g.bigrams, sizeof(bigram_t), n_bigram + 1, fp)
+                != (size_t) n_bigram + 1) {
+                E_ERROR("Failed to read bigrams data\n");
+                goto error_out;
+            }
+            if (do_swap) {
+                for (i = 0, bgptr = model->lm3g.bigrams; i <= n_bigram;
+                     i++, bgptr++) {
+                    SWAP_INT16(&bgptr->wid);
+                    SWAP_INT16(&bgptr->prob2);
+                    SWAP_INT16(&bgptr->bo_wt2);
+                    SWAP_INT16(&bgptr->trigrams);
+                }
+            }
+        }
+        E_INFO("%8d = LM.bigrams(+trailer) read\n", n_bigram);
+    }
+
+    /* read trigrams */
+    if (n_trigram > 0) {
+        if (do_mmap) {
+            model->lm3g.trigrams = (trigram_t *) (map_base + offset);
+            offset += n_trigram * sizeof(trigram_t);
+        }
+        else {
+            model->lm3g.trigrams =
+                ckd_calloc(n_trigram, sizeof(trigram_t));
+            if (fread
+                (model->lm3g.trigrams, sizeof(trigram_t), n_trigram, fp)
+                != (size_t) n_trigram) {
+                E_ERROR("Failed to read trigrams data\n");
+                goto error_out;
+            }
+            if (do_swap) {
+                for (i = 0, tgptr = model->lm3g.trigrams; i < n_trigram;
+                     i++, tgptr++) {
+                    SWAP_INT16(&tgptr->wid);
+                    SWAP_INT16(&tgptr->prob3);
+                }
+            }
+        }
+        E_INFO("%8d = LM.trigrams read\n", n_trigram);
+        /* Initialize tginfo */
+        model->lm3g.tginfo = ckd_calloc(n_unigram, sizeof(tginfo_t *));
+        model->lm3g.le = listelem_alloc_init(sizeof(tginfo_t));
+    }
+
+    if (n_bigram > 0) {
+        /* read n_prob2 and prob2 array (in memory) */
+        /* In mmap mode the stream was not advanced past the mapped
+         * bigram/trigram records, so reposition it first. */
+        if (do_mmap)
+            fseek(fp, offset, SEEK_SET);
+        if (fread(&k, sizeof(k), 1, fp) != 1)
+            goto error_out;
+        if (do_swap) SWAP_INT32(&k);
+        model->lm3g.n_prob2 = k;
+        model->lm3g.prob2 = ckd_calloc(k, sizeof(*model->lm3g.prob2));
+        if (fread(model->lm3g.prob2, sizeof(*model->lm3g.prob2), k, fp) != (size_t) k) {
+            E_ERROR("fread(prob2) failed\n");
+            goto error_out;
+        }
+        for (i = 0; i < k; i++) {
+            if (do_swap)
+                SWAP_INT32(&model->lm3g.prob2[i].l);
+            /* Convert values to log. */
+            model->lm3g.prob2[i].l = logmath_log10_to_log(lmath, model->lm3g.prob2[i].f);
+        }
+        E_INFO("%8d = LM.prob2 entries read\n", k);
+    }
+
+    /* read n_bo_wt2 and bo_wt2 array (in memory) */
+    if (base->n > 2) {
+        if (fread(&k, sizeof(k), 1, fp) != 1)
+            goto error_out;
+        if (do_swap) SWAP_INT32(&k);
+        model->lm3g.n_bo_wt2 = k;
+        model->lm3g.bo_wt2 = ckd_calloc(k, sizeof(*model->lm3g.bo_wt2));
+        if (fread(model->lm3g.bo_wt2, sizeof(*model->lm3g.bo_wt2), k, fp) != (size_t) k) {
+            E_ERROR("Failed to read backoff weights\n");
+            goto error_out;
+        }
+        for (i = 0; i < k; i++) {
+            if (do_swap)
+                SWAP_INT32(&model->lm3g.bo_wt2[i].l);
+            /* Convert values to log. */
+            model->lm3g.bo_wt2[i].l = logmath_log10_to_log(lmath, model->lm3g.bo_wt2[i].f);
+        }
+        E_INFO("%8d = LM.bo_wt2 entries read\n", k);
+    }
+
+    /* read n_prob3 and prob3 array (in memory) */
+    if (base->n > 2) {
+        if (fread(&k, sizeof(k), 1, fp) != 1)
+            goto error_out;
+        if (do_swap) SWAP_INT32(&k);
+        model->lm3g.n_prob3 = k;
+        model->lm3g.prob3 = ckd_calloc(k, sizeof(*model->lm3g.prob3));
+        if (fread(model->lm3g.prob3, sizeof(*model->lm3g.prob3), k, fp) != (size_t) k) {
+            E_ERROR("Failed to read trigram probability\n");
+            goto error_out;
+        }
+        for (i = 0; i < k; i++) {
+            if (do_swap)
+                SWAP_INT32(&model->lm3g.prob3[i].l);
+            /* Convert values to log. */
+            model->lm3g.prob3[i].l = logmath_log10_to_log(lmath, model->lm3g.prob3[i].f);
+        }
+        E_INFO("%8d = LM.prob3 entries read\n", k);
+    }
+
+    /* read tseg_base size and tseg_base */
+    if (do_mmap)
+        offset = ftell(fp);
+    if (n_trigram > 0) {
+        if (do_mmap) {
+            memcpy(&k, map_base + offset, sizeof(k));
+            offset += sizeof(int32);
+            model->lm3g.tseg_base = (int32 *) (map_base + offset);
+            offset += k * sizeof(int32);
+        }
+        else {
+            if (fread(&k, sizeof(k), 1, fp) != 1)
+                goto error_out;
+            if (do_swap) SWAP_INT32(&k);
+            model->lm3g.tseg_base = ckd_calloc(k, sizeof(int32));
+            if (fread(model->lm3g.tseg_base, sizeof(int32), k, fp) !=
+                (size_t) k) {
+                E_ERROR("Failed to read trigram index\n");
+                goto error_out;
+            }
+            if (do_swap)
+                for (i = 0; i < k; i++)
+                    SWAP_INT32(&model->lm3g.tseg_base[i]);
+        }
+        E_INFO("%8d = LM.tseg_base entries read\n", k);
+    }
+
+    /* read ascii word strings */
+    if (do_mmap) {
+        memcpy(&k, map_base + offset, sizeof(k));
+        offset += sizeof(int32);
+        tmp_word_str = (char *) (map_base + offset);
+        offset += k;
+    }
+    else {
+        base->writable = TRUE;
+        if (fread(&k, sizeof(k), 1, fp) != 1)
+            goto error_out;
+        if (do_swap) SWAP_INT32(&k);
+        tmp_word_str = ckd_calloc(k, 1);
+        if (fread(tmp_word_str, 1, k, fp) != (size_t) k) {
+            E_ERROR("Failed to read words\n");
+            ckd_free(tmp_word_str);
+            goto error_out;
+        }
+    }
+
+    /* First make sure string just read contains n_counts[0] words (PARANOIA!!) */
+    for (i = 0, j = 0; i < k; i++)
+        if (tmp_word_str[i] == '\0')
+            j++;
+    if (j != n_unigram) {
+        E_ERROR("Error reading word strings (%d doesn't match n_unigrams %d)\n",
+                j, n_unigram);
+        /* Only the non-mmap buffer is heap-allocated; in mmap mode the
+         * pointer refers into the mapping. */
+        if (!do_mmap)
+            ckd_free(tmp_word_str);
+        goto error_out;
+    }
+
+    /* Break up string just read into words */
+    if (do_mmap) {
+        /* Word strings point directly into the mapped file. */
+        j = 0;
+        for (i = 0; i < n_unigram; i++) {
+            base->word_str[i] = tmp_word_str + j;
+            if (hash_table_enter(base->wid, base->word_str[i],
+                                 (void *)(long)i) != (void *)(long)i) {
+                E_WARN("Duplicate word in dictionary: %s\n", base->word_str[i]);
+            }
+            j += strlen(base->word_str[i]) + 1;
+        }
+    }
+    else {
+        /* Each word gets its own copy; the bulk buffer is released. */
+        j = 0;
+        for (i = 0; i < n_unigram; i++) {
+            base->word_str[i] = ckd_salloc(tmp_word_str + j);
+            if (hash_table_enter(base->wid, base->word_str[i],
+                                 (void *)(long)i) != (void *)(long)i) {
+                E_WARN("Duplicate word in dictionary: %s\n", base->word_str[i]);
+            }
+            j += strlen(base->word_str[i]) + 1;
+        }
+        ckd_free(tmp_word_str);
+    }
+    E_INFO("%8d = ascii word strings read\n", i);
+
+    fclose_comp(fp, is_pipe);
+    return base;
+
+error_out:
+    if (fp)
+        fclose_comp(fp, is_pipe);
+    ngram_model_free(base);
+    return NULL;
+}
+
+/**
+ * Construct a DMP-format model from an arbitrary N-gram model.
+ *
+ * If base already uses the DMP method table it is simply retained and
+ * returned.  Otherwise a new model is built by iterating over base:
+ * unigrams are copied by word ID, then bigrams (and trigrams, if any)
+ * are laid out contiguously per history while their probabilities and
+ * backoff weights are quantized into sorted tables of unique values.
+ *
+ * @param base Source model.
+ * @return The DMP model (a retained reference to base if it already is
+ *         one, a newly built model otherwise).
+ */
+ngram_model_dmp_t *
+ngram_model_dmp_build(ngram_model_t *base)
+{
+    ngram_model_dmp_t *model;
+    ngram_model_t *newbase;
+    ngram_iter_t *itor;
+    sorted_list_t sorted_prob2;
+    sorted_list_t sorted_bo_wt2;
+    sorted_list_t sorted_prob3;
+    bigram_t *bgptr;
+    trigram_t *tgptr;
+    int i, bgcount, tgcount, seg;
+
+    if (base->funcs == &ngram_model_dmp_funcs) {
+        E_INFO("Using existing DMP model.\n");
+        return (ngram_model_dmp_t *)ngram_model_retain(base);
+    }
+
+    /* Initialize new base model structure with params from base. */
+    E_INFO("Building DMP model...\n");
+    model = ckd_calloc(1, sizeof(*model));
+    newbase = &model->base;
+    ngram_model_init(newbase, &ngram_model_dmp_funcs,
+                     logmath_retain(base->lmath),
+                     base->n, base->n_counts[0]);
+    /* Copy N-gram counts over. */
+    memcpy(newbase->n_counts, base->n_counts,
+           base->n * sizeof(*base->n_counts));
+    /* Make sure word strings are freed. */
+    newbase->writable = TRUE;
+    /* Initialize unigram table and string table. */
+    model->lm3g.unigrams = new_unigram_table(newbase->n_counts[0] + 1);
+    for (itor = ngram_model_mgrams(base, 0); itor;
+         itor = ngram_iter_next(itor)) {
+        int32 prob1, bo_wt1;
+        int32 const *wids;
+
+        /* Can't guarantee they will go in unigram order, so just to
+         * be correct, we do this... */
+        wids = ngram_iter_get(itor, &prob1, &bo_wt1);
+        model->lm3g.unigrams[wids[0]].prob1.l = prob1;
+        model->lm3g.unigrams[wids[0]].bo_wt1.l = bo_wt1;
+        newbase->word_str[wids[0]] = ckd_salloc(ngram_word(base, wids[0]));
+        if ((hash_table_enter_int32(newbase->wid,
+                                    newbase->word_str[wids[0]], wids[0]))
+            != wids[0]) {
+                E_WARN("Duplicate word in dictionary: %s\n", newbase->word_str[wids[0]]);
+        }
+    }
+    E_INFO("%8d = #unigrams created\n", newbase->n_counts[0]);
+
+    if (newbase->n < 2)
+        return model;
+
+    /* Construct quantized probability table for bigrams and
+     * (optionally) trigrams.  Hesitate to use the "sorted list" thing
+     * since it isn't so useful, but it's there already. */
+    init_sorted_list(&sorted_prob2);
+    if (newbase->n > 2) {
+        init_sorted_list(&sorted_bo_wt2);
+        init_sorted_list(&sorted_prob3);
+    }
+    /* Construct bigram and trigram arrays. */
+    bgptr = model->lm3g.bigrams = ckd_calloc(newbase->n_counts[1] + 1, sizeof(bigram_t));
+    if (newbase->n > 2) {
+        tgptr = model->lm3g.trigrams = ckd_calloc(newbase->n_counts[2], sizeof(trigram_t));
+        model->lm3g.tseg_base =
+            ckd_calloc((newbase->n_counts[1] + 1) / BG_SEG_SZ + 1, sizeof(int32));
+    }
+    else
+        tgptr = NULL;
+    /* Since bigrams and trigrams have to be contiguous with others
+     * with the same N-1-gram, we traverse them in depth-first order
+     * to build the bigram and trigram arrays. */
+    for (i = 0; i < newbase->n_counts[0]; ++i) {
+        ngram_iter_t *uitor;
+        bgcount = bgptr - model->lm3g.bigrams;
+        /* First bigram index (same as next if no bigrams...) */
+        model->lm3g.unigrams[i].bigrams = bgcount;
+        E_DEBUG(2, ("unigram %d: %s => bigram %d\n", i, newbase->word_str[i], bgcount));
+        /* All bigrams corresponding to unigram i */
+        uitor = ngram_ng_iter(base, i, NULL, 0);
+        for (itor = ngram_iter_successors(uitor);
+             itor; ++bgptr, itor = ngram_iter_next(itor)) {
+            int32 prob2, bo_wt2;
+            int32 const *wids;
+            ngram_iter_t *titor;
+
+            wids = ngram_iter_get(itor, &prob2, &bo_wt2);
+
+            assert (bgptr - model->lm3g.bigrams < newbase->n_counts[1]);
+
+            bgptr->wid = wids[1];
+            bgptr->prob2 = sorted_id(&sorted_prob2, &prob2);
+            if (newbase->n > 2) {
+                tgcount = (tgptr - model->lm3g.trigrams);
+                bgcount = (bgptr - model->lm3g.bigrams);
+
+                /* Backoff weight (only if there are trigrams...) */
+                bgptr->bo_wt2 = sorted_id(&sorted_bo_wt2, &bo_wt2);
+
+                /* Find bigram segment for this bigram (this isn't
+                 * used unless there are trigrams) */
+                seg = bgcount >> LOG_BG_SEG_SZ;
+                /* If we just crossed a bigram segment boundary, then
+                 * point tseg_base for the new segment to the current
+                 * trigram pointer. */
+                if (seg != (bgcount - 1) >> LOG_BG_SEG_SZ)
+                    model->lm3g.tseg_base[seg] = tgcount;
+                /* Now calculate the trigram offset. */
+                bgptr->trigrams = tgcount - model->lm3g.tseg_base[seg];
+                E_DEBUG(2, ("bigram %d %s %s => trigram %d:%d\n",
+                            bgcount,
+                            newbase->word_str[wids[0]],
+                            newbase->word_str[wids[1]],
+                            seg, bgptr->trigrams));
+
+                /* And fill in successors' trigram info. */
+                for (titor = ngram_iter_successors(itor);
+                     titor; ++tgptr, titor = ngram_iter_next(titor)) {
+                    int32 prob3, dummy;
+
+                    assert(tgptr - model->lm3g.trigrams < newbase->n_counts[2]);
+                    wids = ngram_iter_get(titor, &prob3, &dummy);
+                    tgptr->wid = wids[2];
+                    tgptr->prob3 = sorted_id(&sorted_prob3, &prob3);
+                    E_DEBUG(2, ("trigram %d %s %s %s => prob %d\n",
+                                tgcount,
+                                newbase->word_str[wids[0]],
+                                newbase->word_str[wids[1]],
+                                newbase->word_str[wids[2]],
+                                tgptr->prob3));
+                }
+            }
+        }
+        ngram_iter_free(uitor);
+    }
+    /* Add sentinel unigram and bigram records.  (i is now n_counts[0],
+     * i.e. the extra entry allocated past the last real unigram.) */
+    bgcount = bgptr - model->lm3g.bigrams;
+    model->lm3g.unigrams[i].bigrams = bgcount;
+    /* The trigram arrays and tseg_base exist only for trigram models;
+     * touching them for a bigram-only model would dereference NULL. */
+    if (newbase->n > 2) {
+        tgcount = tgptr - model->lm3g.trigrams;
+        seg = bgcount >> LOG_BG_SEG_SZ;
+        if (seg != (bgcount - 1) >> LOG_BG_SEG_SZ)
+            model->lm3g.tseg_base[seg] = tgcount;
+        bgptr->trigrams = tgcount - model->lm3g.tseg_base[seg];
+    }
+
+    /* Now create probability tables. */
+    model->lm3g.n_prob2 = sorted_prob2.free;
+    model->lm3g.prob2 = vals_in_sorted_list(&sorted_prob2);
+    E_INFO("%8d = #bigrams created\n", newbase->n_counts[1]);
+    E_INFO("%8d = #prob2 entries\n", model->lm3g.n_prob2);
+    free_sorted_list(&sorted_prob2);
+    if (newbase->n > 2) {
+        /* Create trigram bo-wts array. */
+        model->lm3g.n_bo_wt2 = sorted_bo_wt2.free;
+        model->lm3g.bo_wt2 = vals_in_sorted_list(&sorted_bo_wt2);
+        free_sorted_list(&sorted_bo_wt2);
+        E_INFO("%8d = #bo_wt2 entries\n", model->lm3g.n_bo_wt2);
+        /* Create trigram probability table. */
+        model->lm3g.n_prob3 = sorted_prob3.free;
+        model->lm3g.prob3 = vals_in_sorted_list(&sorted_prob3);
+        E_INFO("%8d = #trigrams created\n", newbase->n_counts[2]);
+        E_INFO("%8d = #prob3 entries\n", model->lm3g.n_prob3);
+        free_sorted_list(&sorted_prob3);
+        /* Initialize tginfo */
+        model->lm3g.tginfo = ckd_calloc(newbase->n_counts[0], sizeof(tginfo_t *));
+        model->lm3g.le = listelem_alloc_init(sizeof(tginfo_t));
+    }
+
+    return model;
+}
+
+/**
+ * Write one 32-bit integer to fh in host byte order.
+ * The result of fwrite() is not checked.
+ */
+static void
+fwrite_int32(FILE *fh, int32 val)
+{
+    (void) fwrite(&val, 4, 1, fh);
+}
+
+/**
+ * Write one unigram record: a placeholder mapping ID (-1), the
+ * probability and backoff weight converted to log10 floats, and the
+ * index of the unigram's first bigram.
+ */
+static void
+fwrite_ug(FILE *fh, unigram_t *ug, logmath_t *lmath)
+{
+    int32 mapping_id = -1;
+    float32 prob_log10;
+    float32 bowt_log10;
+
+    /* The dictionary mapping field is not meaningful on disk. */
+    fwrite(&mapping_id, 4, 1, fh);
+
+    /* Values are stored in the file as log10. */
+    prob_log10 = logmath_log_to_log10(lmath, ug->prob1.l);
+    fwrite(&prob_log10, 4, 1, fh);
+
+    bowt_log10 = logmath_log_to_log10(lmath, ug->bo_wt1.l);
+    fwrite(&bowt_log10, 4, 1, fh);
+
+    fwrite_int32(fh, ug->bigrams);
+}
+
+/**
+ * Write one bigram record to fh as a raw copy of the in-memory struct.
+ */
+static void
+fwrite_bg(FILE *fh, bigram_t *bg)
+{
+    (void) fwrite(bg, sizeof(*bg), 1, fh);
+}
+
+/**
+ * Write one trigram record to fh as a raw copy of the in-memory struct.
+ */
+static void
+fwrite_tg(FILE *fh, trigram_t *tg)
+{
+    (void) fwrite(tg, sizeof(*tg), 1, fh);
+}
+
+/**
+ * Human-readable description of the DMP file layout.
+ *
+ * This NULL-terminated list is written verbatim into the header of every
+ * dump file by ngram_model_dmp_write_fmtdesc(), one length-prefixed
+ * string per entry.  The strings are file content: do not edit them
+ * casually.
+ */
+static char const *fmtdesc[] = {
+ "BEGIN FILE FORMAT DESCRIPTION",
+ "Header string length (int32) and string (including trailing 0)",
+ "Original LM filename string-length (int32) and filename (including trailing 0)",
+ "(int32) version number (present iff value <= 0)",
+ "(int32) original LM file modification timestamp (iff version# present)",
+ "(int32) string-length and string (including trailing 0) (iff version# present)",
+ "... previous entry continued any number of times (iff version# present)",
+ "(int32) 0 (terminating sequence of strings) (iff version# present)",
+ "(int32) log_bg_seg_sz (present iff different from default value of LOG2_BG_SEG_SZ)",
+ "(int32) lm_t.ucount (must be > 0)",
+ "(int32) lm_t.bcount",
+ "(int32) lm_t.tcount",
+ "lm_t.ucount+1 unigrams (including sentinel)",
+ "lm_t.bcount+1 bigrams (including sentinel 64 bits (bg_t) each if version=-1/-2, 128 bits (bg32_t) each if version=-3",
+ "lm_t.tcount trigrams (present iff lm_t.tcount > 0 32 bits (tg_t) each if version=-1/-2, 64 bits (tg32_t) each if version=-3)",
+ "(int32) lm_t.n_prob2",
+ "(int32) lm_t.prob2[]",
+ "(int32) lm_t.n_bo_wt2 (present iff lm_t.tcount > 0)",
+ "(int32) lm_t.bo_wt2[] (present iff lm_t.tcount > 0)",
+ "(int32) lm_t.n_prob3 (present iff lm_t.tcount > 0)",
+ "(int32) lm_t.prob3[] (present iff lm_t.tcount > 0)",
+ "(int32) (lm_t.bcount+1)/BG_SEG_SZ+1 (present iff lm_t.tcount > 0)",
+ "(int32) lm_t.tseg_base[] (present iff lm_t.tcount > 0)",
+ "(int32) Sum(all word string-lengths, including trailing 0 for each)",
+ "All word strings (including trailing 0 for each)",
+ "END FILE FORMAT DESCRIPTION",
+ NULL,
+};
+
+/**
+ * Write the magic header: its length (including the trailing NUL) as an
+ * int32, followed by the string itself.
+ */
+static void
+ngram_model_dmp_write_header(FILE * fh)
+{
+    /* darpa_hdr is a char array, so sizeof counts the trailing NUL. */
+    int32 hdr_len = (int32) sizeof(darpa_hdr);
+
+    fwrite_int32(fh, hdr_len);
+    fwrite(darpa_hdr, 1, hdr_len, fh);
+}
+
+/**
+ * Write the LM filename field: string length (including the trailing
+ * NUL) as an int32, followed by the NUL-terminated name.
+ */
+static void
+ngram_model_dmp_write_lm_filename(FILE * fh, const char *lmfile)
+{
+    int32 name_len = strlen(lmfile) + 1;
+
+    fwrite_int32(fh, name_len);
+    fwrite(lmfile, 1, name_len, fh);
+}
+
+#define LMDMP_VERSION_TG_16BIT -1 /**< Version number written to dump files:
+ the simplest DMP layout, trigram or lower,
+ using 16-bit fields in bigrams and trigrams. */
+
+/**
+ * Write the version number followed by the modification timestamp of
+ * the original LM file.
+ */
+static void
+ngram_model_dmp_write_version(FILE * fh, int32 mtime)
+{
+    int32 const version = LMDMP_VERSION_TG_16BIT;
+
+    fwrite_int32(fh, version);
+    fwrite_int32(fh, mtime);
+}
+
+/**
+ * Write the unigram, bigram and trigram counts, in that order.
+ * All three are always written, whatever the model's order.
+ */
+static void
+ngram_model_dmp_write_ngram_counts(FILE * fh, ngram_model_t *model)
+{
+    int order;
+
+    for (order = 0; order < 3; ++order)
+        fwrite_int32(fh, model->n_counts[order]);
+}
+
+/**
+ * Write the format description strings into the header.
+ *
+ * Each string is written as an int32 length followed by the text with
+ * its trailing NUL.  If the file position is then not a multiple of
+ * four, one extra padding string of '!' characters is added so that the
+ * data following the terminating zero length is 32-bit aligned.
+ */
+static void
+ngram_model_dmp_write_fmtdesc(FILE * fh)
+{
+    char const **desc;
+    int32 len, misalign;
+
+    for (desc = fmtdesc; *desc != NULL; ++desc) {
+        len = strlen(*desc) + 1;
+        fwrite_int32(fh, len);
+        fwrite(*desc, 1, len, fh);
+    }
+
+    /* The padding entry is a 4-byte length plus (4 - misalign) bytes,
+     * which brings the position back to a 4-byte boundary. */
+    misalign = ftell(fh) & 3;
+    if (misalign != 0) {
+        int32 pad = 4 - misalign;
+
+        fwrite_int32(fh, pad);
+        fwrite("!!!!", 1, pad, fh);
+    }
+
+    /* Zero length terminates the list of strings. */
+    fwrite_int32(fh, 0);
+}
+
+/**
+ * Write all unigram records, including the sentinel entry that follows
+ * the last real unigram (n_counts[0] + 1 records in total).
+ */
+static void
+ngram_model_dmp_write_unigram(FILE *fh, ngram_model_t *model)
+{
+    ngram_model_dmp_t *dmp = (ngram_model_dmp_t *)model;
+    int32 idx = 0;
+
+    while (idx <= model->n_counts[0]) {
+        fwrite_ug(fh, &dmp->lm3g.unigrams[idx], model->lmath);
+        ++idx;
+    }
+}
+
+
+/**
+ * Write all bigram records, including the trailing sentinel
+ * (n_counts[1] + 1 records in total).
+ */
+static void
+ngram_model_dmp_write_bigram(FILE *fh, ngram_model_t *model)
+{
+    ngram_model_dmp_t *dmp = (ngram_model_dmp_t *)model;
+    int32 idx = 0;
+
+    while (idx <= model->n_counts[1]) {
+        fwrite_bg(fh, &dmp->lm3g.bigrams[idx]);
+        ++idx;
+    }
+}
+
+/**
+ * Write all n_counts[2] trigram records (there is no sentinel).
+ */
+static void
+ngram_model_dmp_write_trigram(FILE *fh, ngram_model_t *model)
+{
+    ngram_model_dmp_t *dmp = (ngram_model_dmp_t *)model;
+    int32 idx = 0;
+
+    while (idx < model->n_counts[2]) {
+        fwrite_tg(fh, &dmp->lm3g.trigrams[idx]);
+        ++idx;
+    }
+}
+
+/**
+ * Write the bigram probability table: entry count as an int32, then
+ * each value converted to a log10 float.
+ */
+static void
+ngram_model_dmp_write_bgprob(FILE *fh, ngram_model_t *model)
+{
+    ngram_model_dmp_t *dmp = (ngram_model_dmp_t *)model;
+    int32 idx;
+    float32 value;
+
+    fwrite_int32(fh, dmp->lm3g.n_prob2);
+    for (idx = 0; idx < dmp->lm3g.n_prob2; ++idx) {
+        value = logmath_log_to_log10(model->lmath, dmp->lm3g.prob2[idx].l);
+        fwrite(&value, 4, 1, fh);
+    }
+}
+
+/**
+ * Write the bigram backoff weight table: entry count as an int32, then
+ * each value converted to a log10 float.
+ */
+static void
+ngram_model_dmp_write_tgbowt(FILE *fh, ngram_model_t *model)
+{
+    ngram_model_dmp_t *dmp = (ngram_model_dmp_t *)model;
+    int32 idx;
+    float32 value;
+
+    fwrite_int32(fh, dmp->lm3g.n_bo_wt2);
+    for (idx = 0; idx < dmp->lm3g.n_bo_wt2; ++idx) {
+        value = logmath_log_to_log10(model->lmath, dmp->lm3g.bo_wt2[idx].l);
+        fwrite(&value, 4, 1, fh);
+    }
+}
+
+/**
+ * Write the trigram probability table: entry count as an int32, then
+ * each value converted to a log10 float.
+ */
+static void
+ngram_model_dmp_write_tgprob(FILE *fh, ngram_model_t *model)
+{
+    ngram_model_dmp_t *dmp = (ngram_model_dmp_t *)model;
+    int32 idx;
+    float32 value;
+
+    fwrite_int32(fh, dmp->lm3g.n_prob3);
+    for (idx = 0; idx < dmp->lm3g.n_prob3; ++idx) {
+        value = logmath_log_to_log10(model->lmath, dmp->lm3g.prob3[idx].l);
+        fwrite(&value, 4, 1, fh);
+    }
+}
+
+/**
+ * Write the trigram segment base table: the number of segments,
+ * (n_counts[1] + 1) / BG_SEG_SZ + 1, followed by one int32 per segment.
+ */
+static void
+ngram_model_dmp_write_tg_segbase(FILE *fh, ngram_model_t *model)
+{
+    ngram_model_dmp_t *dmp = (ngram_model_dmp_t *)model;
+    int32 n_seg = (model->n_counts[1] + 1) / BG_SEG_SZ + 1;
+    int32 seg = 0;
+
+    fwrite_int32(fh, n_seg);
+    while (seg < n_seg) {
+        fwrite_int32(fh, dmp->lm3g.tseg_base[seg]);
+        ++seg;
+    }
+}
+
+/**
+ * Write the word strings: the total byte count of all words (each with
+ * its trailing NUL) as an int32, followed by the NUL-terminated words
+ * in word-ID order.
+ */
+static void
+ngram_model_dmp_write_wordstr(FILE *fh, ngram_model_t *model)
+{
+    int32 wid;
+    int32 total = 0;
+
+    /* First pass: total size of the string block. */
+    for (wid = 0; wid < model->n_counts[0]; ++wid)
+        total += strlen(model->word_str[wid]) + 1;
+    fwrite_int32(fh, total);
+
+    /* Second pass: the strings themselves. */
+    for (wid = 0; wid < model->n_counts[0]; ++wid) {
+        char const *word = model->word_str[wid];
+
+        fwrite(word, 1, strlen(word) + 1, fh);
+    }
+}
+
+/**
+ * Write a language model to a file in DMP format.
+ *
+ * The model is first converted with ngram_model_dmp_build() (which just
+ * retains it if it already is a DMP model) and that reference is
+ * released again before returning, on both the success and the failure
+ * path.
+ *
+ * @param base      Model to write.
+ * @param file_name Output path; also recorded in the file as the
+ *                  original LM filename.
+ * @return -1 if the file cannot be created, otherwise the result of
+ *         fclose().  Errors from the individual writes are not detected.
+ */
+int
+ngram_model_dmp_write(ngram_model_t *base,
+                      const char *file_name)
+{
+    ngram_model_dmp_t *model;
+    ngram_model_t *newbase;
+    FILE *fh;
+
+    /* First, construct a DMP model from the base model. */
+    model = ngram_model_dmp_build(base);
+    newbase = &model->base;
+
+    /* Now write it, confident in the knowledge that it's the right
+     * kind of language model internally. */
+    if ((fh = fopen(file_name, "wb")) == NULL) {
+        E_ERROR("Cannot create file %s\n", file_name);
+        /* Drop the reference taken above so the model is not leaked. */
+        ngram_model_free(newbase);
+        return -1;
+    }
+    ngram_model_dmp_write_header(fh);
+    ngram_model_dmp_write_lm_filename(fh, file_name);
+    ngram_model_dmp_write_version(fh, 0);
+    ngram_model_dmp_write_fmtdesc(fh);
+    ngram_model_dmp_write_ngram_counts(fh, newbase);
+    ngram_model_dmp_write_unigram(fh, newbase);
+    if (newbase->n > 1) {
+        ngram_model_dmp_write_bigram(fh, newbase);
+        if (newbase->n > 2) {
+            ngram_model_dmp_write_trigram(fh, newbase);
+        }
+        ngram_model_dmp_write_bgprob(fh, newbase);
+        if (newbase->n > 2) {
+            ngram_model_dmp_write_tgbowt(fh, newbase);
+            ngram_model_dmp_write_tgprob(fh, newbase);
+            ngram_model_dmp_write_tg_segbase(fh, newbase);
+        }
+    }
+    ngram_model_dmp_write_wordstr(fh, newbase);
+    ngram_model_free(newbase);
+
+    return fclose(fh);
+}
+
+/**
+ * apply_weights method: forwards the language weight, word insertion
+ * penalty and unigram weight to lm3g_apply_weights().  Always returns 0.
+ */
+static int
+ngram_model_dmp_apply_weights(ngram_model_t *base, float32 lw,
+                              float32 wip, float32 uw)
+{
+    lm3g_model_t *lm3g = &((ngram_model_dmp_t *)base)->lm3g;
+
+    lm3g_apply_weights(base, lm3g, lw, wip, uw);
+    return 0;
+}
+
+/* Lousy "templating" for things that are largely the same in DMP and
+ * ARPA models, except for the bigram and trigram types and some
+ * names.  The included source supplies the lm3g_template_* functions
+ * referenced by the method table at the end of this file. */
+#define NGRAM_MODEL_TYPE ngram_model_dmp_t
+#include "lm3g_templates.c"
+
+/**
+ * free method: release everything owned by the DMP part of the model.
+ *
+ * When the dump file is memory-mapped, the bigram, trigram and segment
+ * base arrays belong to the mapping and are released by unmapping it;
+ * otherwise they are freed individually.  The unigram and value tables
+ * are freed in both cases.
+ */
+static void
+ngram_model_dmp_free(ngram_model_t *base)
+{
+    ngram_model_dmp_t *model = (ngram_model_dmp_t *)base;
+    int has_trigrams = (base->n > 2);
+
+    ckd_free(model->lm3g.unigrams);
+    ckd_free(model->lm3g.prob2);
+
+    if (model->dump_mmap != NULL) {
+        mmio_file_unmap(model->dump_mmap);
+    }
+    else {
+        ckd_free(model->lm3g.bigrams);
+        if (has_trigrams) {
+            ckd_free(model->lm3g.trigrams);
+            ckd_free(model->lm3g.tseg_base);
+        }
+    }
+
+    if (has_trigrams) {
+        ckd_free(model->lm3g.bo_wt2);
+        ckd_free(model->lm3g.prob3);
+    }
+
+    lm3g_tginfo_free(base, &model->lm3g);
+}
+
+/* Method table for DMP models.  It is passed to ngram_model_init() when a
+ * model is created, and ngram_model_dmp_build() compares a model's funcs
+ * pointer against it to recognise an existing DMP model. */
+static ngram_funcs_t ngram_model_dmp_funcs = {
+ ngram_model_dmp_free, /* free */
+ ngram_model_dmp_apply_weights, /* apply_weights */
+ lm3g_template_score, /* score */
+ lm3g_template_raw_score, /* raw_score */
+ lm3g_template_add_ug, /* add_ug */
+ lm3g_template_flush, /* flush */
+ lm3g_template_iter, /* iter */
+ lm3g_template_mgrams, /* mgrams */
+ lm3g_template_successors, /* successors */
+ lm3g_template_iter_get, /* iter_get */
+ lm3g_template_iter_next, /* iter_next */
+ lm3g_template_iter_free /* iter_free */
+};
diff --git a/media/sphinxbase/src/libsphinxbase/lm/ngram_model_dmp.h b/media/sphinxbase/src/libsphinxbase/lm/ngram_model_dmp.h
new file mode 100644
index 000000000..a3b141ad1
--- /dev/null
+++ b/media/sphinxbase/src/libsphinxbase/lm/ngram_model_dmp.h
@@ -0,0 +1,92 @@
+/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
+/* ====================================================================
+ * Copyright (c) 1999-2007 Carnegie Mellon University. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * This work was supported in part by funding from the Defense Advanced
+ * Research Projects Agency and the National Science Foundation of the
+ * United States of America, and the CMU Sphinx Speech Consortium.
+ *
+ * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
+ * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
+ * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ====================================================================
+ *
+ */
+/*
+ * \file ngram_model_dmp.h DMP format for N-Gram models
+ *
+ * Author: David Huggins-Daines <dhuggins@cs.cmu.edu>
+ */
+
+#ifndef __NGRAM_MODEL_DMP_H__
+#define __NGRAM_MODEL_DMP_H__
+
+#include "sphinxbase/mmio.h"
+
+#include "ngram_model_internal.h"
+#include "lm3g_model.h"
+
+/**
+ * On-disk representation of bigrams.
+ *
+ * Four 16-bit fields; ngram_model_dmp.c reads and writes these records
+ * as raw memory, so field order and widths are part of the file format.
+ */
+struct bigram_s {
+ uint16 wid; /**< Index of unigram entry for this. (NOT dictionary id.) */
+ uint16 prob2; /**< Index into array of actual bigram probs */
+ uint16 bo_wt2; /**< Index into array of actual bigram backoff wts */
+ uint16 trigrams; /**< Index of 1st entry in lm_t.trigrams[],
+ RELATIVE TO its segment base (see lm3g_model.h) */
+};
+
+/**
+ * On-disk representation of trigrams.
+ *
+ * As with bigrams, trigram prob info kept in a separate table for conserving
+ * memory space.  Records are read and written as raw memory, so field
+ * order and widths are part of the file format.
+ */
+struct trigram_s {
+ uint16 wid; /**< Index of unigram entry for this. (NOT dictionary id.) */
+ uint16 prob3; /**< Index into array of actual trigram probs */
+};
+
+/**
+ * Subclass of ngram_model for DMP file reading.
+ *
+ * The base structure must stay the first member: ngram_model_dmp.c casts
+ * between ngram_model_t * and ngram_model_dmp_t *.
+ */
+typedef struct ngram_model_dmp_s {
+ ngram_model_t base; /**< Base ngram_model_t structure */
+ lm3g_model_t lm3g; /**< Common lm3g_model_t structure */
+ mmio_file_t *dump_mmap; /**< mmap() of dump file (or NULL if none) */
+} ngram_model_dmp_t;
+
+/**
+ * Construct a DMP format model from a generic base model.
+ *
+ * Note: If base is already a DMP format model, this just calls
+ * ngram_model_retain(), and any changes will also be made in the base
+ * model.
+ *
+ * @param base Model to convert.
+ * @return The DMP model.  In either case the caller holds a reference
+ * that it should release with ngram_model_free() when done.
+ */
+ngram_model_dmp_t *ngram_model_dmp_build(ngram_model_t *base);
+
+
+#endif /* __NGRAM_MODEL_DMP_H__ */
diff --git a/media/sphinxbase/src/libsphinxbase/lm/ngram_model_internal.h b/media/sphinxbase/src/libsphinxbase/lm/ngram_model_internal.h
new file mode 100644
index 000000000..dcc7b5ae3
--- /dev/null
+++ b/media/sphinxbase/src/libsphinxbase/lm/ngram_model_internal.h
@@ -0,0 +1,282 @@
+/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
+/* ====================================================================
+ * Copyright (c) 1999-2007 Carnegie Mellon University. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * This work was supported in part by funding from the Defense Advanced
+ * Research Projects Agency and the National Science Foundation of the
+ * United States of America, and the CMU Sphinx Speech Consortium.
+ *
+ * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
+ * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
+ * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ====================================================================
+ *
+ */
+/*
+ * \file ngram_model_internal.h Internal structures for N-Gram models
+ *
+ * Author: David Huggins-Daines <dhuggins@cs.cmu.edu>
+ */
+
+#ifndef __NGRAM_MODEL_INTERNAL_H__
+#define __NGRAM_MODEL_INTERNAL_H__
+
+#include "sphinxbase/ngram_model.h"
+#include "sphinxbase/hash_table.h"
+
+/**
+ * Common implementation of ngram_model_t.
+ *
+ * The details of bigram, trigram, and higher-order N-gram storage, if any, can
+ * vary somewhat depending on the file format in use.
+ *
+ * Format-specific models embed this structure and supply their behaviour
+ * through the funcs method table.  Word IDs may carry a class ID in their
+ * upper bits; see the NGRAM_BASEWID / NGRAM_CLASSID macros in this header.
+ */
+struct ngram_model_s {
+ int refcount; /**< Reference count */
+ int32 *n_counts; /**< Counts for 1, 2, 3, ... grams */
+ int32 n_1g_alloc; /**< Number of allocated word strings (for new word addition) */
+ int32 n_words; /**< Number of actual word strings (NOT the same as the
+ number of unigrams, due to class words). */
+ uint8 n; /**< This is an n-gram model (1, 2, 3, ...). */
+ uint8 n_classes; /**< Number of classes (maximum 128) */
+ uint8 writable; /**< Are word strings writable? */
+ uint8 flags; /**< Any other flags we might care about
+ (FIXME: Merge this and writable) */
+ logmath_t *lmath; /**< Log-math object */
+ float32 lw; /**< Language model scaling factor */
+ int32 log_wip; /**< Log of word insertion penalty */
+ int32 log_uw; /**< Log of unigram weight */
+ int32 log_uniform; /**< Log of uniform (0-gram) probability */
+ int32 log_uniform_weight; /**< Log of uniform weight (i.e. 1 - unigram weight) */
+ int32 log_zero; /**< Zero probability, cached here for quick lookup */
+ char **word_str; /**< Unigram names */
+ hash_table_t *wid; /**< Mapping of unigram names to word IDs. */
+ int32 *tmp_wids; /**< Temporary array of word IDs for ngram_model_get_ngram() */
+ struct ngram_class_s **classes; /**< Word class definitions. */
+ struct ngram_funcs_s *funcs; /**< Implementation-specific methods. */
+};
+
+/**
+ * Implementation of ngram_class_t.
+ *
+ * A class holds a contiguous range of base words (start_wid, n_words)
+ * with their probabilities in prob1, plus an index-linked hash table
+ * (nword_hash) for words outside that range.
+ */
+struct ngram_class_s {
+ int32 tag_wid; /**< Base word ID for this class tag */
+ int32 start_wid; /**< Starting base word ID for this class' words */
+ int32 n_words; /**< Number of base words for this class */
+ int32 *prob1; /**< Probability table for base words */
+ /**
+ * Custom hash table for additional words.
+ *
+ * Collisions are chained by array index (next) rather than by pointer.
+ */
+ struct ngram_hash_s {
+ int32 wid; /**< Word ID of this bucket */
+ int32 prob1; /**< Probability for this word */
+ int32 next; /**< Index of next bucket (or -1 for no collision) */
+ } *nword_hash;
+ int32 n_hash; /**< Number of buckets in nword_hash (power of 2) */
+ int32 n_hash_inuse; /**< Number of words in nword_hash */
+};
+
+/* NOTE(review): not referenced in the code visible here; presumably the
+ * initial bucket count for ngram_class_s.nword_hash -- confirm at use site. */
+#define NGRAM_HASH_SIZE 128
+
+/*
+ * Word ID bit layout, as implied by the masks and shifts below:
+ *   bits  0-23  base word ID
+ *   bits 24-30  class ID (7 bits)
+ *   bit  31     set when the ID denotes a class word
+ * NGRAM_CLASSWID does not mask its arguments, so callers must pass a wid
+ * that fits in 24 bits and a classid that fits in 7 bits.
+ */
+#define NGRAM_BASEWID(wid) ((wid)&0xffffff)
+#define NGRAM_CLASSID(wid) (((wid)>>24) & 0x7f)
+#define NGRAM_CLASSWID(wid,classid) (((classid)<<24) | 0x80000000 | (wid))
+#define NGRAM_IS_CLASSWID(wid) ((wid)&0x80000000)
+
+/* NOTE(review): presumably the growth increment for unigram storage
+ * (see n_1g_alloc) -- confirm at use site. */
+#define UG_ALLOC_STEP 10
+
+/**
+ * Implementation-specific functions for operating on ngram_model_t objects.
+ *
+ * Member order matters: method tables in this library are written as
+ * positional initializers, so new members must be appended at the end.
+ */
+typedef struct ngram_funcs_s {
+ /**
+ * Implementation-specific function for freeing an ngram_model_t.
+ */
+ void (*free)(ngram_model_t *model);
+ /**
+ * Implementation-specific function for applying language model weights.
+ */
+ int (*apply_weights)(ngram_model_t *model,
+ float32 lw,
+ float32 wip,
+ float32 uw);
+ /**
+ * Implementation-specific function for querying language model score.
+ */
+ int32 (*score)(ngram_model_t *model,
+ int32 wid,
+ int32 *history,
+ int32 n_hist,
+ int32 *n_used);
+ /**
+ * Implementation-specific function for querying raw language
+ * model probability.
+ */
+ int32 (*raw_score)(ngram_model_t *model,
+ int32 wid,
+ int32 *history,
+ int32 n_hist,
+ int32 *n_used);
+ /**
+ * Implementation-specific function for adding unigrams.
+ *
+ * This function updates the internal structures of a language
+ * model to add the given unigram with the given weight (defined
+ * as a log-factor applied to the uniform distribution). This
+ * includes reallocating or otherwise resizing the set of unigrams.
+ *
+ * @return The language model score (not raw log-probability) of
+ * the new word, or 0 for failure.
+ */
+ int32 (*add_ug)(ngram_model_t *model,
+ int32 wid, int32 lweight);
+ /**
+ * Implementation-specific function for purging N-Gram cache
+ */
+ void (*flush)(ngram_model_t *model);
+
+ /**
+ * Implementation-specific function for creating an iterator from a
+ * word ID and a history of n_hist word IDs.
+ * NOTE(review): presumably positions the iterator on that specific
+ * N-gram -- confirm against the implementations.
+ */
+ ngram_iter_t * (*iter)(ngram_model_t *model, int32 wid, int32 *history, int32 n_hist);
+
+ /**
+ * Implementation-specific function for creating an iterator from an
+ * order m.
+ * NOTE(review): presumably iterates over all M-grams of that order;
+ * whether m is zero-based is not visible here.
+ */
+ ngram_iter_t * (*mgrams)(ngram_model_t *model, int32 m);
+
+ /**
+ * Implementation-specific function for creating an iterator from an
+ * existing iterator.
+ * NOTE(review): presumably iterates over the successors of the
+ * N-gram the given iterator points at.
+ */
+ ngram_iter_t * (*successors)(ngram_iter_t *itor);
+
+ /**
+ * Implementation-specific function for reading the current entry of
+ * an iterator: returns a pointer to constant word IDs and fills two
+ * int32 output parameters.
+ * NOTE(review): out_score / out_bowt are presumably the score and
+ * backoff weight of the current N-gram.
+ */
+ int32 const * (*iter_get)(ngram_iter_t *itor,
+ int32 *out_score,
+ int32 *out_bowt);
+
+ /**
+ * Implementation-specific function for advancing an iterator.
+ */
+ ngram_iter_t * (*iter_next)(ngram_iter_t *itor);
+
+ /**
+ * Implementation-specific function for releasing an iterator.
+ */
+ void (*iter_free)(ngram_iter_t *itor);
+} ngram_funcs_t;
+
+/**
+ * Base iterator structure for N-grams.
+ *
+ * Set up by ngram_iter_init(), which takes the model, m and successor
+ * values as arguments.
+ */
+struct ngram_iter_s {
+ ngram_model_t *model; /**< Model this iterator was initialized with. */
+ int32 *wids; /**< Scratch space for word IDs. */
+ int16 m; /**< Order of history. */
+ int16 successor; /**< Is this a successor iterator? */
+};
+
+/**
+ * One class definition from a classdef file.
+ *
+ * The three members are handed together to ngram_model_add_class() when
+ * a control file attaches the class to a model.
+ * NOTE(review): words and weights are presumably parallel arrays of
+ * n_words entries -- confirm against read_classdef_file().
+ */
+typedef struct classdef_s {
+ char **words; /* Word strings belonging to the class. */
+ float32 *weights; /* Per-word weights. */
+ int32 n_words; /* Number of words in the class. */
+} classdef_t;
+
+/**
+ * Initialize the base ngram_model_t structure.
+ */
+int32
+ngram_model_init(ngram_model_t *model,
+ ngram_funcs_t *funcs,
+ logmath_t *lmath,
+ int32 n, int32 n_unigram);
+
+/**
+ * Read an N-Gram model from an ARPABO text file.
+ */
+ngram_model_t *ngram_model_arpa_read(cmd_ln_t *config,
+ const char *file_name,
+ logmath_t *lmath);
+/**
+ * Read an N-Gram model from a Sphinx .DMP binary file.
+ */
+ngram_model_t *ngram_model_dmp_read(cmd_ln_t *config,
+ const char *file_name,
+ logmath_t *lmath);
+/**
+ * Read an N-Gram model from a Sphinx .DMP32 binary file.
+ */
+ngram_model_t *ngram_model_dmp32_read(cmd_ln_t *config,
+ const char *file_name,
+ logmath_t *lmath);
+
+/**
+ * Write an N-Gram model to an ARPABO text file.
+ */
+int ngram_model_arpa_write(ngram_model_t *model,
+ const char *file_name);
+/**
+ * Write an N-Gram model to a Sphinx .DMP binary file.
+ */
+int ngram_model_dmp_write(ngram_model_t *model,
+ const char *file_name);
+
+/**
+ * Read a classdef (class definition) file.
+ */
+int32 read_classdef_file(hash_table_t *classes, const char *classdef_file);
+
+/**
+ * Free a class definition.
+ */
+void classdef_free(classdef_t *classdef);
+
+/**
+ * Allocate and initialize an N-Gram class.
+ */
+ngram_class_t *ngram_class_new(ngram_model_t *model, int32 tag_wid,
+ int32 start_wid, glist_t classwords);
+
+/**
+ * Deallocate an N-Gram class.
+ */
+void ngram_class_free(ngram_class_t *lmclass);
+
+/**
+ * Get the in-class log probability for a word in an N-Gram class.
+ *
+ * @return This probability, or 1 if word not found.
+ */
+int32 ngram_class_prob(ngram_class_t *lmclass, int32 wid);
+
+/**
+ * Initialize base M-Gram iterator structure.
+ */
+void ngram_iter_init(ngram_iter_t *itor, ngram_model_t *model,
+ int m, int successor);
+
+#endif /* __NGRAM_MODEL_INTERNAL_H__ */
diff --git a/media/sphinxbase/src/libsphinxbase/lm/ngram_model_set.c b/media/sphinxbase/src/libsphinxbase/lm/ngram_model_set.c
new file mode 100644
index 000000000..50b7557ae
--- /dev/null
+++ b/media/sphinxbase/src/libsphinxbase/lm/ngram_model_set.c
@@ -0,0 +1,870 @@
+/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
+/* ====================================================================
+ * Copyright (c) 2008 Carnegie Mellon University. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * This work was supported in part by funding from the Defense Advanced
+ * Research Projects Agency and the National Science Foundation of the
+ * United States of America, and the CMU Sphinx Speech Consortium.
+ *
+ * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
+ * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
+ * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ====================================================================
+ *
+ */
+/**
+ * @file ngram_model_set.c Set of language models.
+ * @author David Huggins-Daines <dhuggins@cs.cmu.edu>
+ */
+
+#include <string.h>
+#include <stdlib.h>
+
+#include "sphinxbase/err.h"
+#include "sphinxbase/ckd_alloc.h"
+#include "sphinxbase/strfuncs.h"
+#include "sphinxbase/filename.h"
+
+#include "ngram_model_set.h"
+
+static ngram_funcs_t ngram_model_set_funcs;
+
+/**
+ * qsort() comparator for an array of word strings.
+ *
+ * Orders strings with strcmp(), except that "<UNK>" always sorts first:
+ * if the left string is "<UNK>" the result is -1, otherwise if the right
+ * string is "<UNK>" the result is 1.
+ *
+ * @param a Pointer to the left element (a char *).
+ * @param b Pointer to the right element (a char *).
+ * @return Negative, zero or positive as for strcmp().
+ */
+static int
+my_compare(const void *a, const void *b)
+{
+    const char *lhs = *(char * const *)a;
+    const char *rhs = *(char * const *)b;
+
+    /* The left operand is tested first, so two "<UNK>" compare as -1. */
+    if (0 == strcmp(lhs, "<UNK>"))
+        return -1;
+    if (0 == strcmp(rhs, "<UNK>"))
+        return 1;
+    return strcmp(lhs, rhs);
+}
+
+/**
+ * Rebuild the merged vocabulary of a model set and its word-ID map.
+ *
+ * Collects every word string of every submodel (adding "<UNK>" if no
+ * submodel supplies it), re-initializes the base model for that many
+ * unigrams, sorts the words with my_compare(), and fills
+ * set->widmap[set_wid][lm_index] with each submodel's ID for the word.
+ * Any previous widmap is released first.
+ *
+ * @param base  Base structure of the model set (an ngram_model_set_t).
+ * @param lmath Log-math object passed on to ngram_model_init().
+ * @param n     N-gram order passed on to ngram_model_init().
+ */
+static void
+build_widmap(ngram_model_t *base, logmath_t *lmath, int32 n)
+{
+    ngram_model_set_t *set = (ngram_model_set_t *)base;
+    ngram_model_t **models = set->lms;
+    hash_table_t *vocab;
+    glist_t entries;
+    gnode_t *node;
+    int32 lmidx, w;
+
+    /* Gather the union of all submodel vocabularies. */
+    vocab = hash_table_new(models[0]->n_words, FALSE);
+    for (lmidx = 0; lmidx < set->n_models; ++lmidx) {
+        for (w = 0; w < models[lmidx]->n_words; ++w) {
+            /* A word already present is simply left as it is. */
+            (void)hash_table_enter_int32(vocab, models[lmidx]->word_str[w], w);
+        }
+    }
+    /* Make sure the merged vocabulary contains <UNK>. */
+    if (hash_table_lookup(vocab, "<UNK>", NULL) != 0)
+        (void)hash_table_enter_int32(vocab, "<UNK>", 0);
+
+    /* The unigram count is known now, so the base model can be set up. */
+    ngram_model_init(base, &ngram_model_set_funcs, lmath, n, hash_table_inuse(vocab));
+    /* Word strings are borrowed from the submodels, not owned here. */
+    base->writable = FALSE;
+
+    /* Copy the key pointers into the word array and sort them. */
+    entries = hash_table_tolist(vocab, NULL);
+    w = 0;
+    for (node = entries; node; node = gnode_next(node)) {
+        hash_entry_t *ent = gnode_ptr(node);
+        base->word_str[w] = (char *)ent->key;
+        ++w;
+    }
+    glist_free(entries);
+    qsort(base->word_str, base->n_words, sizeof(*base->word_str), my_compare);
+
+    /* Replace the word-ID map: one row per set word, one column per LM. */
+    if (set->widmap)
+        ckd_free_2d((void **)set->widmap);
+    set->widmap = (int32 **) ckd_calloc_2d(base->n_words, set->n_models,
+                                           sizeof(**set->widmap));
+    for (w = 0; w < base->n_words; ++w) {
+        char *word = base->word_str[w];
+
+        /* Master mapping from word string to set-level word ID. */
+        (void)hash_table_enter_int32(base->wid, word, w);
+        for (lmidx = 0; lmidx < set->n_models; ++lmidx)
+            set->widmap[w][lmidx] = ngram_wid(models[lmidx], word);
+    }
+    hash_table_free(vocab);
+}
+
+/**
+ * Create a model set from an array of already-loaded models.
+ *
+ * Every model is retained with ngram_model_retain() and every name is
+ * copied.  Interpolation weights start out uniform; if weights is
+ * non-NULL they are converted to the log domain and the set starts in
+ * interpolation mode (cur == -1), otherwise the first model is current.
+ *
+ * @param config   Not used by this function.
+ * @param models   Array of n_models models; all must share the same
+ *                 log base and shift.
+ * @param names    Array of n_models model names.
+ * @param weights  Optional array of n_models linear weights, or NULL.
+ * @param n_models Number of models.
+ * @return The base structure of the new set, or NULL if n_models is 0
+ *         or the log-math parameters of the models differ.
+ */
+ngram_model_t *
+ngram_model_set_init(cmd_ln_t *config,
+                     ngram_model_t **models,
+                     char **names,
+                     const float32 *weights,
+                     int32 n_models)
+{
+    ngram_model_set_t *model;
+    ngram_model_t *base;
+    logmath_t *lmath;
+    int32 uniform;
+    int32 max_n;
+    int32 k;
+
+    if (n_models == 0)
+        return NULL;
+
+    /* All submodels have to agree on log base and shift. */
+    lmath = models[0]->lmath;
+    for (k = 1; k < n_models; ++k) {
+        if (!(logmath_get_base(models[k]->lmath) == logmath_get_base(lmath)
+              && logmath_get_shift(models[k]->lmath) == logmath_get_shift(lmath))) {
+            E_ERROR("Log-math parameters don't match, will not create LM set\n");
+            return NULL;
+        }
+    }
+
+    /* Allocate the set and its per-model arrays. */
+    model = ckd_calloc(1, sizeof(*model));
+    base = &model->base;
+    model->n_models = n_models;
+    model->lms = ckd_calloc(n_models, sizeof(*model->lms));
+    model->names = ckd_calloc(n_models, sizeof(*model->names));
+    model->lweights = ckd_calloc(n_models, sizeof(*model->lweights));
+
+    /* Supplying weights selects interpolation as the initial mode. */
+    if (weights)
+        model->cur = -1;
+
+    /* Uniform weights are the default; explicit ones override them. */
+    uniform = logmath_log(lmath, 1.0/n_models);
+    max_n = 0;
+    for (k = 0; k < n_models; ++k) {
+        model->lms[k] = ngram_model_retain(models[k]);
+        model->names[k] = ckd_salloc(names[k]);
+        if (weights)
+            model->lweights[k] = logmath_log(lmath, weights[k]);
+        else
+            model->lweights[k] = uniform;
+        /* The set's order is the highest order among its members. */
+        if (max_n < models[k]->n)
+            max_n = models[k]->n;
+    }
+
+    /* Scratch space for mapped history word IDs. */
+    model->maphist = ckd_calloc(max_n - 1, sizeof(*model->maphist));
+
+    /* Finally merge the vocabularies and build the word-ID map. */
+    build_widmap(base, lmath, max_n);
+    return base;
+}
+
+/**
+ * Read a set of language models described by an LM control file.
+ *
+ * The control file is read as whitespace-separated tokens of at most
+ * 1023 characters, in this shape:
+ *
+ *   [ { classdef_file ... } ]
+ *   lm_file lm_name [ { class_name ... } ]
+ *   ...
+ *
+ * File names that are not absolute are resolved against the directory
+ * of the control file.  Each class named after an LM must have been
+ * defined by one of the classdef files and is added to that LM.
+ *
+ * All temporary resources (control file handle, class definitions, name
+ * copies, and this function's own references to the submodels) are
+ * released on every exit path; on success the returned set holds the
+ * only remaining references to the submodels.
+ *
+ * @param config    Passed on to ngram_model_read() and
+ *                  ngram_model_set_init().
+ * @param lmctlfile Path of the control file.
+ * @param lmath     Log-math object used when reading each LM.
+ * @return The base structure of the new model set, or NULL on failure.
+ */
+ngram_model_t *
+ngram_model_set_read(cmd_ln_t *config,
+                     const char *lmctlfile,
+                     logmath_t *lmath)
+{
+    FILE *ctlfp;
+    glist_t lms = NULL;
+    glist_t lmnames = NULL;
+    __BIGSTACKVARIABLE__ char str[1024];
+    ngram_model_t *set = NULL;
+    hash_table_t *classes;
+    char *basedir, *c;
+
+    /* Open the control file before allocating anything, so that a
+     * failure here leaves nothing behind. */
+    if ((ctlfp = fopen(lmctlfile, "r")) == NULL) {
+        E_ERROR_SYSTEM("Failed to open %s", lmctlfile);
+        return NULL;
+    }
+
+    /* Mapping of class names to definitions, filled from the classdef
+     * files listed at the top of the control file. */
+    classes = hash_table_new(0, FALSE);
+
+    /* Try to find the base directory to append to relative paths in
+     * the lmctl file. */
+    if ((c = strrchr(lmctlfile, '/')) || (c = strrchr(lmctlfile, '\\'))) {
+        /* Include the trailing slash. */
+        basedir = ckd_calloc(c - lmctlfile + 2, 1);
+        memcpy(basedir, lmctlfile, c - lmctlfile + 1);
+    }
+    else {
+        basedir = NULL;
+    }
+    E_INFO("Reading LM control file '%s'\n", lmctlfile);
+    if (basedir)
+        E_INFO("Will prepend '%s' to unqualified paths\n", basedir);
+
+    if (fscanf(ctlfp, "%1023s", str) == 1) {
+        if (strcmp(str, "{") == 0) {
+            /* Load LMclass files */
+            while ((fscanf(ctlfp, "%1023s", str) == 1)
+                   && (strcmp(str, "}") != 0)) {
+                char *deffile;
+                if (basedir && !path_is_absolute(str))
+                    deffile = string_join(basedir, str, NULL);
+                else
+                    deffile = ckd_salloc(str);
+                E_INFO("Reading classdef from '%s'\n", deffile);
+                if (read_classdef_file(classes, deffile) < 0) {
+                    ckd_free(deffile);
+                    goto error_out;
+                }
+                ckd_free(deffile);
+            }
+
+            if (strcmp(str, "}") != 0) {
+                E_ERROR("Unexpected EOF in %s\n", lmctlfile);
+                goto error_out;
+            }
+
+            /* This might be the first LM name. */
+            if (fscanf(ctlfp, "%1023s", str) != 1)
+                str[0] = '\0';
+        }
+    }
+    else
+        str[0] = '\0';
+
+    /* Read in one LM at a time and add classes to them as necessary. */
+    while (str[0] != '\0') {
+        char *lmfile;
+        ngram_model_t *lm;
+
+        /* Same absolute-path test as for the classdef files above. */
+        if (basedir && !path_is_absolute(str))
+            lmfile = string_join(basedir, str, NULL);
+        else
+            lmfile = ckd_salloc(str);
+        E_INFO("Reading lm from '%s'\n", lmfile);
+        lm = ngram_model_read(config, lmfile, NGRAM_AUTO, lmath);
+        if (lm == NULL) {
+            ckd_free(lmfile);
+            goto error_out;
+        }
+        /* Track the model right away so that every later error path
+         * releases it through the common cleanup code. */
+        lms = glist_add_ptr(lms, lm);
+        if (fscanf(ctlfp, "%1023s", str) != 1) {
+            E_ERROR("LMname missing after LMFileName '%s'\n", lmfile);
+            ckd_free(lmfile);
+            goto error_out;
+        }
+        ckd_free(lmfile);
+        lmnames = glist_add_ptr(lmnames, ckd_salloc(str));
+
+        if (fscanf(ctlfp, "%1023s", str) == 1) {
+            if (strcmp(str, "{") == 0) {
+                /* LM uses classes; read their names */
+                while ((fscanf(ctlfp, "%1023s", str) == 1) &&
+                       (strcmp(str, "}") != 0)) {
+                    void *val;
+                    classdef_t *classdef;
+
+                    if (hash_table_lookup(classes, str, &val) == -1) {
+                        E_ERROR("Unknown class %s in control file\n", str);
+                        goto error_out;
+                    }
+                    classdef = val;
+                    if (ngram_model_add_class(lm, str, 1.0,
+                                              classdef->words, classdef->weights,
+                                              classdef->n_words) < 0) {
+                        goto error_out;
+                    }
+                    E_INFO("Added class %s containing %d words\n",
+                           str, classdef->n_words);
+                }
+                if (strcmp(str, "}") != 0) {
+                    E_ERROR("Unexpected EOF in %s\n", lmctlfile);
+                    goto error_out;
+                }
+                if (fscanf(ctlfp, "%1023s", str) != 1)
+                    str[0] = '\0';
+            }
+        }
+        else
+            str[0] = '\0';
+    }
+
+    /* Now construct arrays out of lms and lmnames, and build an
+     * ngram_model_set. */
+    lms = glist_reverse(lms);
+    lmnames = glist_reverse(lmnames);
+    {
+        int32 n_models;
+        ngram_model_t **lm_array;
+        char **name_array;
+        gnode_t *lm_node, *name_node;
+        int32 i;
+
+        n_models = glist_count(lms);
+        lm_array = ckd_calloc(n_models, sizeof(*lm_array));
+        name_array = ckd_calloc(n_models, sizeof(*name_array));
+        lm_node = lms;
+        name_node = lmnames;
+        for (i = 0; i < n_models; ++i) {
+            lm_array[i] = gnode_ptr(lm_node);
+            name_array[i] = gnode_ptr(name_node);
+            lm_node = gnode_next(lm_node);
+            name_node = gnode_next(name_node);
+        }
+        set = ngram_model_set_init(config, lm_array, name_array,
+                                   NULL, n_models);
+        ckd_free(lm_array);
+        ckd_free(name_array);
+    }
+
+    /* Common cleanup, reached on success as well as on failure. */
+error_out:
+    {
+        gnode_t *gn;
+        glist_t hlist;
+
+        fclose(ctlfp);
+        /* Drop this function's reference to every model it read.  If
+         * the set was built it retained each model itself, so they stay
+         * alive; otherwise this frees them. */
+        for (gn = lms; gn; gn = gnode_next(gn)) {
+            ngram_model_free(gnode_ptr(gn));
+        }
+        glist_free(lms);
+        /* The set copied the names, so ours are always released. */
+        for (gn = lmnames; gn; gn = gnode_next(gn)) {
+            ckd_free(gnode_ptr(gn));
+        }
+        glist_free(lmnames);
+        /* Class definitions were only needed while adding classes. */
+        hlist = hash_table_tolist(classes, NULL);
+        for (gn = hlist; gn; gn = gnode_next(gn)) {
+            hash_entry_t *he = gnode_ptr(gn);
+            ckd_free((char *)he->key);
+            classdef_free(he->val);
+        }
+        glist_free(hlist);
+        hash_table_free(classes);
+        ckd_free(basedir);
+    }
+    return set;
+}
+
+/**
+ * Number of submodels currently held by a model set.
+ *
+ * @param base Base structure of a model set.
+ * @return The set's n_models value.
+ */
+int32
+ngram_model_set_count(ngram_model_t *base)
+{
+    return ((ngram_model_set_t *)base)->n_models;
+}
+
+/**
+ * Start iterating over the submodels of a set.
+ *
+ * @param base Base structure of a model set (may be NULL).
+ * @return A newly allocated, zero-initialized iterator bound to the
+ *         set, or NULL if base is NULL or the set has no models.
+ */
+ngram_model_set_iter_t *
+ngram_model_set_iter(ngram_model_t *base)
+{
+    ngram_model_set_t *set = (ngram_model_set_t *)base;
+    ngram_model_set_iter_t *itor = NULL;
+
+    if (set != NULL && set->n_models != 0) {
+        itor = ckd_calloc(1, sizeof(*itor));
+        itor->set = set;
+    }
+    return itor;
+}
+
+/**
+ * Advance a model-set iterator to the next submodel.
+ *
+ * @param itor Iterator to advance.
+ * @return itor itself, or NULL once the end is reached, in which case
+ *         the iterator has already been freed.
+ */
+ngram_model_set_iter_t *
+ngram_model_set_iter_next(ngram_model_set_iter_t *itor)
+{
+    itor->cur += 1;
+    if (itor->cur != itor->set->n_models)
+        return itor;
+
+    /* Ran off the end: the iterator disposes of itself. */
+    ngram_model_set_iter_free(itor);
+    return NULL;
+}
+
+/**
+ * Release a model-set iterator.
+ *
+ * Only the iterator itself is freed; the set it refers to is untouched.
+ * ngram_model_set_iter_next() calls this itself when it reaches the end.
+ */
+void
+ngram_model_set_iter_free(ngram_model_set_iter_t *itor)
+{
+ ckd_free(itor);
+}
+
+/**
+ * Get the submodel an iterator currently points at.
+ *
+ * @param itor   Iterator.
+ * @param lmname If non-NULL, receives the submodel's name (a pointer
+ *               into the set, not a copy).
+ * @return The current submodel.
+ */
+ngram_model_t *
+ngram_model_set_iter_model(ngram_model_set_iter_t *itor,
+                           char const **lmname)
+{
+    ngram_model_set_t *owner = itor->set;
+
+    if (lmname != NULL)
+        *lmname = owner->names[itor->cur];
+    return owner->lms[itor->cur];
+}
+
+/**
+ * Find a submodel by name without changing the current selection.
+ *
+ * @param base Base structure of a model set.
+ * @param name Name to look for, or NULL for the currently selected
+ *             submodel.
+ * @return The matching submodel; NULL if no submodel has that name, or
+ *         if name is NULL while the set is interpolating (cur == -1).
+ */
+ngram_model_t *
+ngram_model_set_lookup(ngram_model_t *base,
+                       const char *name)
+{
+    ngram_model_set_t *set = (ngram_model_set_t *)base;
+    int32 idx;
+
+    /* No name: report the current model, if there is one. */
+    if (name == NULL)
+        return (set->cur == -1) ? NULL : set->lms[set->cur];
+
+    /* Linear search; sets are expected to be small. */
+    for (idx = 0; idx < set->n_models; ++idx) {
+        if (strcmp(set->names[idx], name) == 0)
+            return set->lms[idx];
+    }
+    return NULL;
+}
+
+/**
+ * Make the named submodel the current one.
+ *
+ * @param base Base structure of a model set.
+ * @param name Name of the submodel to select.
+ * @return The selected submodel, or NULL (selection unchanged) if no
+ *         submodel has that name.
+ */
+ngram_model_t *
+ngram_model_set_select(ngram_model_t *base,
+                       const char *name)
+{
+    ngram_model_set_t *set = (ngram_model_set_t *)base;
+    int32 idx;
+
+    /* Linear search; sets are expected to be small. */
+    for (idx = 0; idx < set->n_models; ++idx) {
+        if (strcmp(set->names[idx], name) == 0) {
+            set->cur = idx;
+            return set->lms[idx];
+        }
+    }
+    return NULL;
+}
+
+/**
+ * Name of the currently selected submodel.
+ *
+ * @param base Base structure of a model set.
+ * @return The name (a pointer into the set), or NULL when the set is
+ *         interpolating (cur == -1).
+ */
+const char *
+ngram_model_set_current(ngram_model_t *base)
+{
+    ngram_model_set_t *set = (ngram_model_set_t *)base;
+
+    return (set->cur == -1) ? NULL : set->names[set->cur];
+}
+
+/**
+ * Map a set-level word ID to the current submodel's word ID.
+ *
+ * @param base    Base structure of a model set.
+ * @param set_wid Word ID in the set's merged vocabulary.
+ * @return The word ID in the current submodel, or NGRAM_INVALID_WID if
+ *         the set is interpolating (cur == -1) or set_wid lies outside
+ *         [0, n_words).
+ */
+int32
+ngram_model_set_current_wid(ngram_model_t *base,
+                            int32 set_wid)
+{
+    ngram_model_set_t *set = (ngram_model_set_t *)base;
+
+    /* Reject negative IDs as well as too-large ones: set_wid indexes
+     * widmap directly. */
+    if (set->cur == -1 || set_wid < 0 || set_wid >= base->n_words)
+        return NGRAM_INVALID_WID;
+    else
+        return set->widmap[set_wid][set->cur];
+}
+
+/**
+ * Test whether a set-level word ID is known to the active submodel(s).
+ *
+ * When a submodel is selected, only that submodel is consulted; when
+ * the set is interpolating (cur == -1), the word counts as known if any
+ * submodel maps it to something other than its unknown-word ID.
+ *
+ * @param base    Base structure of a model set.
+ * @param set_wid Word ID in the set's merged vocabulary.
+ * @return TRUE if known, FALSE otherwise (including when set_wid lies
+ *         outside [0, n_words)).
+ */
+int32
+ngram_model_set_known_wid(ngram_model_t *base,
+                          int32 set_wid)
+{
+    ngram_model_set_t *set = (ngram_model_set_t *)base;
+
+    /* Reject negative IDs as well as too-large ones: set_wid indexes
+     * widmap directly. */
+    if (set_wid < 0 || set_wid >= base->n_words)
+        return FALSE;
+    else if (set->cur == -1) {
+        int32 i;
+        for (i = 0; i < set->n_models; ++i) {
+            if (set->widmap[set_wid][i] != ngram_unknown_wid(set->lms[i]))
+                return TRUE;
+        }
+        return FALSE;
+    }
+    else
+        return (set->widmap[set_wid][set->cur]
+                != ngram_unknown_wid(set->lms[set->cur]));
+}
+
+/**
+ * Switch a model set to interpolation mode, optionally setting weights.
+ *
+ * - names and weights given: for each i in [0, n_models), the submodel
+ *   called names[i] receives weights[i].
+ * - only weights given: weights[i] is applied to the i-th submodel.
+ * - neither: the existing weights are kept.
+ *
+ * Weights are linear values; they are converted with logmath_log()
+ * because the set stores them in the log domain.
+ *
+ * @param base    Base structure of a model set.
+ * @param names   Optional array of n_models submodel names.
+ * @param weights Optional array of n_models linear weights.
+ * @return base, or NULL if a name does not match any submodel (weights
+ *         assigned before the failing name remain changed, and the mode
+ *         is left as it was).
+ */
+ngram_model_t *
+ngram_model_set_interp(ngram_model_t *base,
+                       const char **names,
+                       const float32 *weights)
+{
+    ngram_model_set_t *set = (ngram_model_set_t *)base;
+
+    /* If we have a set of weights here, then set them. */
+    if (names && weights) {
+        int32 i, j;
+
+        /* We hope there aren't many models. */
+        for (i = 0; i < set->n_models; ++i) {
+            for (j = 0; j < set->n_models; ++j)
+                if (0 == strcmp(names[i], set->names[j]))
+                    break;
+            if (j == set->n_models) {
+                E_ERROR("Unknown LM name %s\n", names[i]);
+                return NULL;
+            }
+            set->lweights[j] = logmath_log(base->lmath, weights[i]);
+        }
+    }
+    else if (weights) {
+        int32 i;
+
+        /* Positional weights need the same float-to-log conversion as
+         * named ones; copying the raw floats would store garbage. */
+        for (i = 0; i < set->n_models; ++i)
+            set->lweights[i] = logmath_log(base->lmath, weights[i]);
+    }
+    /* Otherwise just enable existing weights. */
+    set->cur = -1;
+    return base;
+}
+
+/**
+ * Append a submodel to a model set.
+ *
+ * The new model gets interpolation weight weight / n_models (with
+ * n_models counted after the addition) and all previous weights are
+ * scaled by one minus that value.  The model pointer is stored as given,
+ * without an extra ngram_model_retain(); the name is copied.
+ *
+ * @param base         Base structure of a model set.
+ * @param model        Model to add.
+ * @param name         Name for the model.
+ * @param weight       Relative weight of the new model.
+ * @param reuse_widmap If non-zero, keep the set's vocabulary and only
+ *                     add a column to the word-ID map; otherwise rebuild
+ *                     the merged vocabulary from scratch.
+ * @return model.
+ */
+ngram_model_t *
+ngram_model_set_add(ngram_model_t *base,
+                    ngram_model_t *model,
+                    const char *name,
+                    float32 weight,
+                    int reuse_widmap)
+
+{
+    ngram_model_set_t *set = (ngram_model_set_t *)base;
+    int32 **grown;
+    float32 fprob;
+    int32 scale, last, k;
+
+    /* Grow the model and name arrays by one slot and fill it. */
+    ++set->n_models;
+    last = set->n_models - 1;
+    set->lms = ckd_realloc(set->lms, set->n_models * sizeof(*set->lms));
+    set->lms[last] = model;
+    set->names = ckd_realloc(set->names, set->n_models * sizeof(*set->names));
+    set->names[last] = ckd_salloc(name);
+
+    /* A higher-order model needs a longer history scratch buffer. */
+    if (base->n < model->n) {
+        base->n = model->n;
+        set->maphist = ckd_realloc(set->maphist,
+                                   (model->n - 1) * sizeof(*set->maphist));
+    }
+
+    /* Give the newcomer its share and shrink everyone else by
+     * (1 - fprob) so the weights still sum to one. */
+    fprob = weight * 1.0 / set->n_models;
+    set->lweights = ckd_realloc(set->lweights,
+                                set->n_models * sizeof(*set->lweights));
+    set->lweights[last] = logmath_log(base->lmath, fprob);
+    scale = logmath_log(base->lmath, 1.0 - fprob);
+    for (k = 0; k < last; ++k)
+        set->lweights[k] += scale;
+
+    if (!reuse_widmap) {
+        /* Start over with a freshly merged vocabulary. */
+        build_widmap(base, base->lmath, base->n);
+        return model;
+    }
+
+    /* Keep the vocabulary: copy each row into a wider one and append
+     * the new model's word ID as the last column. */
+    grown = (int32 **)ckd_calloc_2d(base->n_words, set->n_models,
+                                    sizeof (**grown));
+    for (k = 0; k < base->n_words; ++k) {
+        memcpy(grown[k], set->widmap[k], last * sizeof(**grown));
+        grown[k][last] = ngram_wid(model, base->word_str[k]);
+    }
+    ckd_free_2d((void **)set->widmap);
+    set->widmap = grown;
+    return model;
+}
+
+/**
+ * Remove the named submodel from a model set.
+ *
+ * The remaining interpolation weights are rescaled by 1/(1 - w), where
+ * w is the removed model's weight.  The stored name is freed; the model
+ * itself is handed back to the caller rather than freed.
+ *
+ * NOTE(review): set->cur is not adjusted here, so a selection at or
+ * after the removed index refers to a different slot afterwards --
+ * confirm whether callers are expected to re-select.
+ *
+ * @param base         Base structure of a model set.
+ * @param name         Name of the submodel to remove.
+ * @param reuse_widmap If non-zero, keep the set's vocabulary and only
+ *                     drop the model's column from the word-ID map;
+ *                     otherwise rebuild the merged vocabulary.
+ * @return The removed submodel, or NULL if no submodel has that name.
+ */
+ngram_model_t *
+ngram_model_set_remove(ngram_model_t *base,
+                       const char *name,
+                       int reuse_widmap)
+{
+    ngram_model_set_t *set = (ngram_model_set_t *)base;
+    ngram_model_t *removed;
+    int32 victim, scale, max_n, k;
+    float32 fprob;
+
+    /* Locate the submodel by name. */
+    victim = 0;
+    while (victim < set->n_models && 0 != strcmp(name, set->names[victim]))
+        ++victim;
+    if (victim == set->n_models)
+        return NULL;
+    removed = set->lms[victim];
+
+    /* The survivors are rescaled by 1/(1-fprob), i.e. log(1-fprob) is
+     * subtracted from each log weight. */
+    fprob = logmath_exp(base->lmath, set->lweights[victim]);
+    scale = logmath_log(base->lmath, 1.0 - fprob);
+
+    ckd_free(set->names[victim]);
+    set->names[victim] = NULL;
+    --set->n_models;
+
+    /* Close the gap left by the removed entry. */
+    for (k = victim; k < set->n_models; ++k) {
+        set->lms[k] = set->lms[k + 1];
+        set->names[k] = set->names[k + 1];
+        set->lweights[k] = set->lweights[k + 1];
+    }
+    /* Renormalize the weights and recalculate the highest order. */
+    max_n = 0;
+    for (k = 0; k < set->n_models; ++k) {
+        set->lweights[k] -= scale;
+        if (max_n < set->lms[k]->n)
+            max_n = set->lms[k]->n;
+    }
+    /* The arrays (and maphist) keep their size; just blank the
+     * now-unused trailing slot. */
+    set->lms[set->n_models] = NULL;
+    set->lweights[set->n_models] = base->log_zero;
+
+    if (reuse_widmap) {
+        /* Keep the vocabulary and drop one column from every row. */
+        for (k = 0; k < base->n_words; ++k) {
+            memmove(set->widmap[k] + victim, set->widmap[k] + victim + 1,
+                    (set->n_models - victim) * sizeof(**set->widmap));
+        }
+    }
+    else {
+        build_widmap(base, base->lmath, max_n);
+    }
+    return removed;
+}
+
+/**
+ * Replace the set's vocabulary with an externally supplied word list.
+ *
+ * After this call set-level word ID i corresponds to words[i].  The
+ * strings are copied, so the set owns them (writable becomes TRUE), and
+ * the word-ID map is rebuilt for every submodel.
+ *
+ * @param base    Base structure of a model set.
+ * @param words   Array of n_words word strings.
+ * @param n_words Number of words.
+ */
+void
+ngram_model_set_map_words(ngram_model_t *base,
+                          const char **words,
+                          int32 n_words)
+{
+    ngram_model_set_t *set = (ngram_model_set_t *)base;
+    int32 w, lmidx;
+
+    /* Discard the old vocabulary; the strings are only ours to free
+     * when the set owns them. */
+    if (base->writable) {
+        for (w = 0; w < base->n_words; ++w)
+            ckd_free(base->word_str[w]);
+    }
+    ckd_free(base->word_str);
+    ckd_free_2d((void **)set->widmap);
+
+    /* Allocate storage for the new vocabulary. */
+    base->writable = TRUE;
+    base->n_1g_alloc = n_words;
+    base->n_words = n_words;
+    base->word_str = ckd_calloc(n_words, sizeof(*base->word_str));
+    set->widmap = (int32 **)ckd_calloc_2d(n_words, set->n_models, sizeof(**set->widmap));
+    hash_table_empty(base->wid);
+
+    /* Copy each word, register it, and map it into every submodel. */
+    for (w = 0; w < n_words; ++w) {
+        char *copy = ckd_salloc(words[w]);
+
+        base->word_str[w] = copy;
+        (void)hash_table_enter_int32(base->wid, copy, w);
+        for (lmidx = 0; lmidx < set->n_models; ++lmidx)
+            set->widmap[w][lmidx] = ngram_wid(set->lms[lmidx], copy);
+    }
+}
+
+/**
+ * apply_weights method for model sets: forward the weights to every
+ * submodel, in index order.
+ *
+ * @return Always 0.
+ */
+static int
+ngram_model_set_apply_weights(ngram_model_t *base, float32 lw,
+                              float32 wip, float32 uw)
+{
+    ngram_model_set_t *set = (ngram_model_set_t *)base;
+    int32 lmidx = 0;
+
+    while (lmidx < set->n_models) {
+        ngram_model_apply_weights(set->lms[lmidx], lw, wip, uw);
+        ++lmidx;
+    }
+    return 0;
+}
+
+/**
+ * Translate a set-level word ID and history into one submodel's IDs.
+ *
+ * Fills set->maphist[0 .. n_hist-1] with the submodel's IDs for the
+ * history words (NGRAM_INVALID_WID entries are passed through as they
+ * are) and returns the submodel's ID for wid.
+ *
+ * @param set     Model set owning widmap and maphist.
+ * @param lmidx   Index of the submodel to map into.
+ * @param wid     Set-level word ID.
+ * @param history Set-level history word IDs.
+ * @param n_hist  Number of history entries to map.
+ * @return The submodel's word ID for wid.
+ */
+static int32
+ngram_model_set_map_ids(ngram_model_set_t *set, int32 lmidx,
+                        int32 wid, int32 *history, int32 n_hist)
+{
+    int32 j;
+
+    for (j = 0; j < n_hist; ++j) {
+        if (history[j] == NGRAM_INVALID_WID)
+            set->maphist[j] = NGRAM_INVALID_WID;
+        else
+            set->maphist[j] = set->widmap[history[j]][lmidx];
+    }
+    return set->widmap[wid][lmidx];
+}
+
+/**
+ * score method for model sets.
+ *
+ * With a submodel selected, returns that submodel's ngram_ng_score().
+ * When interpolating (cur == -1), returns the log-sum over all
+ * submodels of (interpolation weight + submodel score); n_used then
+ * holds whatever the last submodel reported.
+ *
+ * @param base    Base structure of a model set.
+ * @param wid     Set-level word ID.
+ * @param history Set-level history word IDs.
+ * @param n_hist  Length of history; truncated to n - 1.
+ * @param n_used  Passed through to ngram_ng_score().
+ * @return The language model score.
+ */
+static int32
+ngram_model_set_score(ngram_model_t *base, int32 wid,
+                      int32 *history, int32 n_hist,
+                      int32 *n_used)
+{
+    ngram_model_set_t *set = (ngram_model_set_t *)base;
+    int32 mapwid;
+    int32 score;
+    int32 i;
+
+    /* Truncate the history. */
+    if (n_hist > base->n - 1)
+        n_hist = base->n - 1;
+
+    /* Interpolate if there is no current. */
+    if (set->cur == -1) {
+        score = base->log_zero;
+        for (i = 0; i < set->n_models; ++i) {
+            mapwid = ngram_model_set_map_ids(set, i, wid, history, n_hist);
+            score = logmath_add(base->lmath, score,
+                                set->lweights[i] +
+                                ngram_ng_score(set->lms[i],
+                                               mapwid, set->maphist, n_hist, n_used));
+        }
+    }
+    else {
+        mapwid = ngram_model_set_map_ids(set, set->cur, wid, history, n_hist);
+        score = ngram_ng_score(set->lms[set->cur],
+                               mapwid, set->maphist, n_hist, n_used);
+    }
+
+    return score;
+}
+
+/**
+ * raw_score method for model sets.
+ *
+ * Same structure as ngram_model_set_score(), but each submodel is
+ * queried with ngram_ng_prob() instead of ngram_ng_score().
+ *
+ * @param base    Base structure of a model set.
+ * @param wid     Set-level word ID.
+ * @param history Set-level history word IDs.
+ * @param n_hist  Length of history; truncated to n - 1.
+ * @param n_used  Passed through to ngram_ng_prob().
+ * @return The raw language model probability (log domain).
+ */
+static int32
+ngram_model_set_raw_score(ngram_model_t *base, int32 wid,
+                          int32 *history, int32 n_hist,
+                          int32 *n_used)
+{
+    ngram_model_set_t *set = (ngram_model_set_t *)base;
+    int32 mapwid;
+    int32 score;
+    int32 i;
+
+    /* Truncate the history. */
+    if (n_hist > base->n - 1)
+        n_hist = base->n - 1;
+
+    /* Interpolate if there is no current. */
+    if (set->cur == -1) {
+        score = base->log_zero;
+        for (i = 0; i < set->n_models; ++i) {
+            mapwid = ngram_model_set_map_ids(set, i, wid, history, n_hist);
+            score = logmath_add(base->lmath, score,
+                                set->lweights[i] +
+                                ngram_ng_prob(set->lms[i],
+                                              mapwid, set->maphist, n_hist, n_used));
+        }
+    }
+    else {
+        mapwid = ngram_model_set_map_ids(set, set->cur, wid, history, n_hist);
+        score = ngram_ng_prob(set->lms[set->cur],
+                              mapwid, set->maphist, n_hist, n_used);
+    }
+
+    return score;
+}
+
+/**
+ * add_ug method for model sets.
+ *
+ * Makes sure the word with set-level ID wid exists in every active
+ * submodel (all of them when interpolating, otherwise only the current
+ * one), adding it with ngram_model_add_word() where it is missing, then
+ * appends a row for it to the word-ID map.
+ *
+ * @param base    Base structure of a model set.
+ * @param wid     Set-level ID of the word; it must already be present
+ *                in base->word_str.
+ * @param lweight Log weight; converted with logmath_exp() before being
+ *                passed to ngram_model_add_word().
+ * @return The word's unigram probability in the current submodel, or
+ *         the weighted log-sum over all submodels when interpolating;
+ *         base->log_zero if a submodel refused the word.
+ */
+static int32
+ngram_model_set_add_ug(ngram_model_t *base,
+ int32 wid, int32 lweight)
+{
+ ngram_model_set_t *set = (ngram_model_set_t *)base;
+ int32 *newwid;
+ int32 i, prob;
+
+ /* At this point the word has already been added to the master
+ model and we have a new word ID for it. Add it to active
+ submodels and track the word IDs. */
+ newwid = ckd_calloc(set->n_models, sizeof(*newwid));
+ prob = base->log_zero;
+ for (i = 0; i < set->n_models; ++i) {
+ int32 wprob, n_hist;
+
+ /* Only add to active models. */
+ if (set->cur == -1 || set->cur == i) {
+ /* Did this word already exist? */
+ newwid[i] = ngram_wid(set->lms[i], base->word_str[wid]);
+ if (newwid[i] == NGRAM_INVALID_WID) {
+ /* Add it to the submodel. */
+ newwid[i] = ngram_model_add_word(set->lms[i], base->word_str[wid],
+ logmath_exp(base->lmath, lweight));
+ if (newwid[i] == NGRAM_INVALID_WID) {
+ /* Bail out without touching widmap; words already added to
+ * earlier submodels in this loop stay there. */
+ ckd_free(newwid);
+ return base->log_zero;
+ }
+ }
+ /* Now get the unigram probability for the new word and either
+ * interpolate it or use it (if this is the current model). */
+ wprob = ngram_ng_prob(set->lms[i], newwid[i], NULL, 0, &n_hist);
+ if (set->cur == i)
+ prob = wprob;
+ else if (set->cur == -1)
+ prob = logmath_add(base->lmath, prob, set->lweights[i] + wprob);
+ }
+ else {
+ /* Inactive submodels get no mapping for the new word. */
+ newwid[i] = NGRAM_INVALID_WID;
+ }
+ }
+ /* Okay we have the word IDs for this in all the submodels. Now
+ do some complicated memory mangling to add this to the
+ widmap. */
+ /* This treats widmap as an array of row pointers whose first entry
+ * points at one contiguous block holding every row: both are grown,
+ * then all row pointers are recomputed because the block may have
+ * moved. NOTE(review): relies on ckd_calloc_2d() allocating in that
+ * layout -- confirm in ckd_alloc. */
+ set->widmap = ckd_realloc(set->widmap, base->n_words * sizeof(*set->widmap));
+ set->widmap[0] = ckd_realloc(set->widmap[0],
+ base->n_words
+ * set->n_models
+ * sizeof(**set->widmap));
+ for (i = 0; i < base->n_words; ++i)
+ set->widmap[i] = set->widmap[0] + i * set->n_models;
+ /* Store the new word's per-submodel IDs in its row. */
+ memcpy(set->widmap[wid], newwid, set->n_models * sizeof(*newwid));
+ ckd_free(newwid);
+ return prob;
+}
+
+/**
+ * free method for model sets: release everything the set owns.
+ *
+ * Calls ngram_model_free() on each submodel and frees the name copies,
+ * the per-model arrays, the history scratch buffer and the word-ID map.
+ * The set structure itself is not freed here.
+ */
+static void
+ngram_model_set_free(ngram_model_t *base)
+{
+    ngram_model_set_t *set = (ngram_model_set_t *)base;
+    int32 lmidx;
+
+    /* Per-model resources first ... */
+    for (lmidx = 0; lmidx < set->n_models; ++lmidx) {
+        ngram_model_free(set->lms[lmidx]);
+        ckd_free(set->names[lmidx]);
+    }
+    /* ... then the arrays that held them. */
+    ckd_free(set->lms);
+    ckd_free(set->names);
+    ckd_free(set->lweights);
+    ckd_free(set->maphist);
+    ckd_free_2d((void **)set->widmap);
+}
+
+/**
+ * flush method for model sets: flush every submodel, in index order.
+ */
+static void
+ngram_model_set_flush(ngram_model_t *base)
+{
+    ngram_model_set_t *set = (ngram_model_set_t *)base;
+    int32 lmidx = 0;
+
+    while (lmidx < set->n_models) {
+        ngram_model_flush(set->lms[lmidx]);
+        ++lmidx;
+    }
+}
+
+/*
+ * Method table for model sets.  The initializer is positional, so the
+ * entries must stay in the member order of ngram_funcs_t.  Only the
+ * first six members are given; the remaining (iterator) members are
+ * left to static zero-initialization, i.e. NULL.
+ */
+static ngram_funcs_t ngram_model_set_funcs = {
+ ngram_model_set_free, /* free */
+ ngram_model_set_apply_weights, /* apply_weights */
+ ngram_model_set_score, /* score */
+ ngram_model_set_raw_score, /* raw_score */
+ ngram_model_set_add_ug, /* add_ug */
+ ngram_model_set_flush /* flush */
+};
diff --git a/media/sphinxbase/src/libsphinxbase/lm/ngram_model_set.h b/media/sphinxbase/src/libsphinxbase/lm/ngram_model_set.h
new file mode 100644
index 000000000..5fbc7e5a4
--- /dev/null
+++ b/media/sphinxbase/src/libsphinxbase/lm/ngram_model_set.h
@@ -0,0 +1,71 @@
+/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
+/* ====================================================================
+ * Copyright (c) 1999-2007 Carnegie Mellon University. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * This work was supported in part by funding from the Defense Advanced
+ * Research Projects Agency and the National Science Foundation of the
+ * United States of America, and the CMU Sphinx Speech Consortium.
+ *
+ * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
+ * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
+ * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ====================================================================
+ *
+ */
+/**
+ * @file ngram_model_set.h Set of language models.
+ * @author David Huggins-Daines <dhuggins@cs.cmu.edu>
+ */
+
+#ifndef __NGRAM_MODEL_SET_H__
+#define __NGRAM_MODEL_SET_H__
+
+#include "ngram_model_internal.h"
+#include "lm3g_model.h"
+
+/**
+ * Subclass of ngram_model for grouping language models.
+ */
+typedef struct ngram_model_set_s {
+ ngram_model_t base; /**< Base ngram_model_t structure; kept first so a set can be cast to and from ngram_model_t *. */
+
+ int32 n_models; /**< Number of models in this set (used as the length of lms and names). */
+ int32 cur; /**< Index of the currently selected model, or -1 for none (submodels are then interpolated). */
+ ngram_model_t **lms; /**< Language models in this set. */
+ char **names; /**< Names for language models, one separately allocated string per model. */
+ int32 *lweights; /**< Log interpolation weights, added to a submodel's log probability. */
+ int32 **widmap; /**< Word ID mapping for submodels: widmap[wid][i] is wid's ID in model i; all rows share one buffer at widmap[0]. */
+ int32 *maphist; /**< Word ID mapping for N-Gram history. */
+} ngram_model_set_t;
+
+/**
+ * Iterator over a model set.
+ */
+struct ngram_model_set_iter_s {
+ ngram_model_set_t *set; /**< Model set being iterated over. */
+ int32 cur; /**< Current position; presumably an index into set->lms -- TODO confirm against ngram_model_set.c. */
+};
+
+#endif /* __NGRAM_MODEL_SET_H__ */