diff options
Diffstat (limited to 'media/sphinxbase/src/libsphinxbase/lm/jsgf.c')
-rw-r--r-- | media/sphinxbase/src/libsphinxbase/lm/jsgf.c | 943 |
1 files changed, 943 insertions, 0 deletions
diff --git a/media/sphinxbase/src/libsphinxbase/lm/jsgf.c b/media/sphinxbase/src/libsphinxbase/lm/jsgf.c new file mode 100644 index 000000000..90e161c62 --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/lm/jsgf.c @@ -0,0 +1,943 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2007 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +#include <string.h> +#include <assert.h> + +#include "sphinxbase/ckd_alloc.h" +#include "sphinxbase/strfuncs.h" +#include "sphinxbase/hash_table.h" +#include "sphinxbase/filename.h" +#include "sphinxbase/err.h" +#include "sphinxbase/jsgf.h" + +#include "jsgf_internal.h" +#include "jsgf_parser.h" +#include "jsgf_scanner.h" + +extern int yyparse (void* scanner, jsgf_t* jsgf); + +/** + * \file jsgf.c + * + * This file implements the data structures for parsing JSGF grammars + * into Sphinx finite-state grammars. + **/ + +static int expand_rule(jsgf_t *grammar, jsgf_rule_t *rule, int rule_entry, int rule_exit); + +jsgf_atom_t * +jsgf_atom_new(char *name, float weight) +{ + jsgf_atom_t *atom; + + atom = ckd_calloc(1, sizeof(*atom)); + atom->name = ckd_salloc(name); + atom->weight = weight; + return atom; +} + +int +jsgf_atom_free(jsgf_atom_t *atom) +{ + if (atom == NULL) + return 0; + ckd_free(atom->name); + ckd_free(atom); + return 0; +} + +jsgf_t * +jsgf_grammar_new(jsgf_t *parent) +{ + jsgf_t *grammar; + + grammar = ckd_calloc(1, sizeof(*grammar)); + /* If this is an imported/subgrammar, then we will share a global + * namespace with the parent grammar. */ + if (parent) { + grammar->rules = parent->rules; + grammar->imports = parent->imports; + grammar->searchpath = parent->searchpath; + grammar->parent = parent; + } + else { + grammar->rules = hash_table_new(64, 0); + grammar->imports = hash_table_new(16, 0); + } + + return grammar; +} + +void +jsgf_grammar_free(jsgf_t *jsgf) +{ + /* FIXME: Probably should just use refcounting instead. */ + if (jsgf->parent == NULL) { + hash_iter_t *itor; + gnode_t *gn; + + for (itor = hash_table_iter(jsgf->rules); itor; + itor = hash_table_iter_next(itor)) { + ckd_free((char *)itor->ent->key); + jsgf_rule_free((jsgf_rule_t *)itor->ent->val); + } + hash_table_free(jsgf->rules); + for (itor = hash_table_iter(jsgf->imports); itor; + itor = hash_table_iter_next(itor)) { + ckd_free((char *)itor->ent->key); + jsgf_grammar_free((jsgf_t *)itor->ent->val); + } + hash_table_free(jsgf->imports); + for (gn = jsgf->searchpath; gn; gn = gnode_next(gn)) + ckd_free(gnode_ptr(gn)); + glist_free(jsgf->searchpath); + for (gn = jsgf->links; gn; gn = gnode_next(gn)) + ckd_free(gnode_ptr(gn)); + glist_free(jsgf->links); + } + ckd_free(jsgf->name); + ckd_free(jsgf->version); + ckd_free(jsgf->charset); + ckd_free(jsgf->locale); + ckd_free(jsgf); +} + +static void +jsgf_rhs_free(jsgf_rhs_t *rhs) +{ + gnode_t *gn; + + if (rhs == NULL) + return; + + jsgf_rhs_free(rhs->alt); + for (gn = rhs->atoms; gn; gn = gnode_next(gn)) + jsgf_atom_free(gnode_ptr(gn)); + glist_free(rhs->atoms); + ckd_free(rhs); +} + +jsgf_atom_t * +jsgf_kleene_new(jsgf_t *jsgf, jsgf_atom_t *atom, int plus) +{ + jsgf_rule_t *rule; + jsgf_atom_t *rule_atom; + jsgf_rhs_t *rhs; + + /* Generate an "internal" rule of the form (<NULL> | <name> <g0006>) */ + /* Or if plus is true, (<name> | <name> <g0006>) */ + rhs = ckd_calloc(1, sizeof(*rhs)); + if (plus) + rhs->atoms = glist_add_ptr(NULL, jsgf_atom_new(atom->name, 1.0)); + else + rhs->atoms = glist_add_ptr(NULL, jsgf_atom_new("<NULL>", 1.0)); + rule = jsgf_define_rule(jsgf, NULL, rhs, 0); + rule_atom = jsgf_atom_new(rule->name, 1.0); + rhs = ckd_calloc(1, sizeof(*rhs)); + rhs->atoms = glist_add_ptr(NULL, rule_atom); + rhs->atoms = glist_add_ptr(rhs->atoms, atom); + rule->rhs->alt = rhs; + + return jsgf_atom_new(rule->name, 1.0); +} + +jsgf_rule_t * +jsgf_optional_new(jsgf_t *jsgf, jsgf_rhs_t *exp) +{ + jsgf_rhs_t *rhs = ckd_calloc(1, sizeof(*rhs)); + jsgf_atom_t *atom = jsgf_atom_new("<NULL>", 1.0); + rhs->alt = exp; + rhs->atoms = glist_add_ptr(NULL, atom); + return jsgf_define_rule(jsgf, NULL, rhs, 0); +} + +void +jsgf_add_link(jsgf_t *grammar, jsgf_atom_t *atom, int from, int to) +{ + jsgf_link_t *link; + + link = ckd_calloc(1, sizeof(*link)); + link->from = from; + link->to = to; + link->atom = atom; + grammar->links = glist_add_ptr(grammar->links, link); +} + +static char * +extract_grammar_name(char *rule_name) +{ + char* dot_pos; + char* grammar_name = ckd_salloc(rule_name + 1); + if ((dot_pos = strrchr(grammar_name + 1, '.')) == NULL) { + ckd_free(grammar_name); + return NULL; + } + *dot_pos='\0'; + return grammar_name; +} + +char const * +jsgf_grammar_name(jsgf_t *jsgf) +{ + return jsgf->name; +} + +static char * +jsgf_fullname(jsgf_t *jsgf, const char *name) +{ + char *fullname; + + /* Check if it is already qualified */ + if (strchr(name + 1, '.')) + return ckd_salloc(name); + + /* Skip leading < in name */ + fullname = ckd_malloc(strlen(jsgf->name) + strlen(name) + 4); + sprintf(fullname, "<%s.%s", jsgf->name, name + 1); + return fullname; +} + +static char * +jsgf_fullname_from_rule(jsgf_rule_t *rule, const char *name) +{ + char *fullname, *grammar_name; + + /* Check if it is already qualified */ + if (strchr(name + 1, '.')) + return ckd_salloc(name); + + /* Skip leading < in name */ + if ((grammar_name = extract_grammar_name(rule->name)) == NULL) + return ckd_salloc(name); + fullname = ckd_malloc(strlen(grammar_name) + strlen(name) + 4); + sprintf(fullname, "<%s.%s", grammar_name, name + 1); + ckd_free(grammar_name); + + return fullname; +} + +/* Extract as rulename everything after the secondlast dot, if existent. + * Because everything before the secondlast dot is the path-specification. */ +static char * +importname2rulename(char *importname) +{ + char *rulename = ckd_salloc(importname); + char *last_dotpos; + char *secondlast_dotpos; + + if ((last_dotpos = strrchr(rulename+1, '.')) != NULL) { + *last_dotpos='\0'; + if ((secondlast_dotpos = strrchr(rulename+1, '.')) != NULL) { + *last_dotpos='.'; + *secondlast_dotpos='<'; + secondlast_dotpos = ckd_salloc(secondlast_dotpos); + ckd_free(rulename); + return secondlast_dotpos; + } + else { + *last_dotpos='.'; + return rulename; + } + } + else { + return rulename; + } +} + +#define NO_NODE -1 +#define RECURSIVE_NODE -2 + +/** + * + * Expand a right-hand-side of a rule (i.e. a single alternate). + * + * @returns the FSG state at the end of this rule, NO_NODE if there's an + * error, and RECURSIVE_NODE if the right-hand-side ended in right-recursion (i.e. + * a link to an earlier FSG state). + */ +static int +expand_rhs(jsgf_t *grammar, jsgf_rule_t *rule, jsgf_rhs_t *rhs, + int rule_entry, int rule_exit) +{ + gnode_t *gn; + int lastnode; + + /* Last node expanded in this sequence. */ + lastnode = rule_entry; + + /* Iterate over atoms in rhs and generate links/nodes */ + for (gn = rhs->atoms; gn; gn = gnode_next(gn)) { + jsgf_atom_t *atom = gnode_ptr(gn); + + if (jsgf_atom_is_rule(atom)) { + jsgf_rule_t *subrule; + char *fullname; + gnode_t *subnode; + jsgf_rule_stack_t *rule_stack_entry = NULL; + + /* Special case for <NULL> and <VOID> pseudo-rules + If this is the only atom in the rhs, and it's the + first rhs in the rule, then emit a null transition, + creating an exit state if needed. */ + if (0 == strcmp(atom->name, "<NULL>")) { + if (gn == rhs->atoms && gnode_next(gn) == NULL) { + if (rule_exit == NO_NODE) { + jsgf_add_link(grammar, atom, + lastnode, grammar->nstate); + rule_exit = lastnode = grammar->nstate; + ++grammar->nstate; + } else { + jsgf_add_link(grammar, atom, + lastnode, rule_exit); + } + } + continue; + } + else if (0 == strcmp(atom->name, "<VOID>")) { + /* Make this entire RHS unspeakable */ + return NO_NODE; + } + + fullname = jsgf_fullname_from_rule(rule, atom->name); + if (hash_table_lookup(grammar->rules, fullname, (void**)&subrule) == -1) { + E_ERROR("Undefined rule in RHS: %s\n", fullname); + ckd_free(fullname); + return NO_NODE; + } + ckd_free(fullname); + + /* Look for this subrule in the stack of expanded rules */ + for (subnode = grammar->rulestack; subnode; subnode = gnode_next(subnode)) { + rule_stack_entry = (jsgf_rule_stack_t *)gnode_ptr(subnode); + if (rule_stack_entry->rule == subrule) + break; + } + + if (subnode != NULL) { + /* Allow right-recursion only. */ + if (gnode_next(gn) != NULL) { + E_ERROR("Only right-recursion is permitted (in %s.%s)\n", + grammar->name, rule->name); + return NO_NODE; + } + /* Add a link back to the beginning of this rule instance */ + E_INFO("Right recursion %s %d => %d\n", atom->name, lastnode, rule_stack_entry->entry); + jsgf_add_link(grammar, atom, lastnode, rule_stack_entry->entry); + + /* Let our caller know that this rhs didn't reach an + end state. */ + lastnode = RECURSIVE_NODE; + } + else { + /* If this is the last atom in this rhs, link its + expansion to the parent rule's exit state. + Otherwise, create a new exit state for it. */ + int subruleexit = NO_NODE; + if (gnode_next(gn) == NULL && rule_exit >= 0) + subruleexit = rule_exit; + + /* Expand the subrule */ + lastnode = expand_rule(grammar, subrule, lastnode, subruleexit); + + if (lastnode == NO_NODE) + return NO_NODE; + } + } + else { + /* An exit-state is created if this isn't the last atom + in the rhs, or if the containing rule doesn't have an + exit state yet. + Otherwise, the rhs's exit state becomes the containing + rule's exit state. */ + int exitstate; + if (gnode_next(gn) == NULL && rule_exit >= 0) { + exitstate = rule_exit; + } else { + exitstate = grammar->nstate; + ++grammar->nstate; + } + + /* Add a link for this token */ + jsgf_add_link(grammar, atom, + lastnode, exitstate); + lastnode = exitstate; + } + } + + return lastnode; +} + +static int +expand_rule(jsgf_t *grammar, jsgf_rule_t *rule, int rule_entry, + int rule_exit) +{ + jsgf_rule_stack_t* rule_stack_entry; + jsgf_rhs_t *rhs; + + /* Push this rule onto the stack */ + rule_stack_entry = (jsgf_rule_stack_t*)ckd_calloc(1, sizeof (jsgf_rule_stack_t)); + rule_stack_entry->rule = rule; + rule_stack_entry->entry = rule_entry; + grammar->rulestack = glist_add_ptr(grammar->rulestack, + rule_stack_entry); + + for (rhs = rule->rhs; rhs; rhs = rhs->alt) { + int lastnode; + + lastnode = expand_rhs(grammar, rule, rhs, + rule_entry, rule_exit); + + if (lastnode == NO_NODE) { + return NO_NODE; + } else if (lastnode == RECURSIVE_NODE) { + /* The rhs ended with right-recursion, i.e. a transition to + an earlier state. Nothing needs to happen at this level. */ + ; + } else if (rule_exit == NO_NODE) { + /* If this rule doesn't have an exit state yet, use the exit + state of its first right-hand-side. + All other right-hand-sides will use this exit state. */ + assert (lastnode >= 0); + rule_exit = lastnode; + } + } + + /* If no exit-state was created, use the entry-state. */ + if (rule_exit == NO_NODE) { + rule_exit = rule_entry; + } + + /* Pop this rule from the rule stack */ + ckd_free(gnode_ptr(grammar->rulestack)); + grammar->rulestack = gnode_free(grammar->rulestack, NULL); + + return rule_exit; +} + +jsgf_rule_iter_t * +jsgf_rule_iter(jsgf_t *grammar) +{ + return hash_table_iter(grammar->rules); +} + +jsgf_rule_t * +jsgf_get_rule(jsgf_t *grammar, char const *name) +{ + void *val; + char *fullname; + + fullname = string_join("<", name, ">", NULL); + if (hash_table_lookup(grammar->rules, fullname, &val) < 0) { + ckd_free(fullname); + return NULL; + } + ckd_free(fullname); + return (jsgf_rule_t *)val; +} + +jsgf_rule_t * +jsgf_get_public_rule(jsgf_t *grammar) +{ + jsgf_rule_iter_t *itor; + jsgf_rule_t *public_rule = NULL; + + for (itor = jsgf_rule_iter(grammar); itor; + itor = jsgf_rule_iter_next(itor)) { + jsgf_rule_t *rule = jsgf_rule_iter_rule(itor); + if (jsgf_rule_public(rule)) { + const char *rule_name = jsgf_rule_name(rule); + char *dot_pos; + if ((dot_pos = strrchr(rule_name + 1, '.')) == NULL) { + public_rule = rule; + jsgf_rule_iter_free(itor); + break; + } + if (0 == strncmp(rule_name + 1, jsgf_grammar_name(grammar), dot_pos - rule_name - 1)) { + public_rule = rule; + jsgf_rule_iter_free(itor); + break; + } + } + } + return public_rule; +} + +char const * +jsgf_rule_name(jsgf_rule_t *rule) +{ + return rule->name; +} + +int +jsgf_rule_public(jsgf_rule_t *rule) +{ + return rule->is_public; +} + +static fsg_model_t * +jsgf_build_fsg_internal(jsgf_t *grammar, jsgf_rule_t *rule, + logmath_t *lmath, float32 lw, int do_closure) +{ + fsg_model_t *fsg; + glist_t nulls; + gnode_t *gn; + int rule_entry, rule_exit; + + /* Clear previous links */ + for (gn = grammar->links; gn; gn = gnode_next(gn)) { + ckd_free(gnode_ptr(gn)); + } + glist_free(grammar->links); + grammar->links = NULL; + grammar->nstate = 0; + + /* Create the top-level entry state, and expand the + top-level rule. */ + rule_entry = grammar->nstate++; + rule_exit = expand_rule(grammar, rule, rule_entry, NO_NODE); + + /* If no exit-state was created, create one. */ + if (rule_exit == NO_NODE) { + rule_exit = grammar->nstate++; + jsgf_add_link(grammar, NULL, rule_entry, rule_exit); + } + + fsg = fsg_model_init(rule->name, lmath, lw, grammar->nstate); + fsg->start_state = rule_entry; + fsg->final_state = rule_exit; + grammar->links = glist_reverse(grammar->links); + for (gn = grammar->links; gn; gn = gnode_next(gn)) { + jsgf_link_t *link = gnode_ptr(gn); + + if (link->atom) { + if (jsgf_atom_is_rule(link->atom)) { + fsg_model_null_trans_add(fsg, link->from, link->to, + logmath_log(lmath, link->atom->weight)); + } + else { + int wid = fsg_model_word_add(fsg, link->atom->name); + fsg_model_trans_add(fsg, link->from, link->to, + logmath_log(lmath, link->atom->weight), wid); + } + } + else { + fsg_model_null_trans_add(fsg, link->from, link->to, 0); + } + } + if (do_closure) { + nulls = fsg_model_null_trans_closure(fsg, NULL); + glist_free(nulls); + } + + return fsg; +} + +fsg_model_t * +jsgf_build_fsg(jsgf_t *grammar, jsgf_rule_t *rule, + logmath_t *lmath, float32 lw) +{ + return jsgf_build_fsg_internal(grammar, rule, lmath, lw, TRUE); +} + +fsg_model_t * +jsgf_build_fsg_raw(jsgf_t *grammar, jsgf_rule_t *rule, + logmath_t *lmath, float32 lw) +{ + return jsgf_build_fsg_internal(grammar, rule, lmath, lw, FALSE); +} + +fsg_model_t * +jsgf_read_file(const char *file, logmath_t * lmath, float32 lw) +{ + fsg_model_t *fsg; + jsgf_rule_t *rule; + jsgf_t *jsgf; + jsgf_rule_iter_t *itor; + + if ((jsgf = jsgf_parse_file(file, NULL)) == NULL) { + E_ERROR("Error parsing file: %s\n", file); + return NULL; + } + + rule = NULL; + for (itor = jsgf_rule_iter(jsgf); itor; + itor = jsgf_rule_iter_next(itor)) { + rule = jsgf_rule_iter_rule(itor); + if (jsgf_rule_public(rule)) { + jsgf_rule_iter_free(itor); + break; + } + } + if (rule == NULL) { + E_ERROR("No public rules found in %s\n", file); + return NULL; + } + fsg = jsgf_build_fsg(jsgf, rule, lmath, lw); + jsgf_grammar_free(jsgf); + return fsg; +} + +fsg_model_t * +jsgf_read_string(const char *string, logmath_t * lmath, float32 lw) +{ + fsg_model_t *fsg; + jsgf_rule_t *rule; + jsgf_t *jsgf; + jsgf_rule_iter_t *itor; + + if ((jsgf = jsgf_parse_string(string, NULL)) == NULL) { + E_ERROR("Error parsing input string\n"); + return NULL; + } + + rule = NULL; + for (itor = jsgf_rule_iter(jsgf); itor; + itor = jsgf_rule_iter_next(itor)) { + rule = jsgf_rule_iter_rule(itor); + if (jsgf_rule_public(rule)) { + jsgf_rule_iter_free(itor); + break; + } + } + if (rule == NULL) { + jsgf_grammar_free(jsgf); + E_ERROR("No public rules found in input string\n"); + return NULL; + } + fsg = jsgf_build_fsg(jsgf, rule, lmath, lw); + jsgf_grammar_free(jsgf); + return fsg; +} + + +int +jsgf_write_fsg(jsgf_t *grammar, jsgf_rule_t *rule, FILE *outfh) +{ + fsg_model_t *fsg; + logmath_t *lmath = logmath_init(1.0001, 0, 0); + + if ((fsg = jsgf_build_fsg_raw(grammar, rule, lmath, 1.0)) == NULL) + goto error_out; + + fsg_model_write(fsg, outfh); + logmath_free(lmath); + return 0; + +error_out: + logmath_free(lmath); + return -1; +} + +jsgf_rule_t * +jsgf_define_rule(jsgf_t *jsgf, char *name, jsgf_rhs_t *rhs, int is_public) +{ + jsgf_rule_t *rule; + void *val; + + if (name == NULL) { + name = ckd_malloc(strlen(jsgf->name) + 16); + sprintf(name, "<%s.g%05d>", jsgf->name, hash_table_inuse(jsgf->rules)); + } + else { + char *newname; + + newname = jsgf_fullname(jsgf, name); + name = newname; + } + + rule = ckd_calloc(1, sizeof(*rule)); + rule->refcnt = 1; + rule->name = ckd_salloc(name); + rule->rhs = rhs; + rule->is_public = is_public; + + E_INFO("Defined rule: %s%s\n", + rule->is_public ? "PUBLIC " : "", + rule->name); + val = hash_table_enter(jsgf->rules, name, rule); + if (val != (void *)rule) { + E_WARN("Multiply defined symbol: %s\n", name); + } + return rule; +} + +jsgf_rule_t * +jsgf_rule_retain(jsgf_rule_t *rule) +{ + ++rule->refcnt; + return rule; +} + +int +jsgf_rule_free(jsgf_rule_t *rule) +{ + if (rule == NULL) + return 0; + if (--rule->refcnt > 0) + return rule->refcnt; + jsgf_rhs_free(rule->rhs); + ckd_free(rule->name); + ckd_free(rule); + return 0; +} + + +/* FIXME: This should go in libsphinxutil */ +static char * +path_list_search(glist_t paths, char *path) +{ + gnode_t *gn; + + for (gn = paths; gn; gn = gnode_next(gn)) { + char *fullpath; + FILE *tmp; + + fullpath = string_join(gnode_ptr(gn), "/", path, NULL); + tmp = fopen(fullpath, "r"); + if (tmp != NULL) { + fclose(tmp); + return fullpath; + } + else { + ckd_free(fullpath); + } + } + return NULL; +} + +jsgf_rule_t * +jsgf_import_rule(jsgf_t *jsgf, char *name) +{ + char *c, *path, *newpath; + size_t namelen, packlen; + void *val; + jsgf_t *imp; + int import_all; + + /* Trim the leading and trailing <> */ + namelen = strlen(name); + path = ckd_malloc(namelen - 2 + 6); /* room for a trailing .gram */ + strcpy(path, name + 1); + /* Split off the first part of the name */ + c = strrchr(path, '.'); + if (c == NULL) { + E_ERROR("Imported rule is not qualified: %s\n", name); + ckd_free(path); + return NULL; + } + packlen = c - path; + *c = '\0'; + + /* Look for import foo.* */ + import_all = (strlen(name) > 2 && 0 == strcmp(name + namelen - 3, ".*>")); + + /* Construct a filename. */ + for (c = path; *c; ++c) + if (*c == '.') *c = '/'; + strcat(path, ".gram"); + newpath = path_list_search(jsgf->searchpath, path); + if (newpath == NULL) { + E_ERROR("Failed to find grammar %s\n", path); + ckd_free(path); + return NULL; + } + ckd_free(path); + + path = newpath; + E_INFO("Importing %s from %s to %s\n", name, path, jsgf->name); + + /* FIXME: Also, we need to make sure that path is fully qualified + * here, by adding any prefixes from jsgf->name to it. */ + /* See if we have parsed it already */ + if (hash_table_lookup(jsgf->imports, path, &val) == 0) { + E_INFO("Already imported %s\n", path); + imp = val; + ckd_free(path); + } + else { + /* If not, parse it. */ + imp = jsgf_parse_file(path, jsgf); + val = hash_table_enter(jsgf->imports, path, imp); + if (val != (void *)imp) { + E_WARN("Multiply imported file: %s\n", path); + } + } + if (imp != NULL) { + hash_iter_t *itor; + /* Look for public rules matching rulename. */ + for (itor = hash_table_iter(imp->rules); itor; + itor = hash_table_iter_next(itor)) { + hash_entry_t *he = itor->ent; + jsgf_rule_t *rule = hash_entry_val(he); + int rule_matches; + char *rule_name = importname2rulename(name); + + if (import_all) { + /* Match package name (symbol table is shared) */ + rule_matches = !strncmp(rule_name, rule->name, packlen + 1); + } + else { + /* Exact match */ + rule_matches = !strcmp(rule_name, rule->name); + } + ckd_free(rule_name); + if (rule->is_public && rule_matches) { + void *val; + char *newname; + + /* Link this rule into the current namespace. */ + c = strrchr(rule->name, '.'); + assert(c != NULL); + newname = jsgf_fullname(jsgf, c); + + E_INFO("Imported %s\n", newname); + val = hash_table_enter(jsgf->rules, newname, + jsgf_rule_retain(rule)); + if (val != (void *)rule) { + E_WARN("Multiply defined symbol: %s\n", newname); + } + if (!import_all) { + hash_table_iter_free(itor); + return rule; + } + } + } + } + + return NULL; +} + +static void +jsgf_set_search_path(jsgf_t *jsgf, const char *filename) +{ + char *jsgf_path; + +#if !defined(_WIN32_WCE) + if ((jsgf_path = getenv("JSGF_PATH")) != NULL) { + char *word, *c; + /* FIXME: This should be a function in libsphinxbase. */ + word = jsgf_path = ckd_salloc(jsgf_path); + while ((c = strchr(word, ':'))) { + *c = '\0'; + jsgf->searchpath = glist_add_ptr(jsgf->searchpath, word); + word = c + 1; + } + jsgf->searchpath = glist_add_ptr(jsgf->searchpath, word); + jsgf->searchpath = glist_reverse(jsgf->searchpath); + return; + } +#endif + + if (!filename) { + jsgf->searchpath = glist_add_ptr(jsgf->searchpath, ckd_salloc(".")); + return; + } + + jsgf_path = ckd_salloc(filename); + path2dirname(filename, jsgf_path); + jsgf->searchpath = glist_add_ptr(jsgf->searchpath, jsgf_path); +} + +jsgf_t * +jsgf_parse_file(const char *filename, jsgf_t *parent) +{ + yyscan_t yyscanner; + jsgf_t *jsgf; + int yyrv; + FILE *in = NULL; + + yylex_init(&yyscanner); + if (filename == NULL) { + yyset_in(stdin, yyscanner); + } + else { + in = fopen(filename, "r"); + if (in == NULL) { + E_ERROR_SYSTEM("Failed to open %s for parsing", filename); + return NULL; + } + yyset_in(in, yyscanner); + } + + jsgf = jsgf_grammar_new(parent); + + if (!parent) + jsgf_set_search_path(jsgf, filename); + + yyrv = yyparse(yyscanner, jsgf); + if (yyrv != 0) { + E_ERROR("Failed to parse JSGF grammar from '%s'\n", filename ? filename : "(stdin)"); + jsgf_grammar_free(jsgf); + yylex_destroy(yyscanner); + return NULL; + } + if (in) + fclose(in); + yylex_destroy(yyscanner); + + return jsgf; +} + +jsgf_t * +jsgf_parse_string(const char *string, jsgf_t * parent) +{ + yyscan_t yyscanner; + jsgf_t *jsgf; + int yyrv; + YY_BUFFER_STATE buf; + + yylex_init(&yyscanner); + buf = yy_scan_string(string, yyscanner); + + jsgf = jsgf_grammar_new(parent); + if (!parent) + jsgf_set_search_path(jsgf, NULL); + + yyrv = yyparse(yyscanner, jsgf); + if (yyrv != 0) { + E_ERROR("Failed to parse JSGF grammar from input string\n"); + jsgf_grammar_free(jsgf); + yy_delete_buffer(buf, yyscanner); + yylex_destroy(yyscanner); + return NULL; + } + yy_delete_buffer(buf, yyscanner); + yylex_destroy(yyscanner); + + return jsgf; +} |