diff options
Diffstat (limited to 'media/pocketsphinx/src/bin_mdef.h')
-rw-r--r-- | media/pocketsphinx/src/bin_mdef.h | 236 |
1 files changed, 236 insertions, 0 deletions
diff --git a/media/pocketsphinx/src/bin_mdef.h b/media/pocketsphinx/src/bin_mdef.h new file mode 100644 index 000000000..96cad5986 --- /dev/null +++ b/media/pocketsphinx/src/bin_mdef.h @@ -0,0 +1,236 @@ +/* -*- c-file-style: "linux" -*- */ +/* ==================================================================== + * Copyright (c) 2005 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/** + * @file bin_mdef.h + * + * Binary format model definition files, with support for + * heterogeneous topologies and variable-size N-phones + * + * @author David Huggins-Daines <dhuggins@cs.cmu.edu> + */ +#ifndef __BIN_MDEF_H__ +#define __BIN_MDEF_H__ + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/* SphinxBase headers. */ +#include <sphinxbase/mmio.h> +#include <sphinxbase/cmd_ln.h> +#include <pocketsphinx_export.h> + +#include "mdef.h" + +#define BIN_MDEF_FORMAT_VERSION 1 +/* Little-endian machines will write "BMDF" to disk, big-endian ones "FDMB". */ +#define BIN_MDEF_NATIVE_ENDIAN 0x46444d42 /* 'BMDF' in little-endian order */ +#define BIN_MDEF_OTHER_ENDIAN 0x424d4446 /* 'BMDF' in big-endian order */ +#ifdef __GNUC__ +#define __ATTRIBUTE_PACKED __attribute__((packed)) +#else +#define __ATTRIBUTE_PACKED +#endif + +/** + * Phone entry (on-disk, 12 bytes) + */ +typedef struct mdef_entry_s mdef_entry_t; +struct mdef_entry_s { + int32 ssid; /**< Senone sequence ID */ + int32 tmat; /**< Transition matrix ID */ + /* FIXME: is any of this actually necessary? */ + union { + /**< CI phone information - attributes (just "filler" for now) */ + struct { + uint8 filler; + uint8 reserved[3]; + } ci; + /**< CD phone information - context info. */ + struct { + uint8 wpos; + uint8 ctx[3]; /**< quintphones will require hacking */ + } cd; + } info; +} __ATTRIBUTE_PACKED; + +/** + * Invalid senone sequence ID (limited to 16 bits for PocketSphinx). + */ +#define BAD_SSID 0xffff +/** + * Invalid senone ID (limited to 16 bits for PocketSphinx). + */ +#define BAD_SENID 0xffff + +/** + * Node in CD phone tree (on-disk, 8 bytes). + */ +typedef struct cd_tree_s cd_tree_t; +struct cd_tree_s { + int16 ctx; /**< Context (word position or CI phone) */ + int16 n_down; /**< Number of children (0 for leafnode) */ + union { + int32 pid; /**< Phone ID (leafnode) */ + int32 down; /**< Next level of the tree (offset from start of cd_trees) */ + } c; +}; + +/** + * Model definition structure (in-memory). + */ +typedef struct bin_mdef_s bin_mdef_t; +struct bin_mdef_s { + int refcnt; + int32 n_ciphone; /**< Number of base (CI) phones */ + int32 n_phone; /**< Number of base (CI) phones + (CD) triphones */ + int32 n_emit_state; /**< Number of emitting states per phone (0 for heterogeneous) */ + int32 n_ci_sen; /**< Number of CI senones; these are the first */ + int32 n_sen; /**< Number of senones (CI+CD) */ + int32 n_tmat; /**< Number of transition matrices */ + int32 n_sseq; /**< Number of unique senone sequences */ + int32 n_ctx; /**< Number of phones of context */ + int32 n_cd_tree; /**< Number of nodes in cd_tree (below) */ + int16 sil; /**< CI phone ID for silence */ + + mmio_file_t *filemap;/**< File map for this file (if any) */ + char **ciname; /**< CI phone names */ + cd_tree_t *cd_tree; /**< Tree mapping CD phones to phone IDs */ + mdef_entry_t *phone; /**< All phone structures */ + uint16 **sseq; /**< Unique senone sequences (2D array built at load time) */ + uint8 *sseq_len; /**< Number of states in each sseq (NULL for homogeneous) */ + + /* These two are not stored on disk, but are generated at load time. */ + int16 *cd2cisen; /**< Parent CI-senone id for each senone */ + int16 *sen2cimap; /**< Parent CI-phone for each senone (CI or CD) */ + + /** Allocation mode for this object. */ + enum { BIN_MDEF_FROM_TEXT, BIN_MDEF_IN_MEMORY, BIN_MDEF_ON_DISK } alloc_mode; +}; + +#define bin_mdef_is_fillerphone(m,p) (((p) < (m)->n_ciphone) \ + ? (m)->phone[p].info.ci.filler \ + : (m)->phone[(m)->phone[p].info.cd.ctx[0]].info.ci.filler) +#define bin_mdef_is_ciphone(m,p) ((p) < (m)->n_ciphone) +#define bin_mdef_n_ciphone(m) ((m)->n_ciphone) +#define bin_mdef_n_phone(m) ((m)->n_phone) +#define bin_mdef_n_sseq(m) ((m)->n_sseq) +#define bin_mdef_n_emit_state(m) ((m)->n_emit_state) +#define bin_mdef_n_emit_state_phone(m,p) ((m)->n_emit_state ? (m)->n_emit_state \ + : (m)->sseq_len[(m)->phone[p].ssid]) +#define bin_mdef_n_sen(m) ((m)->n_sen) +#define bin_mdef_n_tmat(m) ((m)->n_tmat) +#define bin_mdef_pid2ssid(m,p) ((m)->phone[p].ssid) +#define bin_mdef_pid2tmatid(m,p) ((m)->phone[p].tmat) +#define bin_mdef_silphone(m) ((m)->sil) +#define bin_mdef_sen2cimap(m,s) ((m)->sen2cimap[s]) +#define bin_mdef_sseq2sen(m,ss,pos) ((m)->sseq[ss][pos]) +#define bin_mdef_pid2ci(m,p) (((p) < (m)->n_ciphone) ? (p) \ + : (m)->phone[p].info.cd.ctx[0]) + +/** + * Read a binary mdef from a file. + */ +POCKETSPHINX_EXPORT +bin_mdef_t *bin_mdef_read(cmd_ln_t *config, const char *filename); +/** + * Read a text mdef from a file (creating an in-memory binary mdef). + */ +POCKETSPHINX_EXPORT +bin_mdef_t *bin_mdef_read_text(cmd_ln_t *config, const char *filename); +/** + * Write a binary mdef to a file. + */ +POCKETSPHINX_EXPORT +int bin_mdef_write(bin_mdef_t *m, const char *filename); +/** + * Write a binary mdef to a text file. + */ +POCKETSPHINX_EXPORT +int bin_mdef_write_text(bin_mdef_t *m, const char *filename); +/** + * Retain a pointer to a bin_mdef_t. + */ +bin_mdef_t *bin_mdef_retain(bin_mdef_t *m); +/** + * Release a pointer to a binary mdef. + */ +int bin_mdef_free(bin_mdef_t *m); + +/** + * Context-independent phone lookup. + * @return phone id for ciphone. + */ +int bin_mdef_ciphone_id(bin_mdef_t *m, /**< In: Model structure being queried */ + const char *ciphone); /**< In: ciphone for which id wanted */ + +/** + * Case-insensitive context-independent phone lookup. + * @return phone id for ciphone. + */ +int bin_mdef_ciphone_id_nocase(bin_mdef_t *m, /**< In: Model structure being queried */ + const char *ciphone); /**< In: ciphone for which id wanted */ + +/* Return value: READ-ONLY ciphone string name for the given ciphone id */ +const char *bin_mdef_ciphone_str(bin_mdef_t *m, /**< In: Model structure being queried */ + int32 ci); /**< In: ciphone id for which name wanted */ + +/* Return value: phone id for the given constituents if found, else -1 */ +int bin_mdef_phone_id(bin_mdef_t *m, /**< In: Model structure being queried */ + int32 b, /**< In: base ciphone id */ + int32 l, /**< In: left context ciphone id */ + int32 r, /**< In: right context ciphone id */ + int32 pos); /**< In: Word position */ + +/* Look up a phone id, backing off to other word positions. */ +int bin_mdef_phone_id_nearest(bin_mdef_t * m, int32 b, + int32 l, int32 r, int32 pos); + +/** + * Create a phone string for the given phone (base or triphone) id in the given buf. + * + * @return 0 if successful, -1 if error. + */ +int bin_mdef_phone_str(bin_mdef_t *m, /**< In: Model structure being queried */ + int pid, /**< In: phone id being queried */ + char *buf); /**< Out: On return, buf has the string */ + +#ifdef __cplusplus +}; /* extern "C" */ +#endif /* __cplusplus */ + +#endif /* __BIN_MDEF_H__ */ |