summaryrefslogtreecommitdiffstats
path: root/media/pocketsphinx/src/ps_alignment.c
diff options
context:
space:
mode:
Diffstat (limited to 'media/pocketsphinx/src/ps_alignment.c')
-rw-r--r--media/pocketsphinx/src/ps_alignment.c487
1 files changed, 487 insertions, 0 deletions
diff --git a/media/pocketsphinx/src/ps_alignment.c b/media/pocketsphinx/src/ps_alignment.c
new file mode 100644
index 000000000..07b9225bd
--- /dev/null
+++ b/media/pocketsphinx/src/ps_alignment.c
@@ -0,0 +1,487 @@
+/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
+/* ====================================================================
+ * Copyright (c) 2010 Carnegie Mellon University. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * This work was supported in part by funding from the Defense Advanced
+ * Research Projects Agency and the National Science Foundation of the
+ * United States of America, and the CMU Sphinx Speech Consortium.
+ *
+ * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
+ * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
+ * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ====================================================================
+ *
+ */
+
+/**
+ * @file ps_alignment.c Multi-level alignment structure
+ */
+
+/* System headers. */
+
+/* SphinxBase headers. */
+#include <sphinxbase/ckd_alloc.h>
+
+/* Local headers. */
+#include "ps_alignment.h"
+
+ps_alignment_t *
+ps_alignment_init(dict2pid_t *d2p)
+{
+ ps_alignment_t *al = ckd_calloc(1, sizeof(*al));
+ al->d2p = dict2pid_retain(d2p);
+ return al;
+}
+
+int
+ps_alignment_free(ps_alignment_t *al)
+{
+ if (al == NULL)
+ return 0;
+ dict2pid_free(al->d2p);
+ ckd_free(al->word.seq);
+ ckd_free(al->sseq.seq);
+ ckd_free(al->state.seq);
+ ckd_free(al);
+ return 0;
+}
+
+#define VECTOR_GROW 10
+static void *
+vector_grow_one(void *ptr, uint16 *n_alloc, uint16 *n, size_t item_size)
+{
+ int newsize = *n + 1;
+ if (newsize < *n_alloc) {
+ *n += 1;
+ return ptr;
+ }
+ newsize += VECTOR_GROW;
+ if (newsize > 0xffff)
+ return NULL;
+ ptr = ckd_realloc(ptr, newsize * item_size);
+ *n += 1;
+ *n_alloc = newsize;
+ return ptr;
+}
+
+static ps_alignment_entry_t *
+ps_alignment_vector_grow_one(ps_alignment_vector_t *vec)
+{
+ void *ptr;
+ ptr = vector_grow_one(vec->seq, &vec->n_alloc,
+ &vec->n_ent, sizeof(*vec->seq));
+ if (ptr == NULL)
+ return NULL;
+ vec->seq = ptr;
+ return vec->seq + vec->n_ent - 1;
+}
+
+static void
+ps_alignment_vector_empty(ps_alignment_vector_t *vec)
+{
+ vec->n_ent = 0;
+}
+
+int
+ps_alignment_add_word(ps_alignment_t *al,
+ int32 wid, int duration)
+{
+ ps_alignment_entry_t *ent;
+
+ if ((ent = ps_alignment_vector_grow_one(&al->word)) == NULL)
+ return 0;
+ ent->id.wid = wid;
+ if (al->word.n_ent > 1)
+ ent->start = ent[-1].start + ent[-1].duration;
+ else
+ ent->start = 0;
+ ent->duration = duration;
+ ent->parent = PS_ALIGNMENT_NONE;
+ ent->child = PS_ALIGNMENT_NONE;
+
+ return al->word.n_ent;
+}
+
+int
+ps_alignment_populate(ps_alignment_t *al)
+{
+ dict2pid_t *d2p;
+ dict_t *dict;
+ bin_mdef_t *mdef;
+ int i, lc;
+
+ /* Clear phone and state sequences. */
+ ps_alignment_vector_empty(&al->sseq);
+ ps_alignment_vector_empty(&al->state);
+
+ /* For each word, expand to phones/senone sequences. */
+ d2p = al->d2p;
+ dict = d2p->dict;
+ mdef = d2p->mdef;
+ lc = bin_mdef_silphone(mdef);
+ for (i = 0; i < al->word.n_ent; ++i) {
+ ps_alignment_entry_t *went = al->word.seq + i;
+ ps_alignment_entry_t *sent;
+ int wid = went->id.wid;
+ int len = dict_pronlen(dict, wid);
+ int j, rc;
+
+ if (i < al->word.n_ent - 1)
+ rc = dict_first_phone(dict, al->word.seq[i+1].id.wid);
+ else
+ rc = bin_mdef_silphone(mdef);
+
+ /* First phone. */
+ if ((sent = ps_alignment_vector_grow_one(&al->sseq)) == NULL) {
+ E_ERROR("Failed to add phone entry!\n");
+ return -1;
+ }
+ sent->id.pid.cipid = dict_first_phone(dict, wid);
+ sent->id.pid.tmatid = bin_mdef_pid2tmatid(mdef, sent->id.pid.cipid);
+ sent->start = went->start;
+ sent->duration = went->duration;
+ sent->parent = i;
+ went->child = (uint16)(sent - al->sseq.seq);
+ if (len == 1)
+ sent->id.pid.ssid
+ = dict2pid_lrdiph_rc(d2p, sent->id.pid.cipid, lc, rc);
+ else
+ sent->id.pid.ssid
+ = dict2pid_ldiph_lc(d2p, sent->id.pid.cipid,
+ dict_second_phone(dict, wid), lc);
+ assert(sent->id.pid.ssid != BAD_SSID);
+
+ /* Internal phones. */
+ for (j = 1; j < len - 1; ++j) {
+ if ((sent = ps_alignment_vector_grow_one(&al->sseq)) == NULL) {
+ E_ERROR("Failed to add phone entry!\n");
+ return -1;
+ }
+ sent->id.pid.cipid = dict_pron(dict, wid, j);
+ sent->id.pid.tmatid = bin_mdef_pid2tmatid(mdef, sent->id.pid.cipid);
+ sent->id.pid.ssid = dict2pid_internal(d2p, wid, j);
+ assert(sent->id.pid.ssid != BAD_SSID);
+ sent->start = went->start;
+ sent->duration = went->duration;
+ sent->parent = i;
+ }
+
+ /* Last phone. */
+ if (j < len) {
+ xwdssid_t *rssid;
+ assert(j == len - 1);
+ if ((sent = ps_alignment_vector_grow_one(&al->sseq)) == NULL) {
+ E_ERROR("Failed to add phone entry!\n");
+ return -1;
+ }
+ sent->id.pid.cipid = dict_last_phone(dict, wid);
+ sent->id.pid.tmatid = bin_mdef_pid2tmatid(mdef, sent->id.pid.cipid);
+ rssid = dict2pid_rssid(d2p, sent->id.pid.cipid,
+ dict_second_last_phone(dict, wid));
+ sent->id.pid.ssid = rssid->ssid[rssid->cimap[rc]];
+ assert(sent->id.pid.ssid != BAD_SSID);
+ sent->start = went->start;
+ sent->duration = went->duration;
+ sent->parent = i;
+ }
+ /* Update lc. Could just use sent->id.pid.cipid here but that
+ * seems needlessly obscure. */
+ lc = dict_last_phone(dict, wid);
+ }
+
+ /* For each senone sequence, expand to senones. (we could do this
+ * nested above but this makes it more clear and easier to
+ * refactor) */
+ for (i = 0; i < al->sseq.n_ent; ++i) {
+ ps_alignment_entry_t *pent = al->sseq.seq + i;
+ ps_alignment_entry_t *sent;
+ int j;
+
+ for (j = 0; j < bin_mdef_n_emit_state(mdef); ++j) {
+ if ((sent = ps_alignment_vector_grow_one(&al->state)) == NULL) {
+ E_ERROR("Failed to add state entry!\n");
+ return -1;
+ }
+ sent->id.senid = bin_mdef_sseq2sen(mdef, pent->id.pid.ssid, j);
+ assert(sent->id.senid != BAD_SENID);
+ sent->start = pent->start;
+ sent->duration = pent->duration;
+ sent->parent = i;
+ if (j == 0)
+ pent->child = (uint16)(sent - al->state.seq);
+ }
+ }
+
+ return 0;
+}
+
+/* FIXME: Somewhat the same as the above function, needs refactoring */
+int
+ps_alignment_populate_ci(ps_alignment_t *al)
+{
+ dict2pid_t *d2p;
+ dict_t *dict;
+ bin_mdef_t *mdef;
+ int i;
+
+ /* Clear phone and state sequences. */
+ ps_alignment_vector_empty(&al->sseq);
+ ps_alignment_vector_empty(&al->state);
+
+ /* For each word, expand to phones/senone sequences. */
+ d2p = al->d2p;
+ dict = d2p->dict;
+ mdef = d2p->mdef;
+ for (i = 0; i < al->word.n_ent; ++i) {
+ ps_alignment_entry_t *went = al->word.seq + i;
+ ps_alignment_entry_t *sent;
+ int wid = went->id.wid;
+ int len = dict_pronlen(dict, wid);
+ int j;
+
+ for (j = 0; j < len; ++j) {
+ if ((sent = ps_alignment_vector_grow_one(&al->sseq)) == NULL) {
+ E_ERROR("Failed to add phone entry!\n");
+ return -1;
+ }
+ sent->id.pid.cipid = dict_pron(dict, wid, j);
+ sent->id.pid.tmatid = bin_mdef_pid2tmatid(mdef, sent->id.pid.cipid);
+ sent->id.pid.ssid = bin_mdef_pid2ssid(mdef, sent->id.pid.cipid);
+ assert(sent->id.pid.ssid != BAD_SSID);
+ sent->start = went->start;
+ sent->duration = went->duration;
+ sent->parent = i;
+ }
+ }
+
+ /* For each senone sequence, expand to senones. (we could do this
+ * nested above but this makes it more clear and easier to
+ * refactor) */
+ for (i = 0; i < al->sseq.n_ent; ++i) {
+ ps_alignment_entry_t *pent = al->sseq.seq + i;
+ ps_alignment_entry_t *sent;
+ int j;
+
+ for (j = 0; j < bin_mdef_n_emit_state(mdef); ++j) {
+ if ((sent = ps_alignment_vector_grow_one(&al->state)) == NULL) {
+ E_ERROR("Failed to add state entry!\n");
+ return -1;
+ }
+ sent->id.senid = bin_mdef_sseq2sen(mdef, pent->id.pid.ssid, j);
+ assert(sent->id.senid != BAD_SENID);
+ sent->start = pent->start;
+ sent->duration = pent->duration;
+ sent->parent = i;
+ if (j == 0)
+ pent->child = (uint16)(sent - al->state.seq);
+ }
+ }
+
+ return 0;
+}
+
+int
+ps_alignment_propagate(ps_alignment_t *al)
+{
+ ps_alignment_entry_t *last_ent = NULL;
+ int i;
+
+ /* Propagate duration up from states to phones. */
+ for (i = 0; i < al->state.n_ent; ++i) {
+ ps_alignment_entry_t *sent = al->state.seq + i;
+ ps_alignment_entry_t *pent = al->sseq.seq + sent->parent;
+ if (pent != last_ent) {
+ pent->start = sent->start;
+ pent->duration = 0;
+ }
+ pent->duration += sent->duration;
+ last_ent = pent;
+ }
+
+ /* Propagate duration up from phones to words. */
+ last_ent = NULL;
+ for (i = 0; i < al->sseq.n_ent; ++i) {
+ ps_alignment_entry_t *pent = al->sseq.seq + i;
+ ps_alignment_entry_t *went = al->word.seq + pent->parent;
+ if (went != last_ent) {
+ went->start = pent->start;
+ went->duration = 0;
+ }
+ went->duration += pent->duration;
+ last_ent = went;
+ }
+
+ return 0;
+}
+
+int
+ps_alignment_n_words(ps_alignment_t *al)
+{
+ return (int)al->word.n_ent;
+}
+
+int
+ps_alignment_n_phones(ps_alignment_t *al)
+{
+ return (int)al->sseq.n_ent;
+}
+
+int
+ps_alignment_n_states(ps_alignment_t *al)
+{
+ return (int)al->state.n_ent;
+}
+
+ps_alignment_iter_t *
+ps_alignment_words(ps_alignment_t *al)
+{
+ ps_alignment_iter_t *itor;
+
+ if (al->word.n_ent == 0)
+ return NULL;
+ itor = ckd_calloc(1, sizeof(*itor));
+ itor->al = al;
+ itor->vec = &al->word;
+ itor->pos = 0;
+ return itor;
+}
+
+ps_alignment_iter_t *
+ps_alignment_phones(ps_alignment_t *al)
+{
+ ps_alignment_iter_t *itor;
+
+ if (al->sseq.n_ent == 0)
+ return NULL;
+ itor = ckd_calloc(1, sizeof(*itor));
+ itor->al = al;
+ itor->vec = &al->sseq;
+ itor->pos = 0;
+ return itor;
+}
+
+ps_alignment_iter_t *
+ps_alignment_states(ps_alignment_t *al)
+{
+ ps_alignment_iter_t *itor;
+
+ if (al->state.n_ent == 0)
+ return NULL;
+ itor = ckd_calloc(1, sizeof(*itor));
+ itor->al = al;
+ itor->vec = &al->state;
+ itor->pos = 0;
+ return itor;
+}
+
+ps_alignment_entry_t *
+ps_alignment_iter_get(ps_alignment_iter_t *itor)
+{
+ return itor->vec->seq + itor->pos;
+}
+
+int
+ps_alignment_iter_free(ps_alignment_iter_t *itor)
+{
+ ckd_free(itor);
+ return 0;
+}
+
+ps_alignment_iter_t *
+ps_alignment_iter_goto(ps_alignment_iter_t *itor, int pos)
+{
+ if (itor == NULL)
+ return NULL;
+ if (pos >= itor->vec->n_ent) {
+ ps_alignment_iter_free(itor);
+ return NULL;
+ }
+ itor->pos = pos;
+ return itor;
+}
+
+ps_alignment_iter_t *
+ps_alignment_iter_next(ps_alignment_iter_t *itor)
+{
+ if (itor == NULL)
+ return NULL;
+ if (++itor->pos >= itor->vec->n_ent) {
+ ps_alignment_iter_free(itor);
+ return NULL;
+ }
+ return itor;
+}
+
+ps_alignment_iter_t *
+ps_alignment_iter_prev(ps_alignment_iter_t *itor)
+{
+ if (itor == NULL)
+ return NULL;
+ if (--itor->pos < 0) {
+ ps_alignment_iter_free(itor);
+ return NULL;
+ }
+ return itor;
+}
+
+ps_alignment_iter_t *
+ps_alignment_iter_up(ps_alignment_iter_t *itor)
+{
+ ps_alignment_iter_t *itor2;
+ if (itor == NULL)
+ return NULL;
+ if (itor->vec == &itor->al->word)
+ return NULL;
+ if (itor->vec->seq[itor->pos].parent == PS_ALIGNMENT_NONE)
+ return NULL;
+ itor2 = ckd_calloc(1, sizeof(*itor2));
+ itor2->al = itor->al;
+ itor2->pos = itor->vec->seq[itor->pos].parent;
+ if (itor->vec == &itor->al->sseq)
+ itor2->vec = &itor->al->word;
+ else
+ itor2->vec = &itor->al->sseq;
+ return itor2;
+}
+
+ps_alignment_iter_t *
+ps_alignment_iter_down(ps_alignment_iter_t *itor)
+{
+ ps_alignment_iter_t *itor2;
+ if (itor == NULL)
+ return NULL;
+ if (itor->vec == &itor->al->state)
+ return NULL;
+ if (itor->vec->seq[itor->pos].child == PS_ALIGNMENT_NONE)
+ return NULL;
+ itor2 = ckd_calloc(1, sizeof(*itor2));
+ itor2->al = itor->al;
+ itor2->pos = itor->vec->seq[itor->pos].child;
+ if (itor->vec == &itor->al->word)
+ itor2->vec = &itor->al->sseq;
+ else
+ itor2->vec = &itor->al->state;
+ return itor2;
+}