/* -*- c-basic-offset:4; indent-tabs-mode: nil -*- */ /* ==================================================================== * Copyright (c) 1999-2004 Carnegie Mellon University. All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * This work was supported in part by funding from the Defense Advanced * Research Projects Agency and the National Science Foundation of the * United States of America, and the CMU Sphinx Speech Consortium. * * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * ==================================================================== * */ #include <string.h> #include "dict2pid.h" #include "hmm.h" /** * @file dict2pid.c - dictionary word to senone sequence mappings */ void compress_table(s3ssid_t * uncomp_tab, s3ssid_t * com_tab, s3cipid_t * ci_map, int32 n_ci) { int32 found; int32 r; int32 tmp_r; for (r = 0; r < n_ci; r++) { com_tab[r] = BAD_S3SSID; ci_map[r] = BAD_S3CIPID; } /** Compress this map */ for (r = 0; r < n_ci; r++) { found = 0; for (tmp_r = 0; tmp_r < r && com_tab[tmp_r] != BAD_S3SSID; tmp_r++) { /* If it appears before, just filled in cimap; */ if (uncomp_tab[r] == com_tab[tmp_r]) { found = 1; ci_map[r] = tmp_r; break; } } if (found == 0) { com_tab[tmp_r] = uncomp_tab[r]; ci_map[r] = tmp_r; } } } static void compress_right_context_tree(dict2pid_t * d2p, s3ssid_t ***rdiph_rc) { int32 n_ci; int32 b, l, r; s3ssid_t *rmap; s3ssid_t *tmpssid; s3cipid_t *tmpcimap; bin_mdef_t *mdef = d2p->mdef; size_t alloc; n_ci = mdef->n_ciphone; tmpssid = ckd_calloc(n_ci, sizeof(s3ssid_t)); tmpcimap = ckd_calloc(n_ci, sizeof(s3cipid_t)); d2p->rssid = (xwdssid_t **) ckd_calloc(mdef->n_ciphone, sizeof(xwdssid_t *)); alloc = mdef->n_ciphone * sizeof(xwdssid_t *); for (b = 0; b < n_ci; b++) { d2p->rssid[b] = (xwdssid_t *) ckd_calloc(mdef->n_ciphone, sizeof(xwdssid_t)); alloc += mdef->n_ciphone * sizeof(xwdssid_t); for (l = 0; l < n_ci; l++) { rmap = rdiph_rc[b][l]; compress_table(rmap, tmpssid, tmpcimap, mdef->n_ciphone); for (r = 0; r < mdef->n_ciphone && tmpssid[r] != BAD_S3SSID; r++); if (tmpssid[0] != BAD_S3SSID) { d2p->rssid[b][l].ssid = ckd_calloc(r, sizeof(s3ssid_t)); memcpy(d2p->rssid[b][l].ssid, tmpssid, r * sizeof(s3ssid_t)); d2p->rssid[b][l].cimap = ckd_calloc(mdef->n_ciphone, sizeof(s3cipid_t)); memcpy(d2p->rssid[b][l].cimap, tmpcimap, (mdef->n_ciphone) * sizeof(s3cipid_t)); d2p->rssid[b][l].n_ssid = r; } else { d2p->rssid[b][l].ssid = NULL; d2p->rssid[b][l].cimap = NULL; d2p->rssid[b][l].n_ssid = 0; } } } E_INFO("Allocated %d bytes (%d KiB) for word-final triphones\n", (int)alloc, (int)alloc / 1024); ckd_free(tmpssid); ckd_free(tmpcimap); } static void compress_left_right_context_tree(dict2pid_t * d2p) { int32 n_ci; int32 b, l, r; s3ssid_t *rmap; s3ssid_t *tmpssid; s3cipid_t *tmpcimap; bin_mdef_t *mdef = d2p->mdef; size_t alloc; n_ci = mdef->n_ciphone; tmpssid = ckd_calloc(n_ci, sizeof(s3ssid_t)); tmpcimap = ckd_calloc(n_ci, sizeof(s3cipid_t)); assert(d2p->lrdiph_rc); d2p->lrssid = (xwdssid_t **) ckd_calloc(mdef->n_ciphone, sizeof(xwdssid_t *)); alloc = mdef->n_ciphone * sizeof(xwdssid_t *); for (b = 0; b < n_ci; b++) { d2p->lrssid[b] = (xwdssid_t *) ckd_calloc(mdef->n_ciphone, sizeof(xwdssid_t)); alloc += mdef->n_ciphone * sizeof(xwdssid_t); for (l = 0; l < n_ci; l++) { rmap = d2p->lrdiph_rc[b][l]; compress_table(rmap, tmpssid, tmpcimap, mdef->n_ciphone); for (r = 0; r < mdef->n_ciphone && tmpssid[r] != BAD_S3SSID; r++); if (tmpssid[0] != BAD_S3SSID) { d2p->lrssid[b][l].ssid = ckd_calloc(r, sizeof(s3ssid_t)); memcpy(d2p->lrssid[b][l].ssid, tmpssid, r * sizeof(s3ssid_t)); d2p->lrssid[b][l].cimap = ckd_calloc(mdef->n_ciphone, sizeof(s3cipid_t)); memcpy(d2p->lrssid[b][l].cimap, tmpcimap, (mdef->n_ciphone) * sizeof(s3cipid_t)); d2p->lrssid[b][l].n_ssid = r; } else { d2p->lrssid[b][l].ssid = NULL; d2p->lrssid[b][l].cimap = NULL; d2p->lrssid[b][l].n_ssid = 0; } } } /* Try to compress lrdiph_rc into lrdiph_rc_compressed */ ckd_free(tmpssid); ckd_free(tmpcimap); E_INFO("Allocated %d bytes (%d KiB) for single-phone word triphones\n", (int)alloc, (int)alloc / 1024); } /** ARCHAN, A duplicate of get_rc_npid in ctxt_table.h. I doubt whether it is correct because the compressed map has not been checked. */ int32 get_rc_nssid(dict2pid_t * d2p, s3wid_t w) { int32 pronlen; s3cipid_t b, lc; dict_t *dict = d2p->dict; pronlen = dict->word[w].pronlen; b = dict->word[w].ciphone[pronlen - 1]; if (pronlen == 1) { /* Is this true ? No known left context. But all cimaps (for any l) are identical; pick one */ /*E_INFO("Single phone word\n"); */ return (d2p->lrssid[b][0].n_ssid); } else { /* E_INFO("Multiple phone word\n"); */ lc = dict->word[w].ciphone[pronlen - 2]; return (d2p->rssid[b][lc].n_ssid); } } s3cipid_t * dict2pid_get_rcmap(dict2pid_t * d2p, s3wid_t w) { int32 pronlen; s3cipid_t b, lc; dict_t *dict = d2p->dict; pronlen = dict->word[w].pronlen; b = dict->word[w].ciphone[pronlen - 1]; if (pronlen == 1) { /* Is this true ? No known left context. But all cimaps (for any l) are identical; pick one */ /*E_INFO("Single phone word\n"); */ return (d2p->lrssid[b][0].cimap); } else { /* E_INFO("Multiple phone word\n"); */ lc = dict->word[w].ciphone[pronlen - 2]; return (d2p->rssid[b][lc].cimap); } } static void free_compress_map(xwdssid_t ** tree, int32 n_ci) { int32 b, l; for (b = 0; b < n_ci; b++) { for (l = 0; l < n_ci; l++) { ckd_free(tree[b][l].ssid); ckd_free(tree[b][l].cimap); } ckd_free(tree[b]); } ckd_free(tree); } static void populate_lrdiph(dict2pid_t *d2p, s3ssid_t ***rdiph_rc, s3cipid_t b) { bin_mdef_t *mdef = d2p->mdef; s3cipid_t l, r; for (l = 0; l < bin_mdef_n_ciphone(mdef); l++) { for (r = 0; r < bin_mdef_n_ciphone(mdef); r++) { s3pid_t p; p = bin_mdef_phone_id_nearest(mdef, (s3cipid_t) b, (s3cipid_t) l, (s3cipid_t) r, WORD_POSN_SINGLE); d2p->lrdiph_rc[b][l][r] = bin_mdef_pid2ssid(mdef, p); if (r == bin_mdef_silphone(mdef)) d2p->ldiph_lc[b][r][l] = bin_mdef_pid2ssid(mdef, p); if (rdiph_rc && l == bin_mdef_silphone(mdef)) rdiph_rc[b][l][r] = bin_mdef_pid2ssid(mdef, p); assert(IS_S3SSID(bin_mdef_pid2ssid(mdef, p))); E_DEBUG(2,("%s(%s,%s) => %d / %d\n", bin_mdef_ciphone_str(mdef, b), bin_mdef_ciphone_str(mdef, l), bin_mdef_ciphone_str(mdef, r), p, bin_mdef_pid2ssid(mdef, p))); } } } int dict2pid_add_word(dict2pid_t *d2p, int32 wid) { bin_mdef_t *mdef = d2p->mdef; dict_t *d = d2p->dict; if (dict_pronlen(d, wid) > 1) { s3cipid_t l; /* Make sure we have left and right context diphones for this * word. */ if (d2p->ldiph_lc[dict_first_phone(d, wid)][dict_second_phone(d, wid)][0] == BAD_S3SSID) { E_DEBUG(2, ("Filling in left-context diphones for %s(?,%s)\n", bin_mdef_ciphone_str(mdef, dict_first_phone(d, wid)), bin_mdef_ciphone_str(mdef, dict_second_phone(d, wid)))); for (l = 0; l < bin_mdef_n_ciphone(mdef); l++) { int p = bin_mdef_phone_id_nearest(mdef, dict_first_phone(d, wid), l, dict_second_phone(d, wid), WORD_POSN_BEGIN); d2p->ldiph_lc[dict_first_phone(d, wid)][dict_second_phone(d, wid)][l] = bin_mdef_pid2ssid(mdef, p); } } if (d2p->rssid[dict_last_phone(d, wid)][dict_second_last_phone(d, wid)].n_ssid == 0) { s3ssid_t *rmap; s3ssid_t *tmpssid; s3cipid_t *tmpcimap; s3cipid_t r; E_DEBUG(2, ("Filling in right-context diphones for %s(%s,?)\n", bin_mdef_ciphone_str(mdef, dict_last_phone(d, wid)), bin_mdef_ciphone_str(mdef, dict_second_last_phone(d, wid)))); rmap = ckd_calloc(bin_mdef_n_ciphone(mdef), sizeof(*rmap)); for (r = 0; r < bin_mdef_n_ciphone(mdef); r++) { int p = bin_mdef_phone_id_nearest(mdef, dict_last_phone(d, wid), dict_second_last_phone(d, wid), r, WORD_POSN_END); rmap[r] = bin_mdef_pid2ssid(mdef, p); } tmpssid = ckd_calloc(bin_mdef_n_ciphone(mdef), sizeof(*tmpssid)); tmpcimap = ckd_calloc(bin_mdef_n_ciphone(mdef), sizeof(*tmpcimap)); compress_table(rmap, tmpssid, tmpcimap, bin_mdef_n_ciphone(mdef)); for (r = 0; r < mdef->n_ciphone && tmpssid[r] != BAD_S3SSID; r++) ; d2p->rssid[dict_last_phone(d, wid)][dict_second_last_phone(d, wid)].ssid = tmpssid; d2p->rssid[dict_last_phone(d, wid)][dict_second_last_phone(d, wid)].cimap = tmpcimap; d2p->rssid[dict_last_phone(d, wid)][dict_second_last_phone(d, wid)].n_ssid = r; ckd_free(rmap); } } else { /* Make sure we have a left-right context triphone entry for * this word. */ E_INFO("Filling in context triphones for %s(?,?)\n", bin_mdef_ciphone_str(mdef, dict_first_phone(d, wid))); if (d2p->lrdiph_rc[dict_first_phone(d, wid)][0][0] == BAD_S3SSID) { populate_lrdiph(d2p, NULL, dict_first_phone(d, wid)); } } return 0; } s3ssid_t dict2pid_internal(dict2pid_t *d2p, int32 wid, int pos) { int b, l, r, p; dict_t *dict = d2p->dict; bin_mdef_t *mdef = d2p->mdef; if (pos == 0 || pos == dict_pronlen(dict, wid)) return BAD_S3SSID; b = dict_pron(dict, wid, pos); l = dict_pron(dict, wid, pos - 1); r = dict_pron(dict, wid, pos + 1); p = bin_mdef_phone_id_nearest(mdef, (s3cipid_t) b, (s3cipid_t) l, (s3cipid_t) r, WORD_POSN_INTERNAL); return bin_mdef_pid2ssid(mdef, p); } dict2pid_t * dict2pid_build(bin_mdef_t * mdef, dict_t * dict) { dict2pid_t *dict2pid; s3ssid_t ***rdiph_rc; bitvec_t *ldiph, *rdiph, *single; int32 pronlen; int32 b, l, r, w, p; E_INFO("Building PID tables for dictionary\n"); assert(mdef); assert(dict); dict2pid = (dict2pid_t *) ckd_calloc(1, sizeof(dict2pid_t)); dict2pid->refcount = 1; dict2pid->mdef = bin_mdef_retain(mdef); dict2pid->dict = dict_retain(dict); E_INFO("Allocating %d^3 * %d bytes (%d KiB) for word-initial triphones\n", mdef->n_ciphone, sizeof(s3ssid_t), mdef->n_ciphone * mdef->n_ciphone * mdef->n_ciphone * sizeof(s3ssid_t) / 1024); dict2pid->ldiph_lc = (s3ssid_t ***) ckd_calloc_3d(mdef->n_ciphone, mdef->n_ciphone, mdef->n_ciphone, sizeof(s3ssid_t)); /* Only used internally to generate rssid */ rdiph_rc = (s3ssid_t ***) ckd_calloc_3d(mdef->n_ciphone, mdef->n_ciphone, mdef->n_ciphone, sizeof(s3ssid_t)); dict2pid->lrdiph_rc = (s3ssid_t ***) ckd_calloc_3d(mdef->n_ciphone, mdef->n_ciphone, mdef->n_ciphone, sizeof (s3ssid_t)); /* Actually could use memset for this, if BAD_S3SSID is guaranteed * to be 65535... */ for (b = 0; b < mdef->n_ciphone; ++b) { for (r = 0; r < mdef->n_ciphone; ++r) { for (l = 0; l < mdef->n_ciphone; ++l) { dict2pid->ldiph_lc[b][r][l] = BAD_S3SSID; dict2pid->lrdiph_rc[b][l][r] = BAD_S3SSID; rdiph_rc[b][l][r] = BAD_S3SSID; } } } /* Track which diphones / ciphones have been seen. */ ldiph = bitvec_alloc(mdef->n_ciphone * mdef->n_ciphone); rdiph = bitvec_alloc(mdef->n_ciphone * mdef->n_ciphone); single = bitvec_alloc(mdef->n_ciphone); for (w = 0; w < dict_size(dict2pid->dict); w++) { pronlen = dict_pronlen(dict, w); if (pronlen >= 2) { b = dict_first_phone(dict, w); r = dict_second_phone(dict, w); /* Populate ldiph_lc */ if (bitvec_is_clear(ldiph, b * mdef->n_ciphone + r)) { /* Mark this diphone as done */ bitvec_set(ldiph, b * mdef->n_ciphone + r); /* Record all possible ssids for b(?,r) */ for (l = 0; l < bin_mdef_n_ciphone(mdef); l++) { p = bin_mdef_phone_id_nearest(mdef, (s3cipid_t) b, (s3cipid_t) l, (s3cipid_t) r, WORD_POSN_BEGIN); dict2pid->ldiph_lc[b][r][l] = bin_mdef_pid2ssid(mdef, p); } } /* Populate rdiph_rc */ l = dict_second_last_phone(dict, w); b = dict_last_phone(dict, w); if (bitvec_is_clear(rdiph, b * mdef->n_ciphone + l)) { /* Mark this diphone as done */ bitvec_set(rdiph, b * mdef->n_ciphone + l); for (r = 0; r < bin_mdef_n_ciphone(mdef); r++) { p = bin_mdef_phone_id_nearest(mdef, (s3cipid_t) b, (s3cipid_t) l, (s3cipid_t) r, WORD_POSN_END); rdiph_rc[b][l][r] = bin_mdef_pid2ssid(mdef, p); } } } else if (pronlen == 1) { b = dict_pron(dict, w, 0); E_DEBUG(1,("Building tables for single phone word %s phone %d = %s\n", dict_wordstr(dict, w), b, bin_mdef_ciphone_str(mdef, b))); /* Populate lrdiph_rc (and also ldiph_lc, rdiph_rc if needed) */ if (bitvec_is_clear(single, b)) { populate_lrdiph(dict2pid, rdiph_rc, b); bitvec_set(single, b); } } } bitvec_free(ldiph); bitvec_free(rdiph); bitvec_free(single); /* Try to compress rdiph_rc into rdiph_rc_compressed */ compress_right_context_tree(dict2pid, rdiph_rc); compress_left_right_context_tree(dict2pid); ckd_free_3d(rdiph_rc); dict2pid_report(dict2pid); return dict2pid; } dict2pid_t * dict2pid_retain(dict2pid_t *d2p) { ++d2p->refcount; return d2p; } int dict2pid_free(dict2pid_t * d2p) { if (d2p == NULL) return 0; if (--d2p->refcount > 0) return d2p->refcount; if (d2p->ldiph_lc) ckd_free_3d((void ***) d2p->ldiph_lc); if (d2p->lrdiph_rc) ckd_free_3d((void ***) d2p->lrdiph_rc); if (d2p->rssid) free_compress_map(d2p->rssid, bin_mdef_n_ciphone(d2p->mdef)); if (d2p->lrssid) free_compress_map(d2p->lrssid, bin_mdef_n_ciphone(d2p->mdef)); bin_mdef_free(d2p->mdef); dict_free(d2p->dict); ckd_free(d2p); return 0; } void dict2pid_report(dict2pid_t * d2p) { } void dict2pid_dump(FILE * fp, dict2pid_t * d2p) { int32 w, p, pronlen; int32 i, j, b, l, r; bin_mdef_t *mdef = d2p->mdef; dict_t *dict = d2p->dict; fprintf(fp, "# INTERNAL (wd comssid ssid ssid ... ssid comssid)\n"); for (w = 0; w < dict_size(dict); w++) { fprintf(fp, "%30s ", dict_wordstr(dict, w)); pronlen = dict_pronlen(dict, w); for (p = 0; p < pronlen; p++) fprintf(fp, " %5d", dict2pid_internal(d2p, w, p)); fprintf(fp, "\n"); } fprintf(fp, "#\n"); fprintf(fp, "# LDIPH_LC (b r l ssid)\n"); for (b = 0; b < bin_mdef_n_ciphone(mdef); b++) { for (r = 0; r < bin_mdef_n_ciphone(mdef); r++) { for (l = 0; l < bin_mdef_n_ciphone(mdef); l++) { if (IS_S3SSID(d2p->ldiph_lc[b][r][l])) fprintf(fp, "%6s %6s %6s %5d\n", bin_mdef_ciphone_str(mdef, (s3cipid_t) b), bin_mdef_ciphone_str(mdef, (s3cipid_t) r), bin_mdef_ciphone_str(mdef, (s3cipid_t) l), d2p->ldiph_lc[b][r][l]); /* RAH, ldiph_lc is returning an int32, %d expects an int16 */ } } } fprintf(fp, "#\n"); fprintf(fp, "# SSEQ %d (senid senid ...)\n", mdef->n_sseq); for (i = 0; i < mdef->n_sseq; i++) { fprintf(fp, "%5d ", i); for (j = 0; j < bin_mdef_n_emit_state(mdef); j++) fprintf(fp, " %5d", mdef->sseq[i][j]); fprintf(fp, "\n"); } fprintf(fp, "#\n"); fprintf(fp, "# END\n"); fflush(fp); }