summary | refs | log | tree | commit | diff | stats
path: root/media/pocketsphinx/src/ptm_mgau.c
diff options
context:
space:
mode:
Diffstat (limited to 'media/pocketsphinx/src/ptm_mgau.c')
-rw-r--r-- media/pocketsphinx/src/ptm_mgau.c 912
1 files changed, 0 insertions, 912 deletions
diff --git a/media/pocketsphinx/src/ptm_mgau.c b/media/pocketsphinx/src/ptm_mgau.c
deleted file mode 100644
index 2475f2049..000000000
--- a/media/pocketsphinx/src/ptm_mgau.c
+++ /dev/null
@@ -1,912 +0,0 @@
-/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
-/* ====================================================================
- * Copyright (c) 1999-2010 Carnegie Mellon University. All rights
- * reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- *
- * This work was supported in part by funding from the Defense Advanced
- * Research Projects Agency and the National Science Foundation of the
- * United States of America, and the CMU Sphinx Speech Consortium.
- *
- * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
- * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
- * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
- * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * ====================================================================
- *
- */
-
-/* System headers */
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <assert.h>
-#include <limits.h>
-#include <math.h>
-#if defined(__ADSPBLACKFIN__)
-#elif !defined(_WIN32_WCE)
-#include <sys/types.h>
-#endif
-
-/* SphinxBase headers */
-#include <sphinx_config.h>
-#include <sphinxbase/cmd_ln.h>
-#include <sphinxbase/fixpoint.h>
-#include <sphinxbase/ckd_alloc.h>
-#include <sphinxbase/bio.h>
-#include <sphinxbase/err.h>
-#include <sphinxbase/prim_type.h>
-
-/* Local headers */
-#include "tied_mgau_common.h"
-#include "ptm_mgau.h"
-
-static ps_mgaufuncs_t ptm_mgau_funcs = { /* Function table installed as ps->vt by ptm_mgau_init(). */
- "ptm", /* NOTE(review): presumably the scorer's name field - confirm against ps_mgaufuncs_t */
- ptm_mgau_frame_eval, /* frame_eval */
- ptm_mgau_mllr_transform, /* transform */
- ptm_mgau_free /* free */
-};
-
-#define COMPUTE_GMM_MAP(_idx) /* Fill diff/sqdiff/compl[_idx] for one dimension; expects locals obs, mean, var, diff, sqdiff, compl in the caller's scope. */ \
- diff[_idx] = obs[_idx] - mean[_idx]; \
- sqdiff[_idx] = MFCCMUL(diff[_idx], diff[_idx]); \
- compl[_idx] = MFCCMUL(sqdiff[_idx], var[_idx]);
-#define COMPUTE_GMM_REDUCE(_idx) /* Fold compl[_idx] into the caller's running score d via GMMSUB. */ \
- d = GMMSUB(d, compl[_idx]);
-
-/**
- * Record score d in topn[i], then shift that entry towards the front
- * of the list past every earlier entry whose score is strictly lower,
- * so that topn[0..i] stays ordered by descending score.
- */
-static void
-insertion_sort_topn(ptm_topn_t *topn, int i, int32 d)
-{
-    ptm_topn_t moved;
-    int slot;
-
-    topn[i].score = d;
-    if (i == 0)
-        return;
-
-    moved = topn[i];
-    slot = i;
-    /* Open a gap by sliding lower-scoring predecessors down one place. */
-    while (slot > 0 && d > topn[slot - 1].score) {
-        topn[slot] = topn[slot - 1];
-        --slot;
-    }
-    topn[slot] = moved;
-}
-
-/**
- * Re-score the codewords already present in the top-N list of
- * codebook cb, feature stream feat, against observation vector z,
- * keeping the list sorted as each entry is updated.
- *
- * @return Score of the first (best) entry after re-scoring.
- */
-static int
-eval_topn(ptm_mgau_t *s, int cb, int feat, mfcc_t *z)
-{
-    ptm_topn_t *topn = s->f->topn[cb][feat];
-    int ceplen = s->g->featlen[feat];
-    int i;
-
-    for (i = 0; i < s->max_topn; i++) {
-        /* diff, diff^2, component likelihood (names are fixed by the
-         * COMPUTE_GMM_* macros) */
-        mfcc_t diff[4], sqdiff[4], compl[4];
-        int32 cw = topn[i].cw;
-        mfcc_t *mean = s->g->mean[cb][feat][0] + cw * ceplen;
-        mfcc_t *var = s->g->var[cb][feat][0] + cw * ceplen;
-        mfcc_t *obs = z;
-        mfcc_t d = s->g->det[cb][feat][cw];
-        int32 j = 0;
-
-        /* Leading (ceplen % 4) dimensions, one at a time. */
-        while (j < ceplen % 4) {
-            diff[0] = *obs++ - *mean++;
-            sqdiff[0] = MFCCMUL(diff[0], diff[0]);
-            compl[0] = MFCCMUL(sqdiff[0], *var);
-            d = GMMSUB(d, compl[0]);
-            ++var;
-            ++j;
-        }
-        /* Remaining dimensions in groups of four.  Vectorizing is
-         * unlikely to matter since the outer loop is small. */
-        while (j < ceplen) {
-            COMPUTE_GMM_MAP(0);
-            COMPUTE_GMM_MAP(1);
-            COMPUTE_GMM_MAP(2);
-            COMPUTE_GMM_MAP(3);
-            COMPUTE_GMM_REDUCE(0);
-            COMPUTE_GMM_REDUCE(1);
-            COMPUTE_GMM_REDUCE(2);
-            COMPUTE_GMM_REDUCE(3);
-            var += 4;
-            obs += 4;
-            mean += 4;
-            j += 4;
-        }
-        insertion_sort_topn(topn, i, (int32)d);
-    }
-
-    return topn[0].score;
-}
-
-/**
- * Insert codeword cw with score intd into the sorted list [best, worst],
- * dropping the current worst entry.  Entries whose score is <= intd are
- * shifted one slot towards worst.  On return *cur points at the slot
- * that received the new entry.
- *
- * A linear shift looks expensive, but less than 1% of eval_cb's time
- * is spent here.
- */
-static void
-insertion_sort_cb(ptm_topn_t **cur, ptm_topn_t *worst, ptm_topn_t *best,
-                  int cw, int32 intd)
-{
-    ptm_topn_t *slot = worst - 1;
-
-    while (slot >= best && intd >= slot->score) {
-        memcpy(slot + 1, slot, sizeof(*slot));
-        --slot;
-    }
-    ++slot;
-    slot->cw = cw;
-    slot->score = intd;
-    *cur = slot;
-}
-/**
- * Score every density of codebook cb, feature stream feat, against
- * observation z and insert into the top-N list s->f->topn[cb][feat]
- * each codeword whose score is at least the current worst top-N score
- * and which is not already in the list.  Evaluation of a density stops
- * as soon as its running score d falls below that threshold.
- *
- * Returns the score of the first (best) top-N entry after the scan.
- */
-static int
-eval_cb(ptm_mgau_t *s, int cb, int feat, mfcc_t *z)
-{
- ptm_topn_t *worst, *best, *topn;
- mfcc_t *mean;
- mfcc_t *var, *det, *detP, *detE;
- int32 i, ceplen;
-
- best = topn = s->f->topn[cb][feat];
- worst = topn + (s->max_topn - 1); /* last slot of the top-N list */
- mean = s->g->mean[cb][feat][0];
- var = s->g->var[cb][feat][0];
- det = s->g->det[cb][feat];
- detE = det + s->g->n_density; /* one past the last density */
- ceplen = s->g->featlen[feat];
-
- for (detP = det; detP < detE; ++detP) {
- mfcc_t diff[4], sqdiff[4], compl[4]; /* diff, diff^2, component likelihood */
- mfcc_t d, thresh;
- mfcc_t *obs;
- ptm_topn_t *cur;
- int32 cw, j;
-
- d = *detP;
- thresh = (mfcc_t) worst->score; /* Avoid int-to-float conversions */
- obs = z;
- cw = (int)(detP - det); /* codeword index = position of this density */
-
- /* Unroll the loop starting with the first dimension(s). In
- * theory this might be a bit faster if this Gaussian gets
- * "knocked out" by C0. In practice not. */
- for (j = 0; (j < ceplen % 4) && (d >= thresh); ++j) {
- diff[0] = *obs++ - *mean++;
- sqdiff[0] = MFCCMUL(diff[0], diff[0]);
- compl[0] = MFCCMUL(sqdiff[0], *var++);
- d = GMMSUB(d, compl[0]);
- }
- /* Now do 4 dimensions at a time. You'd think that GCC would
- * vectorize this? Apparently not. And it's right, because
- * that won't make this any faster, at least on x86-64. */
- for (; j < ceplen && d >= thresh; j += 4) {
- COMPUTE_GMM_MAP(0);
- COMPUTE_GMM_MAP(1);
- COMPUTE_GMM_MAP(2);
- COMPUTE_GMM_MAP(3);
- COMPUTE_GMM_REDUCE(0);
- COMPUTE_GMM_REDUCE(1);
- COMPUTE_GMM_REDUCE(2);
- COMPUTE_GMM_REDUCE(3);
- var += 4;
- obs += 4;
- mean += 4;
- }
- if (j < ceplen) {
- /* terminated early, so not in topn; mean and var have only
- * advanced j dimensions, so skip the rest to reach the next
- * density's parameters */
- mean += (ceplen - j);
- var += (ceplen - j);
- continue;
- }
- if (d < thresh) /* dropped below the threshold on the final dimensions */
- continue;
- for (i = 0; i < s->max_topn; i++) {
- /* already there, so don't need to insert */
- if (topn[i].cw == cw)
- break;
- }
- if (i < s->max_topn)
- continue; /* already there. Don't insert */
- insertion_sort_cb(&cur, worst, best, cw, (int32)d);
- }
-
- return best->score;
-}
-
-/**
- * Compute top-N densities for active codebooks (and prune).
- *
- * The top-N lists carried over from the previous frame are re-scored
- * for every codebook and stream.  The full codebook scan is then run
- * for active codebooks only, and only on frames that are a multiple of
- * s->ds_ratio.
- *
- * @return Always 0.
- */
-static int
-ptm_mgau_codebook_eval(ptm_mgau_t *s, mfcc_t **z, int frame)
-{
-    int cb, feat;
-
-    /* Re-score the inherited top-N entries first. */
-    for (cb = 0; cb < s->g->n_mgau; ++cb) {
-        for (feat = 0; feat < s->g->n_feat; ++feat) {
-            eval_topn(s, cb, feat, z[feat]);
-        }
-    }
-
-    /* With frame downsampling, skipped frames stop here. */
-    if ((frame % s->ds_ratio) != 0)
-        return 0;
-
-    /* Full scan of each active codebook. */
-    for (cb = 0; cb < s->g->n_mgau; ++cb) {
-        if (bitvec_is_clear(s->f->mgau_active, cb))
-            continue;
-        for (feat = 0; feat < s->g->n_feat; ++feat)
-            eval_cb(s, cb, feat, z[feat]);
-    }
-    return 0;
-}
-
-/**
- * Normalize densities to produce "posterior probabilities", i.e.
- * values with a reasonable dynamic range, then scale and clamp them to
- * the acceptable range.  This exists solely so that fast_logmath_add()
- * can be used.  The normalizer must be shared across all codebooks of
- * a feature stream or the scores come out defective, which is why the
- * stream loop is the outer one (per-stream normalization should also
- * give greater precision).
- *
- * z and frame are not used.
- *
- * @return Always 0.
- */
-static int
-ptm_mgau_codebook_norm(ptm_mgau_t *s, mfcc_t **z, int frame)
-{
-    int cb, feat;
-
-    for (feat = 0; feat < s->g->n_feat; ++feat) {
-        int32 norm = WORST_SCORE;
-
-        /* Pass 1: the normalizer is the best shifted score among the
-         * active codebooks of this stream. */
-        for (cb = 0; cb < s->g->n_mgau; ++cb) {
-            ptm_topn_t *head;
-
-            if (bitvec_is_clear(s->f->mgau_active, cb))
-                continue;
-            head = &s->f->topn[cb][feat][0];
-            if (norm < head->score >> SENSCR_SHIFT)
-                norm = head->score >> SENSCR_SHIFT;
-        }
-        assert(norm != WORST_SCORE);
-
-        /* Pass 2: shift, subtract the normalizer, negate and clamp. */
-        for (cb = 0; cb < s->g->n_mgau; ++cb) {
-            int32 k;
-
-            if (bitvec_is_clear(s->f->mgau_active, cb))
-                continue;
-            for (k = 0; k < s->max_topn; ++k) {
-                ptm_topn_t *entry = &s->f->topn[cb][feat][k];
-
-                entry->score >>= SENSCR_SHIFT;
-                entry->score -= norm;
-                entry->score = -entry->score;
-                if (entry->score > MAX_NEG_ASCR)
-                    entry->score = MAX_NEG_ASCR;
-            }
-        }
-    }
-
-    return 0;
-}
-
-/**
- * Rebuild the active-codebook bit vector of the current frame.
- *
- * With compallsen set every codebook is marked active.  Otherwise the
- * vector is cleared and the codebook of each active senone is set;
- * senone_active holds deltas, so each entry is added to the previous
- * senone id to recover the absolute id.
- *
- * @return Always 0.
- */
-static int
-ptm_mgau_calc_cb_active(ptm_mgau_t *s, uint8 *senone_active,
-                        int32 n_senone_active, int compallsen)
-{
-    int i;
-    int sen = 0;
-
-    if (compallsen) {
-        bitvec_set_all(s->f->mgau_active, s->g->n_mgau);
-        return 0;
-    }
-
-    bitvec_clear_all(s->f->mgau_active, s->g->n_mgau);
-    for (i = 0; i < n_senone_active; ++i) {
-        int cb;
-
-        sen += senone_active[i];
-        cb = s->sen2cb[sen];
-        bitvec_set(s->f->mgau_active, cb);
-    }
-
-    E_DEBUG(1, ("Active codebooks:"));
-    for (i = 0; i < s->g->n_mgau; ++i) {
-        if (bitvec_is_clear(s->f->mgau_active, i))
-            continue;
-        E_DEBUGCONT(1, (" %d", i));
-    }
-    E_DEBUGCONT(1, ("\n"));
-    return 0;
-}
-
-/**
- * Compute senone scores from top-N densities for active codebooks.
- *
- * @param s               Scorer; s->f must point at the frame's top-N data.
- * @param senone_scores   Output array of s->n_sen scores, zeroed first.
- * @param senone_active   Delta-coded list of active senones (each entry
- *                        is added to the previous senone id).
- * @param n_senone_active Number of entries in senone_active; replaced by
- *                        s->n_sen when compall is set.
- * @param compall         Non-zero to score senones 0..s->n_sen-1 and
- *                        ignore senone_active.
- * @return Always 0.
- */
-static int
-ptm_mgau_senone_eval(ptm_mgau_t *s, int16 *senone_scores,
-                     uint8 *senone_active, int32 n_senone_active,
-                     int compall)
-{
-    int i, lastsen, bestscore;
-
-    memset(senone_scores, 0, s->n_sen * sizeof(*senone_scores));
-    /* FIXME: This is the non-cache-efficient way to do this.  We want
-     * to evaluate one codeword at a time but this requires us to have
-     * a reverse codebook to senone mapping, which we don't have
-     * (yet), since different codebooks have different top-N
-     * codewords. */
-    if (compall)
-        n_senone_active = s->n_sen;
-    bestscore = 0x7fffffff;
-    for (lastsen = i = 0; i < n_senone_active; ++i) {
-        int sen, f, cb;
-        int ascore;
-
-        if (compall)
-            sen = i;
-        else
-            sen = senone_active[i] + lastsen;
-        lastsen = sen;
-        cb = s->sen2cb[sen];
-
-        if (bitvec_is_clear(s->f->mgau_active, cb)) {
-            int j;
-            /* Because senone_active is deltas we can't really "knock
-             * out" senones from pruned codebooks, and in any case,
-             * it wouldn't make any difference to the search code,
-             * which doesn't expect senone_active to change. */
-            for (f = 0; f < s->g->n_feat; ++f) {
-                for (j = 0; j < s->max_topn; ++j) {
-                    s->f->topn[cb][f][j].score = MAX_NEG_ASCR;
-                }
-            }
-        }
-        /* For each feature, log-sum codeword scores + mixw to get
-         * feature density, then sum (multiply) to get ascore */
-        ascore = 0;
-        for (f = 0; f < s->g->n_feat; ++f) {
-            ptm_topn_t *topn;
-            int j, fden = 0;
-            topn = s->f->topn[cb][f];
-            for (j = 0; j < s->max_topn; ++j) {
-                int mixw;
-                /* Find mixture weight for this codeword. */
-                if (s->mixw_cb) {
-                    /* Clustered weights: two senones share one byte
-                     * (hence sen/2), so the nibble must be chosen by
-                     * the senone's parity, not by the byte's own low
-                     * bit.  NOTE(review): odd senone -> high nibble is
-                     * assumed to match the sendump writer - confirm. */
-                    int dcw = s->mixw[f][topn[j].cw][sen/2];
-                    dcw = (sen & 1) ? dcw >> 4 : dcw & 0x0f;
-                    mixw = s->mixw_cb[dcw];
-                }
-                else {
-                    mixw = s->mixw[f][topn[j].cw][sen];
-                }
-                if (j == 0)
-                    fden = mixw + topn[j].score;
-                else
-                    fden = fast_logmath_add(s->lmath_8b, fden,
-                                            mixw + topn[j].score);
-                E_DEBUG(3, ("fden[%d][%d] l+= %d + %d = %d\n",
-                            sen, f, mixw, topn[j].score, fden));
-            }
-            ascore += fden;
-        }
-        if (ascore < bestscore) bestscore = ascore;
-        senone_scores[sen] = ascore;
-    }
-    /* Normalize the scores again (finishing the job we started above
-     * in ptm_mgau_codebook_norm...) */
-    for (i = 0; i < s->n_sen; ++i) {
-        senone_scores[i] -= bestscore;
-    }
-
-    return 0;
-}
-
-/**
- * Compute senone scores for the active senones.
- *
- * Selects the history slot for the requested frame, refreshes its
- * top-N information when the frame is not a past one, then scores the
- * senones from it.
- *
- * @return Always 0.
- */
-int32
-ptm_mgau_frame_eval(ps_mgau_t *ps,
-                    int16 *senone_scores,
-                    uint8 *senone_active,
-                    int32 n_senone_active,
-                    mfcc_t ** featbuf, int32 frame,
-                    int32 compallsen)
-{
-    ptm_mgau_t *s = (ptm_mgau_t *)ps;
-    /* Slot of this frame in the rotating history buffer.  No bounds
-     * checking is done, so asking for a frame in the future or one too
-     * far in the past just yields semi-random data.  As the history is
-     * only used for fast match that might not be fatal. */
-    int fast_eval_idx = frame % s->n_fast_hist;
-
-    s->f = s->hist + fast_eval_idx;
-
-    /* Past frames already have their top-N codewords (we hope!);
-     * anything else has to be computed now. */
-    if (frame >= ps_mgau_base(ps)->frame_idx) {
-        /* The previous frame lives one slot back, wrapping around.  On
-         * the first input frame it is all WORST_DIST, which is
-         * harmless. */
-        int prev_idx = (fast_eval_idx == 0)
-            ? s->n_fast_hist - 1
-            : fast_eval_idx - 1;
-        ptm_fast_eval_t *lastf = s->hist + prev_idx;
-
-        /* Seed this frame's top-N lists from the previous frame. */
-        memcpy(s->f->topn[0][0], lastf->topn[0][0],
-               s->g->n_mgau * s->g->n_feat * s->max_topn * sizeof(ptm_topn_t));
-        /* Initial active codebook list (might not be necessary). */
-        ptm_mgau_calc_cb_active(s, senone_active, n_senone_active, compallsen);
-        /* Evaluate top-N, prune, evaluate remaining codebooks, then
-         * normalize. */
-        ptm_mgau_codebook_eval(s, featbuf, frame);
-        ptm_mgau_codebook_norm(s, featbuf, frame);
-    }
-
-    /* Evaluate intersection of active senones and active codebooks. */
-    ptm_mgau_senone_eval(s, senone_scores, senone_active,
-                         n_senone_active, compallsen);
-
-    return 0;
-}
-
-/**
- * Load quantized mixture weights from a sendump file into s->mixw
- * (and s->mixw_cb when the file carries a cluster codebook).
- *
- * Sets s->n_sen from mdef.  When the "-mmap" option is on, the file is
- * memory-mapped and s->mixw holds row pointers into the mapping;
- * otherwise (or if mapping fails) the weights are read into a freshly
- * allocated 3-D array.
- *
- * All lengths and dimensions taken from the file are validated before
- * they are used to size a read, since the file is external input.
- *
- * @param s    Scorer being initialised; s->g and s->config must be set.
- * @param mdef Model definition supplying the senone count.
- * @param file Path of the sendump file.
- * @return 0 on success, -1 on any error (already logged, except for a
- *         failed fopen).
- */
-static int32
-read_sendump(ptm_mgau_t *s, bin_mdef_t *mdef, char const *file)
-{
-    FILE *fp;
-    char line[1000];
-    /* Longest string that fits in line[] along with a terminator. */
-    int32 const max_str = (int32) sizeof(line) - 1;
-    int32 i, n, r, c;
-    int32 do_swap, do_mmap;
-    size_t offset;
-    int n_clust = 0;
-    int n_feat = s->g->n_feat;
-    int n_density = s->g->n_density;
-    int n_sen = bin_mdef_n_sen(mdef);
-    int n_bits = 8;
-
-    s->n_sen = n_sen; /* FIXME: Should have been done earlier */
-    do_mmap = cmd_ln_boolean_r(s->config, "-mmap");
-
-    if ((fp = fopen(file, "rb")) == NULL)
-        return -1;
-
-    E_INFO("Loading senones from dump file %s\n", file);
-    /* Read title size, title */
-    if (fread(&n, sizeof(int32), 1, fp) != 1) {
-        E_ERROR_SYSTEM("Failed to read title size from %s", file);
-        goto error_out;
-    }
-    /* This is extremely bogus: the byte order is guessed from whether
-     * the title length looks plausible. */
-    do_swap = 0;
-    if (n < 1 || n > max_str) {
-        SWAP_INT32(&n);
-        if (n < 1 || n > max_str) {
-            E_ERROR("Title length %x in dump file %s out of range\n", n, file);
-            goto error_out;
-        }
-        do_swap = 1;
-    }
-    if (fread(line, sizeof(char), n, fp) != (size_t) n) {
-        E_ERROR_SYSTEM("Cannot read title");
-        goto error_out;
-    }
-    if (line[n - 1] != '\0') {
-        E_ERROR("Bad title in dump file\n");
-        goto error_out;
-    }
-    E_INFO("%s\n", line);
-
-    /* Read header size, header */
-    if (fread(&n, sizeof(n), 1, fp) != 1) {
-        E_ERROR_SYSTEM("Failed to read header size from %s", file);
-        goto error_out;
-    }
-    if (do_swap) SWAP_INT32(&n);
-    /* The length comes from the file: it must fit in line[] and be at
-     * least 1 so that line[n - 1] below is a valid index. */
-    if (n < 1 || n > max_str) {
-        E_ERROR("Header length %x in dump file %s out of range\n", n, file);
-        goto error_out;
-    }
-    if (fread(line, sizeof(char), n, fp) != (size_t) n) {
-        E_ERROR_SYSTEM("Cannot read header");
-        goto error_out;
-    }
-    if (line[n - 1] != '\0') {
-        E_ERROR("Bad header in dump file\n");
-        goto error_out;
-    }
-
-    /* Read other header strings until string length = 0 */
-    for (;;) {
-        if (fread(&n, sizeof(n), 1, fp) != 1) {
-            E_ERROR_SYSTEM("Failed to read header string size from %s", file);
-            goto error_out;
-        }
-        if (do_swap) SWAP_INT32(&n);
-        if (n == 0)
-            break;
-        if (n < 1 || n > max_str) {
-            E_ERROR("Header string length %x in dump file %s out of range\n",
-                    n, file);
-            goto error_out;
-        }
-        if (fread(line, sizeof(char), n, fp) != (size_t) n) {
-            E_ERROR_SYSTEM("Cannot read header");
-            goto error_out;
-        }
-        /* Guarantee termination for the string functions below. */
-        line[n] = '\0';
-        /* Look for the dimension and cluster settings, if present */
-        if (!strncmp(line, "feature_count ", strlen("feature_count "))) {
-            n_feat = atoi(line + strlen("feature_count "));
-        }
-        if (!strncmp(line, "mixture_count ", strlen("mixture_count "))) {
-            n_density = atoi(line + strlen("mixture_count "));
-        }
-        if (!strncmp(line, "model_count ", strlen("model_count "))) {
-            n_sen = atoi(line + strlen("model_count "));
-        }
-        if (!strncmp(line, "cluster_count ", strlen("cluster_count "))) {
-            n_clust = atoi(line + strlen("cluster_count "));
-        }
-        if (!strncmp(line, "cluster_bits ", strlen("cluster_bits "))) {
-            n_bits = atoi(line + strlen("cluster_bits "));
-        }
-    }
-
-    /* Defaults for #rows, #columns in mixw array. */
-    c = n_sen;
-    r = n_density;
-    if (n_clust == 0) {
-        /* Older mixw files have them here, and they might be padded. */
-        if (fread(&r, sizeof(r), 1, fp) != 1) {
-            E_ERROR_SYSTEM("Cannot read #rows");
-            goto error_out;
-        }
-        if (do_swap) SWAP_INT32(&r);
-        if (fread(&c, sizeof(c), 1, fp) != 1) {
-            E_ERROR_SYSTEM("Cannot read #columns");
-            goto error_out;
-        }
-        if (do_swap) SWAP_INT32(&c);
-        E_INFO("Rows: %d, Columns: %d\n", r, c);
-    }
-
-    if (n_feat != s->g->n_feat) {
-        E_ERROR("Number of feature streams mismatch: %d != %d\n",
-                n_feat, s->g->n_feat);
-        goto error_out;
-    }
-    if (n_density != s->g->n_density) {
-        E_ERROR("Number of densities mismatch: %d != %d\n",
-                n_density, s->g->n_density);
-        goto error_out;
-    }
-    if (n_sen != s->n_sen) {
-        E_ERROR("Number of senones mismatch: %d != %d\n",
-                n_sen, s->n_sen);
-        goto error_out;
-    }
-    /* r rows are stored per stream into arrays sized for n_density
-     * rows, and c sizes every row read, so neither may be negative and
-     * r may not exceed n_density. */
-    if (r < 0 || r > n_density || c < 0) {
-        E_ERROR("Bad mixture weight dimensions in dump file: %d x %d\n", r, c);
-        goto error_out;
-    }
-
-    if (!((n_clust == 0) || (n_clust == 15) || (n_clust == 16))) {
-        E_ERROR("Cluster count must be 0, 15, or 16\n");
-        goto error_out;
-    }
-    if (n_clust == 15)
-        ++n_clust;
-
-    if (!((n_bits == 8) || (n_bits == 4))) {
-        E_ERROR("Cluster bits must be 4 or 8\n");
-        goto error_out;
-    }
-
-    if (do_mmap) {
-        E_INFO("Using memory-mapped I/O for senones\n");
-    }
-    offset = ftell(fp);
-
-    /* Map the file if requested; on failure fall back to reading it,
-     * which is what the pointer test further down selects anyway. */
-    if (do_mmap) {
-        s->sendump_mmap = mmio_file_read(file);
-        if (s->sendump_mmap == NULL)
-            E_WARN("Failed to memory-map %s, reading it instead\n", file);
-    }
-
-    /* Get cluster codebook if any. */
-    if (n_clust) {
-        if (s->sendump_mmap) {
-            s->mixw_cb = ((uint8 *) mmio_file_ptr(s->sendump_mmap)) + offset;
-            offset += n_clust;
-        }
-        else {
-            s->mixw_cb = ckd_calloc(1, n_clust);
-            if (fread(s->mixw_cb, 1, n_clust, fp) != (size_t) n_clust) {
-                E_ERROR("Failed to read %d bytes from sendump\n", n_clust);
-                goto error_out;
-            }
-        }
-    }
-
-    /* Set up pointers, or read, or whatever */
-    if (s->sendump_mmap) {
-        s->mixw = ckd_calloc_2d(n_feat, n_density, sizeof(*s->mixw));
-        for (n = 0; n < n_feat; n++) {
-            int step = c;
-            if (n_bits == 4)
-                step = (step + 1) / 2;
-            for (i = 0; i < r; i++) {
-                s->mixw[n][i] = ((uint8 *) mmio_file_ptr(s->sendump_mmap)) + offset;
-                offset += step;
-            }
-        }
-    }
-    else {
-        s->mixw = ckd_calloc_3d(n_feat, n_density, n_sen, sizeof(***s->mixw));
-        /* Read pdf values and ids */
-        for (n = 0; n < n_feat; n++) {
-            int step = c;
-            int keep;
-            if (n_bits == 4)
-                step = (step + 1) / 2;
-            /* Each allocated row holds n_sen elements; a padded file
-             * row is longer, so store what fits and skip the rest. */
-            keep = (step > n_sen) ? n_sen : step;
-            for (i = 0; i < r; i++) {
-                if (fread(s->mixw[n][i], sizeof(***s->mixw), keep, fp)
-                    != (size_t) keep) {
-                    E_ERROR("Failed to read %d bytes from sendump\n", keep);
-                    goto error_out;
-                }
-                if (step > keep
-                    && fseek(fp, (long) (step - keep)
-                             * (long) sizeof(***s->mixw), SEEK_CUR) != 0) {
-                    E_ERROR_SYSTEM("Failed to skip padding in sendump");
-                    goto error_out;
-                }
-            }
-        }
-    }
-
-    fclose(fp);
-    return 0;
-error_out:
-    fclose(fp);
-    return -1;
-}
-
-/**
- * Read a floating-point mixture weight file, normalize and floor each
- * distribution, and store the weights in s->mixw as quantized negative
- * logs indexed [stream][codeword][senone].  Also sets s->n_sen.
- *
- * Every failure is fatal (E_FATAL / E_FATAL_SYSTEM).
- *
- * @param s         Scorer being initialised; s->g and s->lmath_8b must
- *                  be set.
- * @param file_name Path of the mixture weight file.
- * @param SmoothMin Floor applied to each weight before renormalizing.
- * @return Number of senones read.
- */
-static int32
-read_mixw(ptm_mgau_t * s, char const *file_name, double SmoothMin)
-{
-    char **argname, **argval;
-    char eofchk;
-    FILE *fp;
-    int32 byteswap, chksum_present;
-    uint32 chksum;
-    float32 *pdf;
-    int32 i, f, c, n;
-    int32 n_sen;
-    int32 n_feat;
-    int32 n_comp;
-    int32 n_err;
-
-    E_INFO("Reading mixture weights file '%s'\n", file_name);
-
-    if ((fp = fopen(file_name, "rb")) == NULL)
-        E_FATAL_SYSTEM("Failed to open mixture file '%s' for reading", file_name);
-
-    /* Read header, including argument-value info and 32-bit byteorder magic */
-    if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0)
-        E_FATAL("Failed to read header from '%s'\n", file_name);
-
-    /* Parse argument-value list */
-    chksum_present = 0;
-    for (i = 0; argname[i]; i++) {
-        if (strcmp(argname[i], "version") == 0) {
-            if (strcmp(argval[i], MGAU_MIXW_VERSION) != 0)
-                E_WARN("Version mismatch(%s): %s, expecting %s\n",
-                       file_name, argval[i], MGAU_MIXW_VERSION);
-        }
-        else if (strcmp(argname[i], "chksum0") == 0) {
-            chksum_present = 1; /* Ignore the associated value */
-        }
-    }
-    bio_hdrarg_free(argname, argval);
-    argname = argval = NULL;
-
-    chksum = 0;
-
-    /* Read #senones, #features, #codewords, arraysize */
-    if ((bio_fread(&n_sen, sizeof(int32), 1, fp, byteswap, &chksum) != 1)
-        || (bio_fread(&n_feat, sizeof(int32), 1, fp, byteswap, &chksum) !=
-            1)
-        || (bio_fread(&n_comp, sizeof(int32), 1, fp, byteswap, &chksum) !=
-            1)
-        || (bio_fread(&n, sizeof(int32), 1, fp, byteswap, &chksum) != 1)) {
-        E_FATAL("bio_fread(%s) (arraysize) failed\n", file_name);
-    }
-    if (n_feat != s->g->n_feat)
-        E_FATAL("#Features streams(%d) != %d\n", n_feat, s->g->n_feat);
-    /* s->mixw below is allocated with s->g->n_density codewords but is
-     * filled for n_comp of them, so the two must agree. */
-    if (n_comp != s->g->n_density)
-        E_FATAL("#Mixture components(%d) != %d\n", n_comp, s->g->n_density);
-    if (n != n_sen * n_feat * n_comp) {
-        /* Report the array size read from the file (n), not the
-         * leftover header-loop index. */
-        E_FATAL
-            ("%s: #float32s(%d) doesn't match header dimensions: %d x %d x %d\n",
-             file_name, n, n_sen, n_feat, n_comp);
-    }
-
-    /* n_sen = number of mixture weights per codeword, which is
-     * fixed at the number of senones since we have only one codebook.
-     */
-    s->n_sen = n_sen;
-
-    /* Quantized mixture weight arrays. */
-    s->mixw = ckd_calloc_3d(s->g->n_feat, s->g->n_density,
-                            n_sen, sizeof(***s->mixw));
-
-    /* Temporary structure to read in floats before conversion to (int32) logs3 */
-    pdf = (float32 *) ckd_calloc(n_comp, sizeof(float32));
-
-    /* Read senone probs data, normalize, floor, convert to logs3, truncate to 8 bits */
-    n_err = 0;
-    for (i = 0; i < n_sen; i++) {
-        for (f = 0; f < n_feat; f++) {
-            if (bio_fread((void *) pdf, sizeof(float32),
-                          n_comp, fp, byteswap, &chksum) != n_comp) {
-                E_FATAL("bio_fread(%s) (arraydata) failed\n", file_name);
-            }
-
-            /* Normalize and floor */
-            if (vector_sum_norm(pdf, n_comp) <= 0.0)
-                n_err++;
-            vector_floor(pdf, n_comp, SmoothMin);
-            vector_sum_norm(pdf, n_comp);
-
-            /* Convert to LOG, quantize, and transpose */
-            for (c = 0; c < n_comp; c++) {
-                int32 qscr;
-
-                qscr = -logmath_log(s->lmath_8b, pdf[c]);
-                if ((qscr > MAX_NEG_MIXW) || (qscr < 0))
-                    qscr = MAX_NEG_MIXW;
-                s->mixw[f][c][i] = qscr;
-            }
-        }
-    }
-    if (n_err > 0)
-        E_WARN("Weight normalization failed for %d mixture weights components\n", n_err);
-
-    ckd_free(pdf);
-
-    if (chksum_present)
-        bio_verify_chksum(fp, byteswap, chksum);
-
-    /* Anything left after the declared data means a malformed file. */
-    if (fread(&eofchk, 1, 1, fp) == 1)
-        E_FATAL("More data than expected in %s\n", file_name);
-
-    fclose(fp);
-
-    E_INFO("Read %d x %d x %d mixture weights\n", n_sen, n_feat, n_comp);
-    return n_sen;
-}
-
-/**
- * Create a PTM Gaussian scorer for an acoustic model.
- *
- * Loads means and variances, checks that their shape matches the model
- * definition and feature module, loads mixture weights from "-sendump"
- * if given and from "-mixw" otherwise, and allocates the fast-match
- * history buffers.
- *
- * @param acmod Acoustic model supplying config, log-math, feature
- *              module and model definition.
- * @param mdef  Model definition used for the codebook-count check.
- * @return The new scorer with its function table installed, or NULL on
- *         failure (everything allocated so far is released through
- *         ptm_mgau_free()).
- */
-ps_mgau_t *
-ptm_mgau_init(acmod_t *acmod, bin_mdef_t *mdef)
-{
-    ptm_mgau_t *s;
-    ps_mgau_t *ps;
-    char const *sendump_path;
-    int i;
-
-    s = ckd_calloc(1, sizeof(*s));
-    s->config = acmod->config;
-
-    s->lmath = logmath_retain(acmod->lmath);
-    /* Log-add table. */
-    s->lmath_8b = logmath_init(logmath_get_base(acmod->lmath), SENSCR_SHIFT, TRUE);
-    if (s->lmath_8b == NULL)
-        goto error_out;
-    /* Ensure that it is only 8 bits wide so that fast_logmath_add() works. */
-    if (logmath_get_width(s->lmath_8b) != 1) {
-        E_ERROR("Log base %f is too small to represent add table in 8 bits\n",
-                logmath_get_base(s->lmath_8b));
-        goto error_out;
-    }
-
-    /* Read means and variances. */
-    if ((s->g = gauden_init(cmd_ln_str_r(s->config, "-mean"),
-                            cmd_ln_str_r(s->config, "-var"),
-                            cmd_ln_float32_r(s->config, "-varfloor"),
-                            s->lmath)) == NULL)
-        goto error_out;
-    /* We only support 256 codebooks or less (like 640k or 2GB, this
-     * should be enough for anyone) */
-    if (s->g->n_mgau > 256) {
-        E_INFO("Number of codebooks exceeds 256: %d\n", s->g->n_mgau);
-        goto error_out;
-    }
-    if (s->g->n_mgau != bin_mdef_n_ciphone(mdef)) {
-        E_INFO("Number of codebooks doesn't match number of ciphones, doesn't look like PTM: %d != %d\n", s->g->n_mgau, bin_mdef_n_ciphone(mdef));
-        goto error_out;
-    }
-    /* Verify n_feat and veclen, against acmod. */
-    if (s->g->n_feat != feat_dimension1(acmod->fcb)) {
-        E_ERROR("Number of streams does not match: %d != %d\n",
-                s->g->n_feat, feat_dimension1(acmod->fcb));
-        goto error_out;
-    }
-    for (i = 0; i < s->g->n_feat; ++i) {
-        if (s->g->featlen[i] != feat_dimension2(acmod->fcb, i)) {
-            /* Three conversions, three arguments: the stream index
-             * comes first. */
-            E_ERROR("Dimension of stream %d does not match: %d != %d\n",
-                    i, s->g->featlen[i], feat_dimension2(acmod->fcb, i));
-            goto error_out;
-        }
-    }
-    /* Read mixture weights. */
-    if ((sendump_path = cmd_ln_str_r(s->config, "-sendump"))) {
-        if (read_sendump(s, acmod->mdef, sendump_path) < 0) {
-            goto error_out;
-        }
-    }
-    else {
-        if (read_mixw(s, cmd_ln_str_r(s->config, "-mixw"),
-                      cmd_ln_float32_r(s->config, "-mixwfloor")) < 0) {
-            goto error_out;
-        }
-    }
-    s->ds_ratio = cmd_ln_int32_r(s->config, "-ds");
-    s->max_topn = cmd_ln_int32_r(s->config, "-topn");
-    E_INFO("Maximum top-N: %d\n", s->max_topn);
-
-    /* Assume mapping of senones to their base phones, though this
-     * will become more flexible in the future. */
-    s->sen2cb = ckd_calloc(s->n_sen, sizeof(*s->sen2cb));
-    for (i = 0; i < s->n_sen; ++i)
-        s->sen2cb[i] = bin_mdef_sen2cimap(acmod->mdef, i);
-
-    /* Allocate fast-match history buffers.  We need enough for the
-     * phoneme lookahead window, plus the current frame, plus one for
-     * good measure? (FIXME: I don't remember why) */
-    s->n_fast_hist = cmd_ln_int32_r(s->config, "-pl_window") + 2;
-    s->hist = ckd_calloc(s->n_fast_hist, sizeof(*s->hist));
-    /* s->f will be a rotating pointer into s->hist. */
-    s->f = s->hist;
-    for (i = 0; i < s->n_fast_hist; ++i) {
-        int j, k, m;
-        /* Top-N codewords for every codebook and feature. */
-        s->hist[i].topn = ckd_calloc_3d(s->g->n_mgau, s->g->n_feat,
-                                        s->max_topn, sizeof(ptm_topn_t));
-        /* Initialize them to sane (yet arbitrary) defaults. */
-        for (j = 0; j < s->g->n_mgau; ++j) {
-            for (k = 0; k < s->g->n_feat; ++k) {
-                for (m = 0; m < s->max_topn; ++m) {
-                    s->hist[i].topn[j][k][m].cw = m;
-                    s->hist[i].topn[j][k][m].score = WORST_DIST;
-                }
-            }
-        }
-        /* Active codebook mapping (just codebook, not features,
-           at least not yet) */
-        s->hist[i].mgau_active = bitvec_alloc(s->g->n_mgau);
-        /* Start with them all on, prune them later. */
-        bitvec_set_all(s->hist[i].mgau_active, s->g->n_mgau);
-    }
-
-    ps = (ps_mgau_t *)s;
-    ps->vt = &ptm_mgau_funcs;
-    return ps;
-error_out:
-    ptm_mgau_free(ps_mgau_base(s));
-    return NULL;
-}
-
-/**
- * Apply an MLLR transform to this scorer by handing its Gaussian
- * parameters, the transform and the configuration to
- * gauden_mllr_transform().
- *
- * @return Whatever gauden_mllr_transform() returns.
- */
-int
-ptm_mgau_mllr_transform(ps_mgau_t *ps,
-                        ps_mllr_t *mllr)
-{
-    ptm_mgau_t *self = (ptm_mgau_t *)ps;
-    int rv;
-
-    rv = gauden_mllr_transform(self->g, mllr, self->config);
-    return rv;
-}
-
-/**
- * Release a PTM scorer and everything it owns: log-math objects,
- * mixture weights (and the sendump mapping, if any), the senone to
- * codebook map, the fast-match history buffers and the Gaussian
- * parameters.
- *
- * Also used on the error path of ptm_mgau_init(), so members may still
- * be unset when this runs.
- */
-void
-ptm_mgau_free(ps_mgau_t *ps)
-{
-    int i;
-    ptm_mgau_t *s = (ptm_mgau_t *)ps;
-
-    logmath_free(s->lmath);
-    logmath_free(s->lmath_8b);
-    if (s->sendump_mmap) {
-        /* mixw rows and mixw_cb point into the mapping: free only the
-         * row-pointer table, then drop the mapping. */
-        ckd_free_2d(s->mixw);
-        mmio_file_unmap(s->sendump_mmap);
-    }
-    else {
-        ckd_free_3d(s->mixw);
-        /* Without a mapping the cluster codebook was allocated by
-         * read_sendump() and is owned here. */
-        ckd_free(s->mixw_cb);
-    }
-    ckd_free(s->sen2cb);
-
-    for (i = 0; i < s->n_fast_hist; i++) {
-        ckd_free_3d(s->hist[i].topn);
-        bitvec_free(s->hist[i].mgau_active);
-    }
-    ckd_free(s->hist);
-
-    gauden_free(s->g);
-    ckd_free(s);
-}