summary refs log tree commit diff stats
path: root/media/sphinxbase/src/libsphinxbase/feat
diff options
context:
space:
mode:
Diffstat (limited to 'media/sphinxbase/src/libsphinxbase/feat')
-rw-r--r-- media/sphinxbase/src/libsphinxbase/feat/agc.c 227
-rw-r--r-- media/sphinxbase/src/libsphinxbase/feat/cmn.c 238
-rw-r--r-- media/sphinxbase/src/libsphinxbase/feat/cmn_prior.c 184
-rw-r--r-- media/sphinxbase/src/libsphinxbase/feat/feat.c 1497
-rw-r--r-- media/sphinxbase/src/libsphinxbase/feat/lda.c 158
5 files changed, 2304 insertions, 0 deletions
diff --git a/media/sphinxbase/src/libsphinxbase/feat/agc.c b/media/sphinxbase/src/libsphinxbase/feat/agc.c
new file mode 100644
index 000000000..271baf49d
--- /dev/null
+++ b/media/sphinxbase/src/libsphinxbase/feat/agc.c
@@ -0,0 +1,227 @@
+/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
+/* ====================================================================
+ * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * This work was supported in part by funding from the Defense Advanced
+ * Research Projects Agency and the National Science Foundation of the
+ * United States of America, and the CMU Sphinx Speech Consortium.
+ *
+ * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
+ * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
+ * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ====================================================================
+ *
+ */
+/*
+ * agc.c -- Various forms of automatic gain control (AGC)
+ *
+ * **********************************************
+ * CMU ARPA Speech Project
+ *
+ * Copyright (c) 1996 Carnegie Mellon University.
+ * ALL RIGHTS RESERVED.
+ * **********************************************
+ *
+ * HISTORY
+ * $Log$
+ * Revision 1.5 2005/06/21 19:25:41 arthchan2003
+ * 1, Fixed doxygen documentation. 2, Added $ keyword.
+ *
+ * Revision 1.3 2005/03/30 01:22:46 archan
+ * Fixed mistakes in last updates. Add
+ *
+ *
+ * 04-Nov-95 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
+ * Created.
+ */
+
+#include <string.h>
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include "sphinxbase/err.h"
+#include "sphinxbase/ckd_alloc.h"
+#include "sphinxbase/agc.h"
+
+/*
+ * Printable names of the AGC modes; agc_type_from_str() maps a name to
+ * its index in this table.
+ * NOTE! The order must match the enum in agc.h.
+ */
+const char *agc_type_str[] = { "none", "max", "emax", "noise" };
+
+/* Number of entries in agc_type_str. */
+static const int n_agc_type_str =
+    sizeof(agc_type_str) / sizeof(*agc_type_str);
+
+/**
+ * Look up an AGC type by name.
+ *
+ * Compares str against each entry of agc_type_str and returns the index
+ * of the first exact match, cast to agc_type_t.  A name with no match is
+ * reported through E_FATAL; the AGC_NONE return follows that call.
+ */
+agc_type_t
+agc_type_from_str(const char *str)
+{
+    int idx = 0;
+
+    while (idx < n_agc_type_str) {
+        if (strcmp(str, agc_type_str[idx]) == 0)
+            return (agc_type_t) idx;
+        ++idx;
+    }
+
+    E_FATAL("Unknown AGC type '%s'\n", str);
+    return AGC_NONE;
+}
+
+/**
+ * Allocate a zero-filled AGC object and set its noise threshold to 2.0.
+ */
+agc_t *agc_init(void)
+{
+    agc_t *self = ckd_calloc(1, sizeof(*self));
+
+    self->noise_thresh = FLOAT2MFCC(2.0);
+    return self;
+}
+
+/**
+ * Release an AGC object by handing it to ckd_free().
+ */
+void
+agc_free(agc_t *agc)
+{
+    ckd_free(agc);
+}
+
+/**
+ * Normalize c0 for all frames such that max(c0) = 0.
+ *
+ * Finds the largest mfc[i][0] over the n_frame frames, stores it in
+ * agc->obs_max, flags that data was observed, and subtracts the maximum
+ * from the c0 of every frame.  Does nothing when n_frame <= 0.
+ */
+void
+agc_max(agc_t *agc, mfcc_t **mfc, int32 n_frame)
+{
+    int32 i;
+
+    if (n_frame <= 0)
+        return;
+
+    /* Frame 0 seeds the maximum, so data has been observed even when no
+     * later frame exceeds it. */
+    agc->obs_max = mfc[0][0];
+    agc->obs_frame = 1;
+    for (i = 1; i < n_frame; i++) {
+        if (mfc[i][0] > agc->obs_max)
+            agc->obs_max = mfc[i][0];
+    }
+
+    /* Convert with MFCC2FLOAT before using a float format, as agc_noise
+     * does for its own log line. */
+    E_INFO("AGCMax: obs=max= %.2f\n", MFCC2FLOAT(agc->obs_max));
+    for (i = 0; i < n_frame; i++)
+        mfc[i][0] -= agc->obs_max;
+}
+
+/**
+ * Log the requested estimated maximum and store it, converted with
+ * FLOAT2MFCC, in agc->max.
+ */
+void
+agc_emax_set(agc_t *agc, float32 m)
+{
+    agc->max = FLOAT2MFCC(m);
+
+    E_INFO("AGCEMax: max= %.2f\n", m);
+}
+
+/**
+ * Return the current estimated maximum (agc->max) converted with MFCC2FLOAT.
+ */
+float32
+agc_emax_get(agc_t *agc)
+{
+    float32 estimate = MFCC2FLOAT(agc->max);
+
+    return estimate;
+}
+
+/**
+ * Apply the estimated maximum to a block of frames.
+ *
+ * For each frame, raises agc->obs_max (and sets agc->obs_frame) when the
+ * frame's c0 exceeds it, then subtracts agc->max from that c0.  Does
+ * nothing when n_frame <= 0.
+ */
+void
+agc_emax(agc_t *agc, mfcc_t **mfc, int32 n_frame)
+{
+    int fr;
+
+    if (n_frame <= 0)
+        return;
+
+    fr = 0;
+    while (fr < n_frame) {
+        mfcc_t *c0 = &mfc[fr][0];
+
+        /* Track the largest raw c0 seen before it is normalized. */
+        if (*c0 > agc->obs_max) {
+            agc->obs_max = *c0;
+            agc->obs_frame = 1;
+        }
+        *c0 -= agc->max;
+        ++fr;
+    }
+}
+
+/**
+ * Update estimated max for next utterance.
+ *
+ * If a frame was flagged as observed (agc->obs_frame), adds the observed
+ * maximum to the running sum, increments the utterance count and sets
+ * agc->max to their quotient; when the count reaches 16 the sum is halved
+ * and the count set to 8.  The per-utterance accumulators are then reset.
+ */
+void
+agc_emax_update(agc_t *agc)
+{
+    if (agc->obs_frame) {       /* Update only if some data observed */
+        agc->obs_max_sum += agc->obs_max;
+        agc->obs_utt++;
+
+        /* Re-estimate max over past history; decay the history */
+        agc->max = agc->obs_max_sum / agc->obs_utt;
+        if (agc->obs_utt == 16) {
+            agc->obs_max_sum /= 2;
+            agc->obs_utt = 8;
+        }
+    }
+    /* Convert with MFCC2FLOAT before using a float format, as agc_noise
+     * does for its own log line. */
+    E_INFO("AGCEMax: obs= %.2f, new= %.2f\n",
+           MFCC2FLOAT(agc->obs_max), MFCC2FLOAT(agc->max));
+
+    /* Reset the accumulators for the next utterance. */
+    agc->obs_frame = 0;
+    agc->obs_max = FLOAT2MFCC(-1000.0); /* Less than any real C0 value (hopefully!!) */
+}
+
+/**
+ * Noise-based gain control.
+ *
+ * Finds the minimum c0 over the nfr frames, averages the c0 of every
+ * frame lying below (minimum + agc->noise_thresh), and subtracts that
+ * average from the c0 of all frames.  Does nothing when nfr <= 0.
+ */
+void
+agc_noise(agc_t *agc,
+          mfcc_t **cep,
+          int32 nfr)
+{
+    mfcc_t min_energy;   /* Minimum log-energy */
+    mfcc_t noise_level;  /* Average noise_level */
+    int32 i;             /* frame index */
+    int32 noise_frames;  /* Number of noise frames */
+
+    /* cep[0] is read below to seed the minimum; bail out if there is none. */
+    if (nfr <= 0)
+        return;
+
+    /* Determine minimum log-energy in utterance */
+    min_energy = cep[0][0];
+    for (i = 0; i < nfr; ++i) {
+        if (cep[i][0] < min_energy)
+            min_energy = cep[i][0];
+    }
+
+    /* Average all frames between min_energy and min_energy + agc->noise_thresh */
+    noise_frames = 0;
+    noise_level = 0;
+    min_energy += agc->noise_thresh;
+    for (i = 0; i < nfr; ++i) {
+        if (cep[i][0] < min_energy) {
+            noise_level += cep[i][0];
+            noise_frames++;
+        }
+    }
+
+    if (noise_frames > 0) {
+        noise_level /= noise_frames;
+        E_INFO("AGC NOISE: max= %6.3f\n", MFCC2FLOAT(noise_level));
+        /* Subtract noise_level from all log_energy values */
+        for (i = 0; i < nfr; i++) {
+            cep[i][0] -= noise_level;
+        }
+    }
+}
+
+/**
+ * Store the noise threshold, converted with FLOAT2MFCC, in agc->noise_thresh.
+ */
+void
+agc_set_threshold(agc_t *agc,
+                  float32 threshold)
+{
+    agc->noise_thresh = FLOAT2MFCC(threshold);
+}
+
+/**
+ * Return the noise threshold as a float.
+ *
+ * agc->noise_thresh is stored through FLOAT2MFCC (see agc_init and
+ * agc_set_threshold), so reading it back needs the inverse conversion,
+ * MFCC2FLOAT.
+ */
+float32
+agc_get_threshold(agc_t *agc)
+{
+    return MFCC2FLOAT(agc->noise_thresh);
+}
diff --git a/media/sphinxbase/src/libsphinxbase/feat/cmn.c b/media/sphinxbase/src/libsphinxbase/feat/cmn.c
new file mode 100644
index 000000000..c133c19a3
--- /dev/null
+++ b/media/sphinxbase/src/libsphinxbase/feat/cmn.c
@@ -0,0 +1,238 @@
+/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
+/* ====================================================================
+ * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * This work was supported in part by funding from the Defense Advanced
+ * Research Projects Agency and the National Science Foundation of the
+ * United States of America, and the CMU Sphinx Speech Consortium.
+ *
+ * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
+ * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
+ * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ====================================================================
+ *
+ */
+/*
+ * cmn.c -- Various forms of cepstral mean normalization
+ *
+ * **********************************************
+ * CMU ARPA Speech Project
+ *
+ * Copyright (c) 1996 Carnegie Mellon University.
+ * ALL RIGHTS RESERVED.
+ * **********************************************
+ *
+ * HISTORY
+ * $Log$
+ * Revision 1.14 2006/02/24 15:57:47 egouvea
+ * Removed cmn = NULL from the cmn_free(), since it's pointless (my bad!).
+ *
+ * Removed cmn_prior, which was surrounded by #if 0/#endif, since the
+ * function is already in cmn_prior.c
+ *
+ * Revision 1.13 2006/02/23 03:47:49 arthchan2003
+ * Used Evandro's changes. Resolved conflicts.
+ *
+ *
+ * Revision 1.12 2006/02/23 00:48:23 egouvea
+ * Replaced loops resetting vectors with the more efficient memset()
+ *
+ * Revision 1.11 2006/02/22 23:43:55 arthchan2003
+ * Merged from the branch SPHINX3_5_2_RCI_IRII_BRANCH: Put data structure into the cmn_t structure.
+ *
+ * Revision 1.10.4.2 2005/10/17 04:45:57 arthchan2003
+ * Free stuff in cmn and feat correctly.
+ *
+ * Revision 1.10.4.1 2005/07/05 06:25:08 arthchan2003
+ * Fixed dox-doc.
+ *
+ * Revision 1.10 2005/06/21 19:28:00 arthchan2003
+ * 1, Fixed doxygen documentation. 2, Added $ keyword.
+ *
+ * Revision 1.3 2005/03/30 01:22:46 archan
+ * Fixed mistakes in last updates. Add
+ *
+ *
+ * 20.Apr.2001 RAH (rhoughton@mediasite.com, ricky.houghton@cs.cmu.edu)
+ * Added cmn_free() and moved *mean and *var out global space and named them cmn_mean and cmn_var
+ *
+ * 28-Apr-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
+ * Changed the name norm_mean() to cmn().
+ *
+ * 19-Jun-1996 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
+ * Changed to compute CMN over ALL dimensions of cep instead of 1..12.
+ *
+ * 04-Nov-1995 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
+ * Created.
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <math.h>
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#ifdef _MSC_VER
+#pragma warning (disable: 4244)
+#endif
+
+#include "sphinxbase/ckd_alloc.h"
+#include "sphinxbase/err.h"
+#include "sphinxbase/cmn.h"
+
+/*
+ * Printable names of the CMN modes; cmn_type_from_str() maps a name to
+ * its index in this table.
+ * NOTE! The order must match the enum in cmn.h.
+ */
+const char *cmn_type_str[] = { "none", "current", "prior" };
+
+/* Number of entries in cmn_type_str. */
+static const int n_cmn_type_str =
+    sizeof(cmn_type_str) / sizeof(*cmn_type_str);
+
+/**
+ * Look up a CMN type by name.
+ *
+ * Compares str against each entry of cmn_type_str and returns the index
+ * of the first exact match, cast to cmn_type_t.  A name with no match is
+ * reported through E_FATAL; the CMN_NONE return follows that call.
+ */
+cmn_type_t
+cmn_type_from_str(const char *str)
+{
+    int idx = 0;
+
+    while (idx < n_cmn_type_str) {
+        if (strcmp(str, cmn_type_str[idx]) == 0)
+            return (cmn_type_t) idx;
+        ++idx;
+    }
+
+    E_FATAL("Unknown CMN type '%s'\n", str);
+    return CMN_NONE;
+}
+
+/**
+ * Allocate a CMN object for vectors of veclen components.
+ *
+ * The mean, variance and sum buffers are zero-filled arrays of veclen
+ * entries; mean[0] is then set to 12.0 and the frame count to 0.
+ */
+cmn_t *
+cmn_init(int32 veclen)
+{
+    cmn_t *self = (cmn_t *) ckd_calloc(1, sizeof(cmn_t));
+
+    self->veclen = veclen;
+    self->nframe = 0;
+
+    self->cmn_mean = (mfcc_t *) ckd_calloc(veclen, sizeof(mfcc_t));
+    self->cmn_var = (mfcc_t *) ckd_calloc(veclen, sizeof(mfcc_t));
+    self->sum = (mfcc_t *) ckd_calloc(veclen, sizeof(mfcc_t));
+
+    /* A front-end dependent magic number */
+    self->cmn_mean[0] = FLOAT2MFCC(12.0);
+
+    E_INFO("mean[0]= %.2f, mean[1..%d]= 0.0\n",
+           MFCC2FLOAT(self->cmn_mean[0]), veclen - 1);
+
+    return self;
+}
+
+
+/**
+ * Batch cepstral mean (and optionally variance) normalization.
+ *
+ * Computes the per-dimension mean over the frames whose c0 is not
+ * negative, stores it in cmn->cmn_mean, and subtracts it from every
+ * frame.  When varnorm is non-zero, every dimension is additionally
+ * scaled by the inverse standard deviation taken over all n_frame frames.
+ *
+ * Does nothing when n_frame <= 0.  When no frame has a non-negative c0
+ * there is nothing to average, so the mean is left zeroed and the
+ * cepstra are returned unchanged instead of dividing by zero.
+ */
+void
+cmn(cmn_t *cmn, mfcc_t ** mfc, int32 varnorm, int32 n_frame)
+{
+    mfcc_t *mfcp;
+    mfcc_t t;
+    int32 i, f;
+    int32 n_pos_frame;
+
+    assert(mfc != NULL);
+
+    if (n_frame <= 0)
+        return;
+
+    /* Start the mean accumulation from zero */
+    memset(cmn->cmn_mean, 0, cmn->veclen * sizeof(mfcc_t));
+
+    /* Find mean cep vector for this utterance */
+    for (f = 0, n_pos_frame = 0; f < n_frame; f++) {
+        mfcp = mfc[f];
+
+        /* Skip zero energy frames */
+        if (mfcp[0] < 0)
+            continue;
+
+        for (i = 0; i < cmn->veclen; i++) {
+            cmn->cmn_mean[i] += mfcp[i];
+        }
+
+        n_pos_frame++;
+    }
+
+    /* Every frame was skipped: no mean can be estimated. */
+    if (n_pos_frame == 0)
+        return;
+
+    for (i = 0; i < cmn->veclen; i++)
+        cmn->cmn_mean[i] /= n_pos_frame;
+
+    E_INFO("CMN: ");
+    for (i = 0; i < cmn->veclen; i++)
+        E_INFOCONT("%5.2f ", MFCC2FLOAT(cmn->cmn_mean[i]));
+    E_INFOCONT("\n");
+    if (!varnorm) {
+        /* Subtract mean from each cep vector */
+        for (f = 0; f < n_frame; f++) {
+            mfcp = mfc[f];
+            for (i = 0; i < cmn->veclen; i++)
+                mfcp[i] -= cmn->cmn_mean[i];
+        }
+    }
+    else {
+        /* Scale cep vectors to have unit variance along each dimension, and subtract means */
+        /* Start the variance accumulation from zero */
+        memset(cmn->cmn_var, 0, cmn->veclen * sizeof(mfcc_t));
+
+        for (f = 0; f < n_frame; f++) {
+            mfcp = mfc[f];
+
+            for (i = 0; i < cmn->veclen; i++) {
+                t = mfcp[i] - cmn->cmn_mean[i];
+                cmn->cmn_var[i] += MFCCMUL(t, t);
+            }
+        }
+        for (i = 0; i < cmn->veclen; i++)
+            /* Inverse Std. Dev, RAH added type cast from sqrt */
+            cmn->cmn_var[i] = FLOAT2MFCC(sqrt((float64)n_frame / MFCC2FLOAT(cmn->cmn_var[i])));
+
+        for (f = 0; f < n_frame; f++) {
+            mfcp = mfc[f];
+            for (i = 0; i < cmn->veclen; i++)
+                mfcp[i] = MFCCMUL((mfcp[i] - cmn->cmn_mean[i]), cmn->cmn_var[i]);
+        }
+    }
+}
+
+/*
+ * Release a CMN object: each non-NULL buffer (variance, mean, sum) is
+ * freed, then the object itself.  A NULL cmn is ignored.
+ */
+void
+cmn_free(cmn_t * cmn)
+{
+    if (cmn == NULL)
+        return;
+
+    if (cmn->cmn_var != NULL)
+        ckd_free((void *) cmn->cmn_var);
+    if (cmn->cmn_mean != NULL)
+        ckd_free((void *) cmn->cmn_mean);
+    if (cmn->sum != NULL)
+        ckd_free((void *) cmn->sum);
+
+    ckd_free((void *) cmn);
+}
diff --git a/media/sphinxbase/src/libsphinxbase/feat/cmn_prior.c b/media/sphinxbase/src/libsphinxbase/feat/cmn_prior.c
new file mode 100644
index 000000000..9d1801aa7
--- /dev/null
+++ b/media/sphinxbase/src/libsphinxbase/feat/cmn_prior.c
@@ -0,0 +1,184 @@
+/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
+/* ====================================================================
+ * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * This work was supported in part by funding from the Defense Advanced
+ * Research Projects Agency and the National Science Foundation of the
+ * United States of America, and the CMU Sphinx Speech Consortium.
+ *
+ * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
+ * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
+ * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ====================================================================
+ *
+ */
+/*************************************************
+ * CMU ARPA Speech Project
+ *
+ * Copyright (c) 2000 Carnegie Mellon University.
+ * ALL RIGHTS RESERVED.
+ * **********************************************
+ *
+ * 30-Dec-2000 Rita Singh (rsingh@cs.cmu.edu) at Carnegie Mellon University
+ * Created
+ */
+
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#ifdef _MSC_VER
+#pragma warning (disable: 4244)
+#endif
+
+#include "sphinxbase/ckd_alloc.h"
+#include "sphinxbase/err.h"
+#include "sphinxbase/cmn.h"
+
+/**
+ * Install vec as the current mean.
+ *
+ * Logs the old mean, copies vec into cmn->cmn_mean, sets each running
+ * sum to vec[i] * CMN_WIN and the frame count to CMN_WIN, then logs the
+ * new mean.
+ */
+void
+cmn_prior_set(cmn_t *cmn, mfcc_t const * vec)
+{
+    int32 d;
+
+    E_INFO("cmn_prior_set: from < ");
+    d = 0;
+    while (d < cmn->veclen) {
+        E_INFOCONT("%5.2f ", MFCC2FLOAT(cmn->cmn_mean[d]));
+        ++d;
+    }
+    E_INFOCONT(">\n");
+
+    d = 0;
+    while (d < cmn->veclen) {
+        cmn->cmn_mean[d] = vec[d];
+        cmn->sum[d] = vec[d] * CMN_WIN;
+        ++d;
+    }
+    cmn->nframe = CMN_WIN;
+
+    E_INFO("cmn_prior_set: to < ");
+    d = 0;
+    while (d < cmn->veclen) {
+        E_INFOCONT("%5.2f ", MFCC2FLOAT(cmn->cmn_mean[d]));
+        ++d;
+    }
+    E_INFOCONT(">\n");
+}
+
+/**
+ * Copy the current mean (cmn->veclen components) into vec.
+ */
+void
+cmn_prior_get(cmn_t *cmn, mfcc_t * vec)
+{
+    int32 d = 0;
+
+    while (d < cmn->veclen) {
+        vec[d] = cmn->cmn_mean[d];
+        ++d;
+    }
+}
+
+/*
+ * Recompute the mean as sum / nframe, logging it before and after.  When
+ * nframe has reached CMN_WIN_HWM, the sums are scaled by CMN_WIN / nframe
+ * and nframe is reset to CMN_WIN.
+ */
+static void
+cmn_prior_shiftwin(cmn_t *cmn)
+{
+    mfcc_t scale;
+    int32 d;
+
+    E_INFO("cmn_prior_update: from < ");
+    d = 0;
+    while (d < cmn->veclen) {
+        E_INFOCONT("%5.2f ", MFCC2FLOAT(cmn->cmn_mean[d]));
+        ++d;
+    }
+    E_INFOCONT(">\n");
+
+    scale = FLOAT2MFCC(1.0) / cmn->nframe;
+    d = 0;
+    while (d < cmn->veclen) {
+        cmn->cmn_mean[d] = cmn->sum[d] / cmn->nframe; /* sum[d] * scale */
+        ++d;
+    }
+
+    /* Make the accumulation decay exponentially */
+    if (cmn->nframe >= CMN_WIN_HWM) {
+        scale = CMN_WIN * scale;
+        d = 0;
+        while (d < cmn->veclen) {
+            cmn->sum[d] = MFCCMUL(cmn->sum[d], scale);
+            ++d;
+        }
+        cmn->nframe = CMN_WIN;
+    }
+
+    E_INFO("cmn_prior_update: to < ");
+    d = 0;
+    while (d < cmn->veclen) {
+        E_INFOCONT("%5.2f ", MFCC2FLOAT(cmn->cmn_mean[d]));
+        ++d;
+    }
+    E_INFOCONT(">\n");
+}
+
+/**
+ * Refresh the mean from the accumulated sums.
+ *
+ * Returns immediately when no frames have been counted.  Otherwise sets
+ * the mean to sum / nframe (logging it before and after) and, when
+ * nframe exceeds CMN_WIN_HWM, scales the sums by CMN_WIN / nframe and
+ * resets nframe to CMN_WIN.
+ */
+void
+cmn_prior_update(cmn_t *cmn)
+{
+    mfcc_t scale;
+    int32 d;
+
+    if (cmn->nframe <= 0)
+        return;
+
+    E_INFO("cmn_prior_update: from < ");
+    d = 0;
+    while (d < cmn->veclen) {
+        E_INFOCONT("%5.2f ", MFCC2FLOAT(cmn->cmn_mean[d]));
+        ++d;
+    }
+    E_INFOCONT(">\n");
+
+    /* Update mean buffer */
+    scale = FLOAT2MFCC(1.0) / cmn->nframe;
+    d = 0;
+    while (d < cmn->veclen) {
+        cmn->cmn_mean[d] = cmn->sum[d] / cmn->nframe; /* sum[d] * scale; */
+        ++d;
+    }
+
+    /* Make the accumulation decay exponentially */
+    if (cmn->nframe > CMN_WIN_HWM) {
+        scale = CMN_WIN * scale;
+        d = 0;
+        while (d < cmn->veclen) {
+            cmn->sum[d] = MFCCMUL(cmn->sum[d], scale);
+            ++d;
+        }
+        cmn->nframe = CMN_WIN;
+    }
+
+    E_INFO("cmn_prior_update: to < ");
+    d = 0;
+    while (d < cmn->veclen) {
+        E_INFOCONT("%5.2f ", MFCC2FLOAT(cmn->cmn_mean[d]));
+        ++d;
+    }
+    E_INFOCONT(">\n");
+}
+
+/**
+ * Mean normalization using the previously estimated mean.
+ *
+ * For every frame whose c0 is not negative, adds the frame to the
+ * running sums, subtracts the current mean from it and counts it.  Once
+ * more than CMN_WIN_HWM frames are counted, cmn_prior_shiftwin() is
+ * called.  A non-zero varnorm is reported through E_FATAL; nfr <= 0 is
+ * a no-op.
+ */
+void
+cmn_prior(cmn_t *cmn, mfcc_t **incep, int32 varnorm, int32 nfr)
+{
+    int32 fr, d;
+
+    if (nfr <= 0)
+        return;
+
+    if (varnorm)
+        E_FATAL
+            ("Variance normalization not implemented in live mode decode\n");
+
+    for (fr = 0; fr < nfr; fr++) {
+        mfcc_t *frame = incep[fr];
+
+        /* Skip zero energy frames */
+        if (frame[0] < 0)
+            continue;
+
+        for (d = 0; d < cmn->veclen; d++) {
+            cmn->sum[d] += frame[d];
+            frame[d] -= cmn->cmn_mean[d];
+        }
+
+        ++cmn->nframe;
+    }
+
+    /* Shift buffer down if we have more than CMN_WIN_HWM frames */
+    if (cmn->nframe > CMN_WIN_HWM)
+        cmn_prior_shiftwin(cmn);
+}
diff --git a/media/sphinxbase/src/libsphinxbase/feat/feat.c b/media/sphinxbase/src/libsphinxbase/feat/feat.c
new file mode 100644
index 000000000..d2252fd85
--- /dev/null
+++ b/media/sphinxbase/src/libsphinxbase/feat/feat.c
@@ -0,0 +1,1497 @@
+/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
+/* ====================================================================
+ * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * This work was supported in part by funding from the Defense Advanced
+ * Research Projects Agency and the National Science Foundation of the
+ * United States of America, and the CMU Sphinx Speech Consortium.
+ *
+ * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
+ * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
+ * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ====================================================================
+ *
+ */
+/*
+ * feat.c -- Feature vector description and cepstra->feature computation.
+ *
+ * **********************************************
+ * CMU ARPA Speech Project
+ *
+ * Copyright (c) 1996 Carnegie Mellon University.
+ * ALL RIGHTS RESERVED.
+ * **********************************************
+ *
+ * HISTORY
+ * $Log$
+ * Revision 1.22 2006/02/23 03:59:40 arthchan2003
+ * Merged from branch SPHINX3_5_2_RCI_IRII_BRANCH: a, Free buffers correctly. b, Fixed dox-doc.
+ *
+ * Revision 1.21.4.3 2005/10/17 04:45:57 arthchan2003
+ * Free stuff in cmn and feat correctly.
+ *
+ * Revision 1.21.4.2 2005/09/26 02:19:57 arthchan2003
+ * Add message to show the directory which the feature is searched for.
+ *
+ * Revision 1.21.4.1 2005/07/03 22:55:50 arthchan2003
+ * More correct deallocation in feat.c. The cmn deallocation is still not correct at this point.
+ *
+ * Revision 1.21 2005/06/22 03:29:35 arthchan2003
+ * Makefile.am s for all subdirectory of libs3decoder/
+ *
+ * Revision 1.4 2005/04/21 23:50:26 archan
+ * Some more refactoring on the how reporting of structures inside kbcore_t is done, it is now 50% nice. Also added class-based LM test case into test-decode.sh.in. At this moment, everything in search mode 5 is already done. It is time to test the idea whether the search can really be used.
+ *
+ * Revision 1.3 2005/03/30 01:22:46 archan
+ * Fixed mistakes in last updates. Add
+ *
+ *
+ * 20.Apr.2001 RAH (rhoughton@mediasite.com, ricky.houghton@cs.cmu.edu)
+ * Adding feat_free() to free allocated memory
+ *
+ * 02-Jan-2001 Rita Singh (rsingh@cs.cmu.edu) at Carnegie Mellon University
+ * Modified feat_s2mfc2feat_block() to handle empty buffers at
+ * the end of an utterance
+ *
+ * 30-Dec-2000 Rita Singh (rsingh@cs.cmu.edu) at Carnegie Mellon University
+ * Added feat_s2mfc2feat_block() to allow feature computation
+ * from sequences of blocks of cepstral vectors
+ *
+ * 12-Jun-98 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
+ * Major changes to accommodate arbitrary feature input types. Added
+ * feat_read(), moved various cep2feat functions from other files into
+ * this one. Also, made this module object-oriented with the feat_t type.
+ * Changed definition of s2mfc_read to let the caller manage MFC buffers.
+ *
+ * 03-Oct-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
+ * Added unistd.h include.
+ *
+ * 02-Oct-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
+ * Added check for sf argument to s2mfc_read being within file size.
+ *
+ * 18-Sep-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
+ * Added sf, ef parameters to s2mfc_read().
+ *
+ * 10-Jan-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
+ * Added feat_cepsize().
+ * Added different feature-handling (s2_4x, s3_1x39 at this point).
+ * Moved feature-dependent functions to feature-dependent files.
+ *
+ * 09-Jan-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
+ * Moved constant declarations from feat.h into here.
+ *
+ * 04-Nov-95 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
+ * Created.
+ */
+
+
+/*
+ * This module encapsulates different feature streams used by the Sphinx group. New
+ * stream types can be added by augmenting feat_init() and providing an accompanying
+ * compute_feat function. It also provides a "generic" feature vector definition for
+ * handling "arbitrary" speech input feature types (see the last section in feat_init()).
+ * In this case the speech input data should already be feature vectors; no computation,
+ * such as MFC->feature conversion, is available or needed.
+ */
+
+#include <assert.h>
+#include <string.h>
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#ifdef _MSC_VER
+#pragma warning (disable: 4244 4996)
+#endif
+
+#include "sphinxbase/fe.h"
+#include "sphinxbase/feat.h"
+#include "sphinxbase/bio.h"
+#include "sphinxbase/pio.h"
+#include "sphinxbase/cmn.h"
+#include "sphinxbase/agc.h"
+#include "sphinxbase/err.h"
+#include "sphinxbase/ckd_alloc.h"
+#include "sphinxbase/prim_type.h"
+#include "sphinxbase/glist.h"
+
+#define FEAT_VERSION "1.0"
+#define FEAT_DCEP_WIN 2
+
+#ifdef DUMP_FEATURES
+/* Log text, then print nfr cepstral vectors (fcb->cepsize values each,
+ * one frame per line) to stderr. */
+static void
+cep_dump_dbg(feat_t *fcb, mfcc_t **mfc, int32 nfr, const char *text)
+{
+    int32 fr, c;
+
+    E_INFO("%s\n", text);
+    for (fr = 0; fr < nfr; fr++) {
+        mfcc_t *row = mfc[fr];
+
+        for (c = 0; c < fcb->cepsize; c++)
+            fprintf(stderr, "%f ", MFCC2FLOAT(row[c]));
+        fprintf(stderr, "\n");
+    }
+}
+
+/* Log text, then hand the feature frames to feat_print() on stderr. */
+static void
+feat_print_dbg(feat_t *fcb, mfcc_t ***feat, int32 nfr, const char *text)
+{
+    E_INFO("%s\n", text);
+    feat_print(fcb, feat, nfr, stderr);
+}
+#else /* !DUMP_FEATURES */
+/* Without DUMP_FEATURES both debug helpers expand to nothing. */
+#define cep_dump_dbg(fcb,mfc,nfr,text)
+#define feat_print_dbg(fcb,mfc,nfr,text)
+#endif
+
+/**
+ * Parse a subvector specification string.
+ *
+ * The string is a '/'-separated list of subvectors; each subvector is a
+ * ','-separated list of dimensions, where a dimension is either a single
+ * non-negative integer or an inclusive range "a-b" with a <= b.  A
+ * dimension may appear only once within a subvector.  Any syntax error
+ * is reported through E_FATAL.
+ *
+ * @return an array of int32 arrays: one per subvector, each terminated
+ *         by -1, with the outer array terminated by NULL.  The arrays are
+ *         allocated with ckd_calloc (subvecs_free() releases them).
+ */
+int32 **
+parse_subvecs(char const *str)
+{
+    char const *strp;
+    int32 n, n2, l;
+    glist_t dimlist;            /* List of dimensions in one subvector */
+    glist_t veclist;            /* List of dimlists (subvectors) */
+    int32 **subvec;
+    gnode_t *gn, *gn2;
+
+    veclist = NULL;
+
+    strp = str;
+    for (;;) {
+        dimlist = NULL;
+
+        for (;;) {
+            /* Positions are pointer differences; cast them to int so they
+             * match the %d conversions in the messages below. */
+            if (sscanf(strp, "%d%n", &n, &l) != 1)
+                E_FATAL("'%s': Couldn't read int32 @pos %d\n", str,
+                        (int) (strp - str));
+            strp += l;
+
+            if (*strp == '-') {
+                strp++;
+
+                if (sscanf(strp, "%d%n", &n2, &l) != 1)
+                    E_FATAL("'%s': Couldn't read int32 @pos %d\n", str,
+                            (int) (strp - str));
+                strp += l;
+            }
+            else
+                n2 = n;
+
+            if ((n < 0) || (n > n2))
+                E_FATAL("'%s': Bad subrange spec ending @pos %d\n", str,
+                        (int) (strp - str));
+
+            for (; n <= n2; n++) {
+                gnode_t *dup;
+
+                /* Reject a dimension already present in this subvector */
+                for (dup = dimlist; dup; dup = gnode_next(dup))
+                    if (gnode_int32(dup) == n)
+                        break;
+                if (dup != NULL)
+                    E_FATAL("'%s': Duplicate dimension ending @pos %d\n",
+                            str, (int) (strp - str));
+
+                dimlist = glist_add_int32(dimlist, n);
+            }
+
+            if ((*strp == '\0') || (*strp == '/'))
+                break;
+
+            if (*strp != ',')
+                E_FATAL("'%s': Bad delimiter @pos %d\n", str,
+                        (int) (strp - str));
+
+            strp++;
+        }
+
+        veclist = glist_add_ptr(veclist, (void *) dimlist);
+
+        if (*strp == '\0')
+            break;
+
+        assert(*strp == '/');
+        strp++;
+    }
+
+    /* Convert the glists to arrays; remember the glists are in reverse order of the input! */
+    n = glist_count(veclist);   /* #Subvectors */
+    subvec = (int32 **) ckd_calloc(n + 1, sizeof(int32 *));     /* +1 for sentinel */
+    subvec[n] = NULL;           /* sentinel */
+
+    for (--n, gn = veclist; (n >= 0) && gn; gn = gnode_next(gn), --n) {
+        gn2 = (glist_t) gnode_ptr(gn);
+
+        n2 = glist_count(gn2);  /* Length of this subvector */
+        if (n2 <= 0)
+            E_FATAL("'%s': 0-length subvector\n", str);
+
+        subvec[n] = (int32 *) ckd_calloc(n2 + 1, sizeof(int32));        /* +1 for sentinel */
+        subvec[n][n2] = -1;     /* sentinel */
+
+        for (--n2; (n2 >= 0) && gn2; gn2 = gnode_next(gn2), --n2)
+            subvec[n][n2] = gnode_int32(gn2);
+        assert((n2 < 0) && (!gn2));
+    }
+    assert((n < 0) && (!gn));
+
+    /* Free the glists */
+    for (gn = veclist; gn; gn = gnode_next(gn)) {
+        gn2 = (glist_t) gnode_ptr(gn);
+        glist_free(gn2);
+    }
+    glist_free(veclist);
+
+    return subvec;
+}
+
+/**
+ * Free a NULL-terminated array of subvector arrays: each inner array is
+ * freed first, then the outer array.  A NULL argument skips the inner
+ * loop and is passed straight to ckd_free().
+ */
+void
+subvecs_free(int32 **subvecs)
+{
+    int32 **cur = subvecs;
+
+    while (cur && *cur) {
+        ckd_free(*cur);
+        ++cur;
+    }
+    ckd_free(subvecs);
+}
+
+/**
+ * Install (or clear) the subvector specification of a feature object.
+ *
+ * With subvecs == NULL, the current specification and its buffers are
+ * freed and the related fields reset.  Otherwise subvecs must be a
+ * NULL-terminated array of -1-terminated dimension lists; it is stored in
+ * fcb->subvecs (and freed through subvecs_free() when later cleared or
+ * replaced), and fcb->sv_len / fcb->sv_buf are allocated to match.  Any
+ * specification and buffers installed by an earlier call are released
+ * first so that repeated calls do not leak.
+ *
+ * @return 0 on success, -1 if fcb is not single-stream or the total
+ *         number of listed dimensions exceeds feat_dimension(fcb); on
+ *         failure fcb is left unchanged.
+ */
+int
+feat_set_subvecs(feat_t *fcb, int32 **subvecs)
+{
+    int32 **sv;
+    uint32 n_sv, n_dim, i;
+
+    if (subvecs == NULL) {
+        subvecs_free(fcb->subvecs);
+        ckd_free(fcb->sv_buf);
+        ckd_free(fcb->sv_len);
+        fcb->n_sv = 0;
+        fcb->subvecs = NULL;
+        fcb->sv_len = NULL;
+        fcb->sv_buf = NULL;
+        fcb->sv_dim = 0;
+        return 0;
+    }
+
+    if (fcb->n_stream != 1) {
+        E_ERROR("Subvector specifications require single-stream features!\n");
+        return -1;
+    }
+
+    /* Count the subvectors and the total number of dimensions listed */
+    n_sv = 0;
+    n_dim = 0;
+    for (sv = subvecs; sv && *sv; ++sv) {
+        int32 *d;
+
+        for (d = *sv; d && *d != -1; ++d) {
+            ++n_dim;
+        }
+        ++n_sv;
+    }
+    if (n_dim > feat_dimension(fcb)) {
+        E_ERROR("Total dimensionality of subvector specification %d "
+                "> feature dimensionality %d\n", n_dim, feat_dimension(fcb));
+        return -1;
+    }
+
+    /* Drop whatever an earlier call installed.  The old specification is
+     * kept when the caller passes the very same array again. */
+    if (fcb->subvecs != subvecs)
+        subvecs_free(fcb->subvecs);
+    ckd_free(fcb->sv_len);
+    ckd_free(fcb->sv_buf);
+
+    fcb->n_sv = n_sv;
+    fcb->subvecs = subvecs;
+    fcb->sv_len = (uint32 *)ckd_calloc(n_sv, sizeof(*fcb->sv_len));
+    fcb->sv_buf = (mfcc_t *)ckd_calloc(n_dim, sizeof(*fcb->sv_buf));
+    fcb->sv_dim = n_dim;
+    /* Record the length of each subvector */
+    for (i = 0; i < n_sv; ++i) {
+        int32 *d;
+        for (d = subvecs[i]; d && *d != -1; ++d) {
+            ++fcb->sv_len[i];
+        }
+    }
+
+    return 0;
+}
+
+/**
+ * Project feature components to subvectors (if any).
+ *
+ * For each of the nfr frames, gathers the components of stream 0 named
+ * by fcb->subvecs into fcb->sv_buf, in specification order, then copies
+ * the fcb->sv_dim gathered values back over the start of stream 0.
+ */
+static void
+feat_subvec_project(feat_t *fcb, mfcc_t ***inout_feat, uint32 nfr)
+{
+    uint32 fr;
+
+    if (fcb->subvecs == NULL)
+        return;
+
+    for (fr = 0; fr < nfr; ++fr) {
+        mfcc_t *frame = inout_feat[fr][0];
+        mfcc_t *dst = fcb->sv_buf;
+        int32 s;
+
+        for (s = 0; s < fcb->n_sv; ++s) {
+            int32 *dim = fcb->subvecs[s];
+
+            while (dim && *dim != -1) {
+                *dst++ = frame[*dim];
+                ++dim;
+            }
+        }
+        memcpy(frame, fcb->sv_buf, fcb->sv_dim * sizeof(*fcb->sv_buf));
+    }
+}
+
+/**
+ * Allocate a feature array for nfr frames.
+ *
+ * The result is a 2-D table of stream pointers (nfr rows by
+ * feat_dimension1(fcb) columns) over a single data buffer holding
+ * nfr * (sum of fcb->stream_len) values.  Within a frame, consecutive
+ * stream pointers are spaced feat_dimension2(fcb, j) values apart.
+ */
+mfcc_t ***
+feat_array_alloc(feat_t * fcb, int32 nfr)
+{
+    mfcc_t ***table;
+    mfcc_t *pool, *cursor;
+    int32 fr, s, frame_len;
+
+    assert(fcb);
+    assert(nfr > 0);
+    assert(feat_dimension(fcb) > 0);
+
+    /* Make sure to use the dimensionality of the features *before*
+       LDA and subvector projection. */
+    frame_len = 0;
+    for (s = 0; s < fcb->n_stream; ++s)
+        frame_len += fcb->stream_len[s];
+    assert(frame_len >= feat_dimension(fcb));
+    assert(frame_len >= fcb->sv_dim);
+
+    table =
+        (mfcc_t ***) ckd_calloc_2d(nfr, feat_dimension1(fcb), sizeof(mfcc_t *));
+    pool = (mfcc_t *) ckd_calloc(nfr * frame_len, sizeof(mfcc_t));
+
+    for (fr = 0; fr < nfr; fr++) {
+        /* Every frame starts at a fixed stride of frame_len values */
+        cursor = pool + fr * frame_len;
+        for (s = 0; s < feat_dimension1(fcb); s++) {
+            table[fr][s] = cursor;
+            cursor += feat_dimension2(fcb, s);
+        }
+    }
+
+    return table;
+}
+
+/**
+ * Resize a feature array from ofr to nfr frames.
+ *
+ * Allocates a new array with feat_array_alloc(), copies the data of the
+ * first min(ofr, nfr) frames from the old array, frees the old array and
+ * returns the new one.
+ */
+mfcc_t ***
+feat_array_realloc(feat_t *fcb, mfcc_t ***old_feat, int32 ofr, int32 nfr)
+{
+    mfcc_t ***resized;
+    int32 s, frame_len, n_keep;
+
+    assert(fcb);
+    assert(nfr > 0);
+    assert(ofr > 0);
+    assert(feat_dimension(fcb) > 0);
+
+    /* Make sure to use the dimensionality of the features *before*
+       LDA and subvector projection. */
+    frame_len = 0;
+    for (s = 0; s < fcb->n_stream; ++s)
+        frame_len += fcb->stream_len[s];
+    assert(frame_len >= feat_dimension(fcb));
+    assert(frame_len >= fcb->sv_dim);
+
+    resized = feat_array_alloc(fcb, nfr);
+
+    /* Only the frames present in both arrays can be carried over */
+    if (nfr < ofr)
+        n_keep = nfr;
+    else
+        n_keep = ofr;
+    memcpy(resized[0][0], old_feat[0][0], n_keep * frame_len * sizeof(mfcc_t));
+
+    feat_array_free(old_feat);
+
+    return resized;
+}
+
+/**
+ * Free a feature array created by feat_array_alloc().
+ *
+ * Releases the shared data buffer (reached through feat[0][0]) and then
+ * the 2-D pointer table.  A NULL argument is ignored, so cleanup paths
+ * can call this unconditionally.
+ */
+void
+feat_array_free(mfcc_t ***feat)
+{
+    if (feat == NULL)
+        return;
+
+    ckd_free(feat[0][0]);
+    ckd_free_2d((void **)feat);
+}
+
+/*
+ * Build the four s2_4x feature streams for the frame at mfc[0], reading
+ * neighbouring frames from mfc[-4] through mfc[4]:
+ *   feat[0]  CEP    cepstra without C0
+ *   feat[1]  DCEP   mfc[2] - mfc[-2], followed by mfc[4] - mfc[-4]
+ *   feat[2]  POW    C0, its difference and its second difference
+ *   feat[3]  D2CEP  (mfc[3] - mfc[-1]) - (mfc[1] - mfc[-3])
+ */
+static void
+feat_s2_4x_cep2feat(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
+{
+    mfcc_t *dcep, *d2cep, *pow;
+    mfcc_t *ahead, *behind;
+    mfcc_t *ahead3, *behind1, *ahead1, *behind3;
+    mfcc_t diff_outer, diff_inner;
+    int32 n_coef, k;
+
+    assert(fcb);
+    assert(feat_cepsize(fcb) == 13);
+    assert(feat_n_stream(fcb) == 4);
+    assert(feat_stream_len(fcb, 0) == 12);
+    assert(feat_stream_len(fcb, 1) == 24);
+    assert(feat_stream_len(fcb, 2) == 3);
+    assert(feat_stream_len(fcb, 3) == 12);
+    assert(feat_window_size(fcb) == 4);
+
+    /* Number of cepstral coefficients once C0 is dropped */
+    n_coef = feat_cepsize(fcb) - 1;
+
+    /* CEP; skip C0 */
+    memcpy(feat[0], mfc[0] + 1, n_coef * sizeof(mfcc_t));
+
+    /*
+     * DCEP(SHORT): mfc[2] - mfc[-2]
+     * DCEP(LONG):  mfc[4] - mfc[-4]
+     */
+    dcep = feat[1];
+
+    ahead = mfc[2] + 1;         /* +1 to skip C0 */
+    behind = mfc[-2] + 1;
+    for (k = 0; k < n_coef; k++)        /* Short-term */
+        dcep[k] = ahead[k] - behind[k];
+
+    ahead = mfc[4] + 1;         /* +1 to skip C0 */
+    behind = mfc[-4] + 1;
+    for (k = 0; k < n_coef; k++)        /* Long-term, stored after short-term */
+        dcep[n_coef + k] = ahead[k] - behind[k];
+
+    /* D2CEP: (mfc[3] - mfc[-1]) - (mfc[1] - mfc[-3]) */
+    ahead3 = mfc[3] + 1;        /* Final +1 to skip C0 */
+    behind1 = mfc[-1] + 1;
+    ahead1 = mfc[1] + 1;
+    behind3 = mfc[-3] + 1;
+
+    d2cep = feat[3];
+    for (k = 0; k < n_coef; k++) {
+        diff_outer = ahead3[k] - behind1[k];
+        diff_inner = ahead1[k] - behind3[k];
+
+        d2cep[k] = diff_outer - diff_inner;
+    }
+
+    /* POW: C0, DC0, D2C0; differences computed as above for rest of cep */
+    pow = feat[2];
+    pow[0] = mfc[0][0];
+    pow[1] = mfc[2][0] - mfc[-2][0];
+
+    diff_outer = mfc[3][0] - mfc[-1][0];
+    diff_inner = mfc[1][0] - mfc[-3][0];
+    pow[2] = diff_outer - diff_inner;
+}
+
+
+/*
+ * Compute the single-stream 39-dimensional feature for one frame.
+ *
+ * Layout of feat[0]: 12 cepstra (C0 skipped), 12 first differences,
+ * 3 power terms (C0, DC0, D2C0), then 12 second differences.
+ * mfc points at the current frame; offsets -3..3 are read.
+ */
+static void
+feat_s3_1x39_cep2feat(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
+{
+    mfcc_t *cep, *dcep, *power, *ddcep;
+    mfcc_t fwd, bwd;
+    int32 c, ncep;
+
+    assert(fcb);
+    assert(feat_cepsize(fcb) == 13);
+    assert(feat_n_stream(fcb) == 1);
+    assert(feat_stream_len(fcb, 0) == 39);
+    assert(feat_window_size(fcb) == 3);
+
+    /* Sections of the single output vector, in storage order. */
+    ncep = feat_cepsize(fcb) - 1;
+    cep = feat[0];
+    dcep = cep + ncep;
+    power = dcep + ncep;
+    ddcep = power + 3;
+
+    /* CEP; the "+ 1" on every source pointer skips C0 */
+    memcpy(cep, mfc[0] + 1, ncep * sizeof(mfcc_t));
+
+    /* DCEP: mfc[2] - mfc[-2] */
+    for (c = 0; c < ncep; c++)
+        dcep[c] = (mfc[2] + 1)[c] - (mfc[-2] + 1)[c];
+
+    /* POW: C0, DC0, D2C0 */
+    power[0] = mfc[0][0];
+    power[1] = mfc[2][0] - mfc[-2][0];
+
+    fwd = mfc[3][0] - mfc[-1][0];
+    bwd = mfc[1][0] - mfc[-3][0];
+    power[2] = fwd - bwd;
+
+    /* D2CEP: (mfc[3] - mfc[-1]) - (mfc[1] - mfc[-3]) */
+    for (c = 0; c < ncep; c++) {
+        fwd = (mfc[3] + 1)[c] - (mfc[-1] + 1)[c];
+        bwd = (mfc[1] + 1)[c] - (mfc[-3] + 1)[c];
+
+        ddcep[c] = fwd - bwd;
+    }
+}
+
+
+/*
+ * Single-stream cepstra-only feature: the output vector is a plain copy
+ * of the current input frame (no neighbouring frames are read).
+ */
+static void
+feat_s3_cep(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
+{
+    mfcc_t *dst, *src;
+
+    assert(fcb);
+    assert(feat_n_stream(fcb) == 1);
+    assert(feat_window_size(fcb) == 0);
+
+    /* CEP */
+    dst = feat[0];
+    src = mfc[0];
+    memcpy(dst, src, feat_cepsize(fcb) * sizeof(mfcc_t));
+}
+
+/*
+ * Single-stream cepstra + delta feature.
+ *
+ * feat[0] receives the current frame followed by the difference
+ * mfc[2] - mfc[-2], each feat_cepsize(fcb) values long.
+ */
+static void
+feat_s3_cep_dcep(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
+{
+    mfcc_t *delta;
+    mfcc_t *ahead, *behind;
+    int32 c;
+
+    assert(fcb);
+    assert(feat_n_stream(fcb) == 1);
+    assert(feat_stream_len(fcb, 0) == feat_cepsize(fcb) * 2);
+    assert(feat_window_size(fcb) == 2);
+
+    /* CEP */
+    memcpy(feat[0], mfc[0], feat_cepsize(fcb) * sizeof(mfcc_t));
+
+    /* DCEP: mfc[2] - mfc[-2], written right after the cepstra */
+    ahead = mfc[2];
+    behind = mfc[-2];
+    delta = feat[0] + feat_cepsize(fcb);
+
+    for (c = 0; c < feat_cepsize(fcb); c++)
+        delta[c] = ahead[c] - behind[c];
+}
+
+/*
+ * Single-stream cepstra + delta + delta-delta feature.
+ *
+ * feat[0] is laid out as three consecutive sections of
+ * feat_cepsize(fcb) values: CEP, DCEP and D2CEP.  Frames at offsets
+ * -(FEAT_DCEP_WIN + 1) .. (FEAT_DCEP_WIN + 1) around mfc[0] are read.
+ */
+static void
+feat_1s_c_d_dd_cep2feat(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
+{
+    mfcc_t *dcep, *ddcep;
+    mfcc_t *ahead, *behind;
+    mfcc_t *ahead_hi, *ahead_lo, *behind_hi, *behind_lo;
+    mfcc_t outer, inner;
+    int32 c;
+
+    assert(fcb);
+    assert(feat_n_stream(fcb) == 1);
+    assert(feat_stream_len(fcb, 0) == feat_cepsize(fcb) * 3);
+    assert(feat_window_size(fcb) == FEAT_DCEP_WIN + 1);
+
+    /* CEP */
+    memcpy(feat[0], mfc[0], feat_cepsize(fcb) * sizeof(mfcc_t));
+
+    /* DCEP: mfc[w] - mfc[-w], where w = FEAT_DCEP_WIN */
+    dcep = feat[0] + feat_cepsize(fcb);
+    ahead = mfc[FEAT_DCEP_WIN];
+    behind = mfc[-FEAT_DCEP_WIN];
+
+    for (c = 0; c < feat_cepsize(fcb); c++)
+        dcep[c] = ahead[c] - behind[c];
+
+    /*
+     * D2CEP: (mfc[w+1] - mfc[-w+1]) - (mfc[w-1] - mfc[-w-1]),
+     * where w = FEAT_DCEP_WIN
+     */
+    ddcep = dcep + feat_cepsize(fcb);
+    ahead_hi = mfc[FEAT_DCEP_WIN + 1];
+    behind_hi = mfc[-FEAT_DCEP_WIN + 1];
+    ahead_lo = mfc[FEAT_DCEP_WIN - 1];
+    behind_lo = mfc[-FEAT_DCEP_WIN - 1];
+
+    for (c = 0; c < feat_cepsize(fcb); c++) {
+        outer = ahead_hi[c] - behind_hi[c];
+        inner = ahead_lo[c] - behind_lo[c];
+
+        ddcep[c] = outer - inner;
+    }
+}
+
+/*
+ * Single-stream cepstra + delta + long delta + delta-delta feature.
+ *
+ * feat[0] is laid out as four consecutive sections of
+ * feat_cepsize(fcb) values: CEP, DCEP, LDCEP and D2CEP.
+ */
+static void
+feat_1s_c_d_ld_dd_cep2feat(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
+{
+    mfcc_t *dcep, *ldcep, *ddcep;
+    mfcc_t *ahead, *behind;
+    mfcc_t *ahead_hi, *ahead_lo, *behind_hi, *behind_lo;
+    mfcc_t outer, inner;
+    int32 c;
+
+    assert(fcb);
+    assert(feat_n_stream(fcb) == 1);
+    assert(feat_stream_len(fcb, 0) == feat_cepsize(fcb) * 4);
+    assert(feat_window_size(fcb) == FEAT_DCEP_WIN * 2);
+
+    /* CEP */
+    memcpy(feat[0], mfc[0], feat_cepsize(fcb) * sizeof(mfcc_t));
+
+    /* DCEP: mfc[w] - mfc[-w], where w = FEAT_DCEP_WIN */
+    dcep = feat[0] + feat_cepsize(fcb);
+    ahead = mfc[FEAT_DCEP_WIN];
+    behind = mfc[-FEAT_DCEP_WIN];
+
+    for (c = 0; c < feat_cepsize(fcb); c++)
+        dcep[c] = ahead[c] - behind[c];
+
+    /* LDCEP: mfc[w] - mfc[-w], where w = FEAT_DCEP_WIN * 2 */
+    ldcep = dcep + feat_cepsize(fcb);
+    ahead = mfc[FEAT_DCEP_WIN * 2];
+    behind = mfc[-FEAT_DCEP_WIN * 2];
+
+    for (c = 0; c < feat_cepsize(fcb); c++)
+        ldcep[c] = ahead[c] - behind[c];
+
+    /*
+     * D2CEP: (mfc[w+1] - mfc[-w+1]) - (mfc[w-1] - mfc[-w-1]),
+     * where w = FEAT_DCEP_WIN
+     */
+    ddcep = ldcep + feat_cepsize(fcb);
+    ahead_hi = mfc[FEAT_DCEP_WIN + 1];
+    behind_hi = mfc[-FEAT_DCEP_WIN + 1];
+    ahead_lo = mfc[FEAT_DCEP_WIN - 1];
+    behind_lo = mfc[-FEAT_DCEP_WIN - 1];
+
+    for (c = 0; c < feat_cepsize(fcb); c++) {
+        outer = ahead_hi[c] - behind_hi[c];
+        inner = ahead_lo[c] - behind_lo[c];
+
+        ddcep[c] = outer - inner;
+    }
+}
+
+/*
+ * Build each output stream by concatenating that stream's slice from the
+ * 2 * win + 1 input frames centred on mfc[0], where win is the window size.
+ *
+ * Each input frame holds all streams back to back; the per-frame width of
+ * a stream is its configured length divided by the number of window frames.
+ */
+static void
+feat_copy(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
+{
+    int32 win, slot, s;
+
+    win = feat_window_size(fcb);
+
+    /* slot 0 is frame -win, slot 2 * win is frame +win */
+    for (slot = 0; slot <= 2 * win; ++slot) {
+        mfcc_t *frame = mfc[slot - win];
+        uint32 in_off = 0;      /* read position inside the input frame */
+
+        for (s = 0; s < feat_n_stream(fcb); ++s) {
+            /* Unscale the stream length by the window. */
+            uint32 width = feat_stream_len(fcb, s) / (2 * win + 1);
+
+            memcpy(feat[s] + (slot * width),
+                   frame + in_off,
+                   width * sizeof(mfcc_t));
+            in_off += width;
+        }
+    }
+}
+
+/**
+ * Create a feature control block for the named feature type.
+ *
+ * Recognized names are "s2_4x", "s3_1x39" / "1s_12c_12d_3p_12dd", and the
+ * prefixes "1s_c_d_dd", "1s_c_d_ld_dd", "cep_dcep" / "1s_c_d",
+ * "cep" / "1s_c", "1s_3c" and "1s_4c".  Anything else is parsed as a
+ * comma-separated list of stream widths with an optional ":window" suffix.
+ *
+ * @param type     Feature type name (see above).
+ * @param cmn      Cepstral mean normalization mode; CMN_NONE disables it.
+ * @param varnorm  Stored in the block and passed on to the CMN routines.
+ * @param agc      Gain control mode; AGC_NONE disables it.
+ * @param breport  If non-zero, log the chosen configuration.
+ * @param cepsize  Input vector length; 0 selects the default of 13.
+ * @return New block with a reference count of 1, or NULL when a fixed
+ *         layout type is requested with a cepsize other than 13.  A
+ *         malformed generic type string is reported through E_FATAL.
+ */
+feat_t *
+feat_init(char const *type, cmn_type_t cmn, int32 varnorm,
+          agc_type_t agc, int32 breport, int32 cepsize)
+{
+    feat_t *fcb;
+
+    if (cepsize == 0)
+        cepsize = 13;
+    if (breport)
+        E_INFO
+            ("Initializing feature stream to type: '%s', ceplen=%d, CMN='%s', VARNORM='%s', AGC='%s'\n",
+             type, cepsize, cmn_type_str[cmn], varnorm ? "yes" : "no", agc_type_str[agc]);
+
+    fcb = (feat_t *) ckd_calloc(1, sizeof(feat_t));
+    fcb->refcount = 1;
+    fcb->name = (char *) ckd_salloc(type);
+    if (strcmp(type, "s2_4x") == 0) {
+        /* Sphinx-II format 4-stream feature (Hack!! hardwired constants below) */
+        if (cepsize != 13) {
+            E_ERROR("s2_4x features require cepsize == 13\n");
+            /* The name copy was already made; release it with the block. */
+            ckd_free((void *) fcb->name);
+            ckd_free(fcb);
+            return NULL;
+        }
+        fcb->cepsize = 13;
+        fcb->n_stream = 4;
+        fcb->stream_len = (uint32 *) ckd_calloc(4, sizeof(uint32));
+        fcb->stream_len[0] = 12;
+        fcb->stream_len[1] = 24;
+        fcb->stream_len[2] = 3;
+        fcb->stream_len[3] = 12;
+        fcb->out_dim = 51;
+        fcb->window_size = 4;
+        fcb->compute_feat = feat_s2_4x_cep2feat;
+    }
+    else if ((strcmp(type, "s3_1x39") == 0) || (strcmp(type, "1s_12c_12d_3p_12dd") == 0)) {
+        /* 1-stream cep/dcep/pow/ddcep (Hack!! hardwired constants below) */
+        if (cepsize != 13) {
+            /* Report the type that was actually requested. */
+            E_ERROR("%s features require cepsize == 13\n", type);
+            ckd_free((void *) fcb->name);
+            ckd_free(fcb);
+            return NULL;
+        }
+        fcb->cepsize = 13;
+        fcb->n_stream = 1;
+        fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32));
+        fcb->stream_len[0] = 39;
+        fcb->out_dim = 39;
+        fcb->window_size = 3;
+        fcb->compute_feat = feat_s3_1x39_cep2feat;
+    }
+    else if (strncmp(type, "1s_c_d_dd", 9) == 0) {
+        fcb->cepsize = cepsize;
+        fcb->n_stream = 1;
+        fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32));
+        fcb->stream_len[0] = cepsize * 3;
+        fcb->out_dim = cepsize * 3;
+        fcb->window_size = FEAT_DCEP_WIN + 1; /* ddcep needs the extra 1 */
+        fcb->compute_feat = feat_1s_c_d_dd_cep2feat;
+    }
+    else if (strncmp(type, "1s_c_d_ld_dd", 12) == 0) {
+        fcb->cepsize = cepsize;
+        fcb->n_stream = 1;
+        fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32));
+        fcb->stream_len[0] = cepsize * 4;
+        fcb->out_dim = cepsize * 4;
+        fcb->window_size = FEAT_DCEP_WIN * 2;
+        fcb->compute_feat = feat_1s_c_d_ld_dd_cep2feat;
+    }
+    else if (strncmp(type, "cep_dcep", 8) == 0 || strncmp(type, "1s_c_d", 6) == 0) {
+        /* 1-stream cep/dcep */
+        fcb->cepsize = cepsize;
+        fcb->n_stream = 1;
+        fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32));
+        fcb->stream_len[0] = feat_cepsize(fcb) * 2;
+        fcb->out_dim = fcb->stream_len[0];
+        fcb->window_size = 2;
+        fcb->compute_feat = feat_s3_cep_dcep;
+    }
+    else if (strncmp(type, "cep", 3) == 0 || strncmp(type, "1s_c", 4) == 0) {
+        /* 1-stream cep */
+        fcb->cepsize = cepsize;
+        fcb->n_stream = 1;
+        fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32));
+        fcb->stream_len[0] = feat_cepsize(fcb);
+        fcb->out_dim = fcb->stream_len[0];
+        fcb->window_size = 0;
+        fcb->compute_feat = feat_s3_cep;
+    }
+    else if (strncmp(type, "1s_3c", 5) == 0 || strncmp(type, "1s_4c", 5) == 0) {
+        /* 1-stream cep with frames concatenated, so called cepwin features */
+        if (strncmp(type, "1s_3c", 5) == 0)
+            fcb->window_size = 3;
+        else
+            fcb->window_size = 4;
+
+        fcb->cepsize = cepsize;
+        fcb->n_stream = 1;
+        fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32));
+        fcb->stream_len[0] = feat_cepsize(fcb) * (2 * fcb->window_size + 1);
+        fcb->out_dim = fcb->stream_len[0];
+        fcb->compute_feat = feat_copy;
+    }
+    else {
+        int32 i, k, l;
+        size_t len;
+        char *strp;
+        char *mtype = ckd_salloc(type);
+        char *wd = ckd_salloc(type);
+        /*
+         * Generic definition: Format should be %d,%d,%d,...,%d (i.e.,
+         * comma separated list of feature stream widths; #items =
+         * #streams). An optional window size (frames will be
+         * concatenated) is also allowed, which can be specified with
+         * a colon after the list of feature streams.
+         */
+        len = strlen(mtype);
+        k = 0;
+        /* Separators are only looked for strictly inside the string.
+         * The bound is written as "i + 1 < len" because len is unsigned:
+         * "len - 1" would wrap around for an empty string. */
+        for (i = 1; (size_t) i + 1 < len; i++) {
+            if (mtype[i] == ',') {
+                mtype[i] = ' ';
+                k++;
+            }
+            else if (mtype[i] == ':') {
+                mtype[i] = '\0';
+                fcb->window_size = atoi(mtype + i + 1);
+                break;
+            }
+        }
+        k++;                    /* Presumably there are (#commas+1) streams */
+        fcb->n_stream = k;
+        fcb->stream_len = (uint32 *) ckd_calloc(k, sizeof(uint32));
+
+        /* Scan individual feature stream lengths */
+        strp = mtype;
+        i = 0;
+        fcb->out_dim = 0;
+        fcb->cepsize = 0;
+        while (sscanf(strp, "%s%n", wd, &l) == 1) {
+            strp += l;
+            if ((i >= fcb->n_stream)
+                || (sscanf(wd, "%u", &(fcb->stream_len[i])) != 1)
+                || (fcb->stream_len[i] <= 0))
+                E_FATAL("Bad feature type argument\n");
+            /* Input size before windowing */
+            fcb->cepsize += fcb->stream_len[i];
+            if (fcb->window_size > 0)
+                fcb->stream_len[i] *= (fcb->window_size * 2 + 1);
+            /* Output size after windowing */
+            fcb->out_dim += fcb->stream_len[i];
+            i++;
+        }
+        if (i != fcb->n_stream)
+            E_FATAL("Bad feature type argument\n");
+        if (fcb->cepsize != cepsize)
+            E_FATAL("Bad feature type argument\n");
+
+        /* Input is already the feature stream */
+        fcb->compute_feat = feat_copy;
+        ckd_free(mtype);
+        ckd_free(wd);
+    }
+
+    if (cmn != CMN_NONE)
+        fcb->cmn_struct = cmn_init(feat_cepsize(fcb));
+    fcb->cmn = cmn;
+    fcb->varnorm = varnorm;
+    if (agc != AGC_NONE) {
+        fcb->agc_struct = agc_init();
+        /*
+         * No need to check if agc is set to EMAX; agc_emax_set() changes only emax related things
+         * Moreover, if agc is not NONE and block mode is used, feat_agc() SILENTLY
+         * switches to EMAX
+         */
+        /* HACK: hardwired initial estimates based on use of CMN (from Sphinx2) */
+        agc_emax_set(fcb->agc_struct, (cmn != CMN_NONE) ? 5.0 : 10.0);
+    }
+    fcb->agc = agc;
+    /*
+     * Make sure this buffer is large enough to be used in feat_s2mfc2feat_block_utt()
+     */
+    fcb->cepbuf = (mfcc_t **) ckd_calloc_2d((LIVEBUFBLOCKSIZE < feat_window_size(fcb) * 2) ? feat_window_size(fcb) * 2 : LIVEBUFBLOCKSIZE,
+                                            feat_cepsize(fcb),
+                                            sizeof(mfcc_t));
+    /* This one is actually just an array of pointers to "flatten out"
+     * wraparounds. */
+    fcb->tmpcepbuf = (mfcc_t** )ckd_calloc(2 * feat_window_size(fcb) + 1,
+                                           sizeof(*fcb->tmpcepbuf));
+
+    return fcb;
+}
+
+
+/**
+ * Write a feature array to fp as text: one header line per frame, then
+ * one line per stream holding that stream's values.
+ *
+ * @param fcb  Feature control block giving the stream layout.
+ * @param feat Array indexed as feat[frame][stream][component].
+ * @param nfr  Number of frames to print.
+ * @param fp   Output file; it is flushed before returning.
+ */
+void
+feat_print(feat_t * fcb, mfcc_t *** feat, int32 nfr, FILE * fp)
+{
+    uint32 frame, part, c;
+
+    for (frame = 0; frame < nfr; frame++) {
+        mfcc_t **row = feat[frame];
+
+        fprintf(fp, "%8d:\n", frame);
+
+        for (part = 0; part < feat_dimension1(fcb); part++) {
+            mfcc_t *vec = row[part];
+
+            fprintf(fp, "\t%2d:", part);
+
+            for (c = 0; c < feat_dimension2(fcb, part); c++)
+                fprintf(fp, " %8.4f", MFCC2FLOAT(vec[c]));
+            fprintf(fp, "\n");
+        }
+    }
+
+    fflush(fp);
+}
+
+/*
+ * Apply cepstral mean normalization to nfr frames of mfc, in place.
+ *
+ * Unless the call covers a whole utterance (beginutt and endutt both
+ * set), any enabled CMN mode is switched to CMN_PRIOR, and that switch is
+ * stored back into fcb->cmn.
+ */
+static void
+feat_cmn(feat_t *fcb, mfcc_t **mfc, int32 nfr, int32 beginutt, int32 endutt)
+{
+    cmn_type_t mode = fcb->cmn;
+
+    /* Only cmn_prior in block computation mode. */
+    if (mode != CMN_NONE && (!beginutt || !endutt)) {
+        mode = CMN_PRIOR;
+        fcb->cmn = mode;
+    }
+
+    if (mode == CMN_CURRENT) {
+        cmn(fcb->cmn_struct, mfc, fcb->varnorm, nfr);
+    }
+    else if (mode == CMN_PRIOR) {
+        cmn_prior(fcb->cmn_struct, mfc, fcb->varnorm, nfr);
+        if (endutt)
+            cmn_prior_update(fcb->cmn_struct);
+    }
+    /* Any other mode: nothing to normalize. */
+
+    cep_dump_dbg(fcb, mfc, nfr, "After CMN");
+}
+
+/*
+ * Apply automatic gain control to nfr frames of mfc.
+ *
+ * Unless the call covers a whole utterance (beginutt and endutt both
+ * set), any enabled AGC mode is treated as AGC_EMAX for this call only;
+ * fcb->agc itself is left unchanged.
+ */
+static void
+feat_agc(feat_t *fcb, mfcc_t **mfc, int32 nfr, int32 beginutt, int32 endutt)
+{
+    agc_type_t mode = fcb->agc;
+
+    /* Only agc_emax in block computation mode. */
+    if (mode != AGC_NONE && (!beginutt || !endutt))
+        mode = AGC_EMAX;
+
+    if (mode == AGC_MAX) {
+        agc_max(fcb->agc_struct, mfc, nfr);
+    }
+    else if (mode == AGC_EMAX) {
+        agc_emax(fcb->agc_struct, mfc, nfr);
+        if (endutt)
+            agc_emax_update(fcb->agc_struct);
+    }
+    else if (mode == AGC_NOISE) {
+        agc_noise(fcb->agc_struct, mfc, nfr);
+    }
+    /* Any other mode: leave the frames untouched. */
+
+    cep_dump_dbg(fcb, mfc, nfr, "After AGC");
+}
+
+/*
+ * Turn a padded block of nfr input frames into nfr - 2 * win feature
+ * vectors, then apply the LDA transform and subvector projection when
+ * the control block has them configured.
+ *
+ * The first and last win frames of mfc only serve as context.
+ */
+static void
+feat_compute_utt(feat_t *fcb, mfcc_t **mfc, int32 nfr, int32 win, mfcc_t ***feat)
+{
+    int32 out, n_out;
+
+    cep_dump_dbg(fcb, mfc, nfr, "Incoming features (after padding)");
+
+    /* Output frame `out` is centred on input frame `out + win`. */
+    n_out = nfr - win * 2;
+    for (out = 0; out < n_out; out++)
+        fcb->compute_feat(fcb, mfc + win + out, feat[out]);
+
+    feat_print_dbg(fcb, feat, n_out, "After dynamic feature computation");
+
+    if (fcb->lda) {
+        feat_lda_transform(fcb, feat, n_out);
+        feat_print_dbg(fcb, feat, n_out, "After LDA");
+    }
+
+    if (fcb->subvecs) {
+        feat_subvec_project(fcb, feat, n_out);
+        feat_print_dbg(fcb, feat, n_out, "After subvector projection");
+    }
+}
+
+
+/**
+ * Read Sphinx-II format mfc file (s2mfc = Sphinx-II format MFC data),
+ * normalize the frames read, and pad both ends by replicating the first
+ * and last frame so that win frames of context exist around [sf..ef].
+ * If out_mfc is NULL, no actual reading will be done, and the number of
+ * frames (plus padding) that would be read is returned.
+ *
+ * It's important that normalization is done before padding because
+ * frames outside the data we are interested in shouldn't be taken
+ * into normalization stats.
+ *
+ * The file starts with a 32-bit count of the floats that follow; a count
+ * that only matches the file size after byte swapping marks the file as
+ * byte-reversed.
+ *
+ * @param fcb     Feature control block supplying the CMN/AGC state.
+ * @param file    Path of the mfc file.
+ * @param win     Number of context frames wanted on each side.
+ * @param sf      First frame wanted.
+ * @param ef      Last frame wanted; negative means "to the end of file".
+ * @param out_mfc Receives a newly allocated frame array on success (set
+ *                to NULL first, so it is safe to free after a failure).
+ * @param maxfr   If > 0, upper bound on the number of frames returned.
+ * @param cepsize Number of values per input frame.
+ * @return # frames read (plus padding) if successful, -1 if
+ * error (e.g., mfc array too small).
+ */
+static int32
+feat_s2mfc_read_norm_pad(feat_t *fcb, char *file, int32 win,
+                         int32 sf, int32 ef,
+                         mfcc_t ***out_mfc,
+                         int32 maxfr,
+                         int32 cepsize)
+{
+    FILE *fp;
+    int32 n_float32;
+    float32 *float_feat = NULL;
+    struct stat statbuf;
+    int32 i, n, byterev;
+    int32 start_pad, end_pad;
+    mfcc_t **mfc = NULL;
+
+    /* Initialize the output pointer to NULL, so that any attempts to
+       free() it if we fail before allocating it will not segfault! */
+    if (out_mfc)
+        *out_mfc = NULL;
+    E_INFO("Reading mfc file: '%s'[%d..%d]\n", file, sf, ef);
+    if (ef >= 0 && ef <= sf) {
+        E_ERROR("%s: End frame (%d) <= Start frame (%d)\n", file, ef, sf);
+        return -1;
+    }
+
+    /* Find filesize; HACK!! To get around intermittent NFS failures, use stat_retry */
+    if ((stat_retry(file, &statbuf) < 0)
+        || ((fp = fopen(file, "rb")) == NULL)) {
+        E_ERROR_SYSTEM("Failed to open file '%s' for reading", file);
+        return -1;
+    }
+
+    /* Read #floats in header */
+    if (fread_retry(&n_float32, sizeof(int32), 1, fp) != 1) {
+        E_ERROR("%s: fread(#floats) failed\n", file);
+        goto fail;
+    }
+
+    /* Check if n_float32 matches file size */
+    byterev = 0;
+    if ((int32) (n_float32 * sizeof(float32) + 4) != (int32) statbuf.st_size) { /* RAH, typecast both sides to remove compile warning */
+        n = n_float32;
+        SWAP_INT32(&n);
+
+        if ((int32) (n * sizeof(float32) + 4) != (int32) (statbuf.st_size)) { /* RAH, typecast both sides to remove compile warning */
+            /* st_size is not an int; convert it so the arguments match
+             * the %d / %08x conversions. */
+            E_ERROR
+                ("%s: Header size field: %d(%08x); filesize: %d(%08x)\n",
+                 file, n_float32, (uint32) n_float32,
+                 (int32) statbuf.st_size, (uint32) statbuf.st_size);
+            goto fail;
+        }
+
+        n_float32 = n;
+        byterev = 1;
+    }
+    if (n_float32 <= 0) {
+        E_ERROR("%s: Header size field (#floats) = %d\n", file, n_float32);
+        goto fail;
+    }
+
+    /* Convert n to #frames of input */
+    n = n_float32 / cepsize;
+    if (n * cepsize != n_float32) {
+        E_ERROR("Header size field: %d; not multiple of %d\n", n_float32,
+                cepsize);
+        goto fail;
+    }
+
+    /* Check start and end frames */
+    if (sf > 0) {
+        if (sf >= n) {
+            E_ERROR("%s: Start frame (%d) beyond file size (%d)\n", file,
+                    sf, n);
+            goto fail;
+        }
+    }
+    if (ef < 0)
+        ef = n-1;
+    else if (ef >= n) {
+        E_WARN("%s: End frame (%d) beyond file size (%d), will truncate\n",
+               file, ef, n);
+        ef = n-1;
+    }
+
+    /* Add window to start and end frames; whatever falls outside the
+     * file becomes padding. */
+    sf -= win;
+    ef += win;
+    if (sf < 0) {
+        start_pad = -sf;
+        sf = 0;
+    }
+    else
+        start_pad = 0;
+    if (ef >= n) {
+        end_pad = ef - n + 1;
+        ef = n - 1;
+    }
+    else
+        end_pad = 0;
+
+    /* Limit n if indicated by [sf..ef] */
+    if ((ef - sf + 1) < n)
+        n = (ef - sf + 1);
+    if (maxfr > 0 && n + start_pad + end_pad > maxfr) {
+        E_ERROR("%s: Maximum output size(%d frames) < actual #frames(%d)\n",
+                file, maxfr, n + start_pad + end_pad);
+        goto fail;
+    }
+
+    /* If no output buffer was supplied, then skip the actual data reading. */
+    if (out_mfc != NULL) {
+        /* Position at desired start frame and read actual MFC data */
+        mfc = (mfcc_t **)ckd_calloc_2d(n + start_pad + end_pad, cepsize, sizeof(mfcc_t));
+        if (sf > 0
+            && fseek(fp, sf * cepsize * sizeof(float32), SEEK_CUR) != 0) {
+            E_ERROR_SYSTEM("Failed to seek to frame %d in file '%s'", sf, file);
+            goto fail;
+        }
+        n_float32 = n * cepsize;
+#ifdef FIXED_POINT
+        float_feat = ckd_calloc(n_float32, sizeof(float32));
+#else
+        /* Read straight into the frame storage, after the start padding. */
+        float_feat = mfc[start_pad];
+#endif
+        if (fread_retry(float_feat, sizeof(float32), n_float32, fp) != n_float32) {
+            E_ERROR("%s: fread(%dx%d) (MFC data) failed\n", file, n, cepsize);
+            goto fail;
+        }
+        if (byterev) {
+            for (i = 0; i < n_float32; i++) {
+                SWAP_FLOAT32(&float_feat[i]);
+            }
+        }
+#ifdef FIXED_POINT
+        for (i = 0; i < n_float32; ++i) {
+            mfc[start_pad][i] = FLOAT2MFCC(float_feat[i]);
+        }
+        ckd_free(float_feat);
+        float_feat = NULL;
+#endif
+
+        /* Normalize */
+        feat_cmn(fcb, mfc + start_pad, n, 1, 1);
+        feat_agc(fcb, mfc + start_pad, n, 1, 1);
+
+        /* Replicate start and end frames if necessary. */
+        for (i = 0; i < start_pad; ++i)
+            memcpy(mfc[i], mfc[start_pad], cepsize * sizeof(mfcc_t));
+        for (i = 0; i < end_pad; ++i)
+            memcpy(mfc[start_pad + n + i], mfc[start_pad + n - 1],
+                   cepsize * sizeof(mfcc_t));
+
+        *out_mfc = mfc;
+    }
+
+    fclose(fp);
+    return n + start_pad + end_pad;
+
+fail:
+    /* Common error exit: release whatever was acquired so far. */
+#ifdef FIXED_POINT
+    /* Only in this configuration is float_feat a separate allocation. */
+    if (float_feat)
+        ckd_free(float_feat);
+#endif
+    if (mfc)
+        ckd_free_2d(mfc);
+    fclose(fp);
+    return -1;
+}
+
+
+
+/**
+ * Read an mfc file and compute feature vectors from it.
+ *
+ * The file name is built as dir + "/" + file + cepext; dir may be NULL
+ * (file then already carries its path) and cepext is only appended when
+ * file does not already end with it.
+ *
+ * @param fcb    Feature control block.
+ * @param file   File name, with or without the extension.
+ * @param dir    Directory to prepend, or NULL.
+ * @param cepext Extension to append, or NULL for none.
+ * @param sf     First frame wanted.
+ * @param ef     Last frame wanted; negative means "to the end of file".
+ * @param feat   Output array, or NULL to only count the frames.
+ * @param maxfr  If >= 0, limit on the number of output frames; the window
+ *               padding is added to it before reading.
+ * @return Number of feature frames computed (or that would be computed
+ *         when feat is NULL), or a negative value on error.
+ */
+int32
+feat_s2mfc2feat(feat_t * fcb, const char *file, const char *dir, const char *cepext,
+                int32 sf, int32 ef, mfcc_t *** feat, int32 maxfr)
+{
+    char *path;
+    const char *ps = "/";
+    int32 win, nfr;
+    size_t file_length, cepext_length, path_length = 0;
+    mfcc_t **mfc;
+
+    if (fcb->cepsize <= 0) {
+        E_ERROR("Bad cepsize: %d\n", fcb->cepsize);
+        return -1;
+    }
+
+    if (cepext == NULL)
+        cepext = "";
+
+    /*
+     * Create mfc filename, combining file, dir and extension if
+     * necessary
+     */
+
+    /*
+     * First we decide about the path. If dir is defined, then use
+     * it. Otherwise assume the filename already contains the path.
+     */
+    if (dir == NULL) {
+        dir = "";
+        ps = "";
+        /*
+         * This is not true but some 3rd party apps
+         * may parse the output explicitly checking for this line
+         */
+        E_INFO("At directory . (current directory)\n");
+    }
+    else {
+        E_INFO("At directory %s\n", dir);
+        /*
+         * Do not forget the path separator!
+         */
+        path_length += strlen(dir) + 1;
+    }
+
+    /*
+     * Include cepext, if it's not already part of the filename.
+     */
+    file_length = strlen(file);
+    cepext_length = strlen(cepext);
+    if ((file_length > cepext_length)
+        && (strcmp(file + file_length - cepext_length, cepext) == 0)) {
+        cepext = "";
+        cepext_length = 0;
+    }
+
+    /*
+     * Do not forget the '\0'
+     */
+    path_length += file_length + cepext_length + 1;
+    path = (char*) ckd_calloc(path_length, sizeof(char));
+
+#ifdef HAVE_SNPRINTF
+    /*
+     * Paranoia is our best friend...
+     *
+     * snprintf() returns the length the full string needs, without the
+     * terminating '\0', or a negative value on error.  The output was
+     * truncated whenever that length is >= the buffer size, and the
+     * buffer then has to grow to length + 1.
+     */
+    for (;;) {
+        int needed = snprintf(path, path_length, "%s%s%s%s", dir, ps, file, cepext);
+
+        if (needed < 0) {
+            E_ERROR("Failed to build path for file '%s'\n", file);
+            ckd_free(path);
+            return -1;
+        }
+        if ((size_t) needed < path_length)
+            break;
+        path_length = (size_t) needed + 1;
+        path = (char*) ckd_realloc(path, path_length * sizeof(char));
+    }
+#else
+    sprintf(path, "%s%s%s%s", dir, ps, file, cepext);
+#endif
+
+    win = feat_window_size(fcb);
+    /* Pad maxfr with win, so we read enough raw feature data to
+     * calculate the requisite number of dynamic features. */
+    if (maxfr >= 0)
+        maxfr += win * 2;
+
+    if (feat != NULL) {
+        /* Read mfc file including window or padding if necessary. */
+        nfr = feat_s2mfc_read_norm_pad(fcb, path, win, sf, ef, &mfc, maxfr, fcb->cepsize);
+        ckd_free(path);
+        if (nfr < 0) {
+            /* The reader sets mfc to NULL before it can fail. */
+            ckd_free_2d((void **) mfc);
+            return -1;
+        }
+
+        /* Actually compute the features */
+        feat_compute_utt(fcb, mfc, nfr, win, feat);
+
+        ckd_free_2d((void **) mfc);
+    }
+    else {
+        /* Just calculate the number of frames we would need. */
+        nfr = feat_s2mfc_read_norm_pad(fcb, path, win, sf, ef, NULL, maxfr, fcb->cepsize);
+        ckd_free(path);
+        if (nfr < 0)
+            return nfr;
+    }
+
+
+    /* The padding frames do not produce output. */
+    return (nfr - win * 2);
+}
+
+/*
+ * Compute features for a complete utterance held in uttcep.
+ *
+ * A temporary table of frame pointers is built: the caller's nfr frames
+ * in the middle, with win padding frames on each side.  The padding
+ * frames live in rows of fcb->cepbuf and are copies of the first and
+ * last utterance frame.  CMN and AGC run on the caller's frames before
+ * the padding is filled in.  Returns nfr.
+ */
+static int32
+feat_s2mfc2feat_block_utt(feat_t * fcb, mfcc_t ** uttcep,
+                          int32 nfr, mfcc_t *** ofeat)
+{
+    mfcc_t **padded;
+    int32 p, win, cepsize;
+
+    win = feat_window_size(fcb);
+    cepsize = feat_cepsize(fcb);
+
+    /* Copy and pad out the utterance (this requires that the
+     * feature computation functions always access the buffer via
+     * the frame pointers, which they do) */
+    padded = (mfcc_t **)ckd_calloc(nfr + win * 2, sizeof(mfcc_t *));
+    memcpy(padded + win, uttcep, nfr * sizeof(mfcc_t *));
+
+    /* Do normalization before we interpolate on the boundary */
+    feat_cmn(fcb, padded + win, nfr, 1, 1);
+    feat_agc(fcb, padded + win, nfr, 1, 1);
+
+    /* Now interpolate: leading padding uses cepbuf rows [0, win),
+     * trailing padding uses rows [win, 2 * win). */
+    for (p = 0; p < win; ++p) {
+        mfcc_t *lead = fcb->cepbuf[p];
+        mfcc_t *trail;
+
+        padded[p] = lead;
+        memcpy(lead, uttcep[0], cepsize * sizeof(mfcc_t));
+
+        trail = fcb->cepbuf[win + p];
+        padded[nfr + win + p] = trail;
+        memcpy(trail, uttcep[nfr - 1], cepsize * sizeof(mfcc_t));
+    }
+    /* Compute as usual. */
+    feat_compute_utt(fcb, padded, nfr + win * 2, win, ofeat);
+    ckd_free(padded);
+    return nfr;
+}
+
+/**
+ * Compute features incrementally from blocks of input frames.
+ *
+ * Input frames are copied into the circular buffer fcb->cepbuf, which
+ * is indexed modulo LIVEBUFBLOCKSIZE: fcb->bufpos is the write position
+ * and fcb->curpos the position of the next frame to compute a feature for.
+ * A feature is only computed once win frames of context follow it in the
+ * buffer, so output lags input until endutt replicates the last frame.
+ *
+ * @param fcb        Feature control block holding the buffer state.
+ * @param uttcep     Input frames; normalized in place by CMN/AGC.
+ * @param inout_ncep In: number of frames in uttcep.  Out: lowered to the
+ *                   number actually consumed when the buffer cannot take
+ *                   them all.  NULL is treated as zero frames.
+ * @param beginutt   Non-zero if this block starts an utterance.
+ * @param endutt     Non-zero if this block ends an utterance.
+ * @param ofeat      Output array receiving the computed features.
+ * @return Number of feature vectors written to ofeat (0 if there is not
+ *         yet enough context); for the whole-utterance case, the value
+ *         returned by feat_s2mfc2feat_block_utt().
+ */
+int32
+feat_s2mfc2feat_live(feat_t * fcb, mfcc_t ** uttcep, int32 *inout_ncep,
+ int32 beginutt, int32 endutt, mfcc_t *** ofeat)
+{
+ int32 win, cepsize, nbufcep;
+ int32 i, j, nfeatvec;
+ int32 zero = 0;
+
+ /* Avoid having to check this everywhere. */
+ if (inout_ncep == NULL) inout_ncep = &zero;
+
+ /* Special case for entire utterances. */
+ if (beginutt && endutt && *inout_ncep > 0)
+ return feat_s2mfc2feat_block_utt(fcb, uttcep, *inout_ncep, ofeat);
+
+ win = feat_window_size(fcb);
+ cepsize = feat_cepsize(fcb);
+
+ /* Empty the input buffer on start of utterance. */
+ if (beginutt)
+ fcb->bufpos = fcb->curpos;
+
+ /* Calculate how much data is in the buffer already. */
+ nbufcep = fcb->bufpos - fcb->curpos;
+ /* A negative difference means the write position has wrapped around. */
+ if (nbufcep < 0)
+ nbufcep = fcb->bufpos + LIVEBUFBLOCKSIZE - fcb->curpos;
+ /* Add any data that we have to replicate. */
+ if (beginutt && *inout_ncep > 0)
+ nbufcep += win;
+ if (endutt)
+ nbufcep += win;
+
+ /* Only consume as much input as will fit in the buffer. */
+ if (nbufcep + *inout_ncep > LIVEBUFBLOCKSIZE) {
+ /* We also can't overwrite the trailing window, hence the
+ * reason why win is subtracted here. */
+ /* NOTE(review): nothing here keeps this result from going negative
+ * when nbufcep + win exceeds LIVEBUFBLOCKSIZE -- confirm whether
+ * that can happen. */
+ *inout_ncep = LIVEBUFBLOCKSIZE - nbufcep - win;
+ /* Cancel end of utterance processing. */
+ endutt = FALSE;
+ }
+
+ /* FIXME: Don't modify the input! */
+ feat_cmn(fcb, uttcep, *inout_ncep, beginutt, endutt);
+ feat_agc(fcb, uttcep, *inout_ncep, beginutt, endutt);
+
+ /* Replicate first frame into the first win frames if we're at the
+ * beginning of the utterance and there was some actual input to
+ * deal with. (FIXME: Not entirely sure why that condition) */
+ if (beginutt && *inout_ncep > 0) {
+ for (i = 0; i < win; i++) {
+ memcpy(fcb->cepbuf[fcb->bufpos++], uttcep[0],
+ cepsize * sizeof(mfcc_t));
+ fcb->bufpos %= LIVEBUFBLOCKSIZE;
+ }
+ /* Move the current pointer past this data. */
+ fcb->curpos = fcb->bufpos;
+ /* The replicated frames are context only, so stop counting them. */
+ nbufcep -= win;
+ }
+
+ /* Copy in frame data to the circular buffer. */
+ for (i = 0; i < *inout_ncep; ++i) {
+ memcpy(fcb->cepbuf[fcb->bufpos++], uttcep[i],
+ cepsize * sizeof(mfcc_t));
+ fcb->bufpos %= LIVEBUFBLOCKSIZE;
+ ++nbufcep;
+ }
+
+ /* Replicate last frame into the last win frames if we're at the
+ * end of the utterance (even if there was no input, so we can
+ * flush the output). */
+ if (endutt) {
+ int32 tpos; /* Index of last input frame. */
+ if (fcb->bufpos == 0)
+ tpos = LIVEBUFBLOCKSIZE - 1;
+ else
+ tpos = fcb->bufpos - 1;
+ for (i = 0; i < win; ++i) {
+ memcpy(fcb->cepbuf[fcb->bufpos++], fcb->cepbuf[tpos],
+ cepsize * sizeof(mfcc_t));
+ fcb->bufpos %= LIVEBUFBLOCKSIZE;
+ }
+ }
+
+ /* We have to leave the trailing window of frames. */
+ nfeatvec = nbufcep - win;
+ if (nfeatvec <= 0)
+ return 0; /* Do nothing. */
+
+ for (i = 0; i < nfeatvec; ++i) {
+ /* Handle wraparound cases. */
+ if (fcb->curpos - win < 0 || fcb->curpos + win >= LIVEBUFBLOCKSIZE) {
+ /* Use tmpcepbuf for this case. Actually, we just need the pointers. */
+ for (j = -win; j <= win; ++j) {
+ int32 tmppos =
+ (fcb->curpos + j + LIVEBUFBLOCKSIZE) % LIVEBUFBLOCKSIZE;
+ fcb->tmpcepbuf[win + j] = fcb->cepbuf[tmppos];
+ }
+ /* tmpcepbuf + win is the centre frame, so offsets -win..win are valid. */
+ fcb->compute_feat(fcb, fcb->tmpcepbuf + win, ofeat[i]);
+ }
+ else {
+ /* The whole window lies inside the buffer; index it directly. */
+ fcb->compute_feat(fcb, fcb->cepbuf + fcb->curpos, ofeat[i]);
+ }
+ /* Move the read pointer forward. */
+ ++fcb->curpos;
+ fcb->curpos %= LIVEBUFBLOCKSIZE;
+ }
+
+ if (fcb->lda)
+ feat_lda_transform(fcb, ofeat, nfeatvec);
+
+ if (fcb->subvecs)
+ feat_subvec_project(fcb, ofeat, nfeatvec);
+
+ return nfeatvec;
+}
+
+/**
+ * Update the running normalization statistics held in the control block.
+ *
+ * The CMN statistics are updated when the mode is CMN_PRIOR; the AGC
+ * statistics are updated when the mode is AGC_EMAX or AGC_MAX.
+ */
+void
+feat_update_stats(feat_t *fcb)
+{
+    if (fcb->cmn == CMN_PRIOR)
+        cmn_prior_update(fcb->cmn_struct);
+
+    switch (fcb->agc) {
+    case AGC_EMAX:
+    case AGC_MAX:
+        agc_emax_update(fcb->agc_struct);
+        break;
+    default:
+        break;
+    }
+}
+
+/**
+ * Take an additional reference on a feature control block.
+ *
+ * @return The same pointer that was passed in.
+ */
+feat_t *
+feat_retain(feat_t *f)
+{
+    f->refcount += 1;
+    return f;
+}
+
+/**
+ * Drop one reference to a feature control block and destroy it when no
+ * references remain.
+ *
+ * @param f Block to release; NULL is accepted and ignored.
+ * @return The remaining reference count, or 0 if f was NULL or the block
+ *         was destroyed.
+ */
+int
+feat_free(feat_t * f)
+{
+    if (f == NULL)
+        return 0;
+
+    f->refcount -= 1;
+    if (f->refcount > 0)
+        return f->refcount;
+
+    /* Last reference gone: release the buffers... */
+    if (f->cepbuf)
+        ckd_free_2d((void **) f->cepbuf);
+    ckd_free(f->tmpcepbuf);
+
+    if (f->name)
+        ckd_free((void *) f->name);
+    if (f->lda)
+        ckd_free_3d((void ***) f->lda);
+
+    /* ...the stream and subvector descriptions... */
+    ckd_free(f->stream_len);
+    ckd_free(f->sv_len);
+    ckd_free(f->sv_buf);
+    subvecs_free(f->subvecs);
+
+    /* ...the normalization state, and finally the block itself. */
+    cmn_free(f->cmn_struct);
+    agc_free(f->agc_struct);
+
+    ckd_free(f);
+    return 0;
+}
+
+
+/**
+ * Log the configuration stored in a feature control block: type name,
+ * input size, stream sizes, subvector components and normalization flags.
+ */
+void
+feat_report(feat_t * f)
+{
+    int s, v;
+
+    E_INFO_NOFN("Initialization of feat_t, report:\n");
+    E_INFO_NOFN("Feature type = %s\n", f->name);
+    E_INFO_NOFN("Cepstral size = %d\n", f->cepsize);
+    E_INFO_NOFN("Number of streams = %d\n", f->n_stream);
+    for (s = 0; s < f->n_stream; s++) {
+        E_INFO_NOFN("Vector size of stream[%d]: %d\n", s,
+                    f->stream_len[s]);
+    }
+    E_INFO_NOFN("Number of subvectors = %d\n", f->n_sv);
+    for (v = 0; v < f->n_sv; v++) {
+        /* Each component list ends with -1. */
+        int32 *comp = f->subvecs[v];
+
+        E_INFO_NOFN("Components of subvector[%d]:", v);
+        while (comp && *comp != -1) {
+            E_INFOCONT(" %d", *comp);
+            ++comp;
+        }
+        E_INFOCONT("\n");
+    }
+    E_INFO_NOFN("Whether CMN is used = %d\n", f->cmn);
+    E_INFO_NOFN("Whether AGC is used = %d\n", f->agc);
+    E_INFO_NOFN("Whether variance is normalized = %d\n", f->varnorm);
+    E_INFO_NOFN("\n");
+}
diff --git a/media/sphinxbase/src/libsphinxbase/feat/lda.c b/media/sphinxbase/src/libsphinxbase/feat/lda.c
new file mode 100644
index 000000000..182b029de
--- /dev/null
+++ b/media/sphinxbase/src/libsphinxbase/feat/lda.c
@@ -0,0 +1,158 @@
+/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
+/* ====================================================================
+ * Copyright (c) 2006 Carnegie Mellon University. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * This work was supported in part by funding from the Defense Advanced
+ * Research Projects Agency and the National Science Foundation of the
+ * United States of America, and the CMU Sphinx Speech Consortium.
+ *
+ * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
+ * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
+ * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ====================================================================
+ *
+ */
+/*
+ * lda.c -- Read and apply LDA matrices to features.
+ *
+ * Author: David Huggins-Daines <dhuggins@cs.cmu.edu>
+ */
+
+#include <assert.h>
+#include <string.h>
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#ifdef _MSC_VER
+#pragma warning (disable: 4018)
+#endif
+
+#include "sphinxbase/feat.h"
+#include "sphinxbase/ckd_alloc.h"
+#include "sphinxbase/bio.h"
+#include "sphinxbase/err.h"
+
+#define MATRIX_FILE_VERSION "0.1"
+
+/**
+ * Load an LDA transformation from a binary matrix file and attach it
+ * to a feature computation object.
+ *
+ * Any matrix already attached to @a feat is released before the new
+ * one is read.  If reading then fails, @a feat is left with no LDA
+ * matrix (feat->lda == NULL) rather than a pointer to freed memory.
+ *
+ * @param feat    Feature computation object; must be non-NULL and have
+ *                exactly one stream.
+ * @param ldafile Path of the matrix file to read.
+ * @param dim     Requested output dimensionality.  A value <= 0, or
+ *                one larger than the number of rows in the file,
+ *                selects all rows.  The result is additionally capped
+ *                at the number of columns, which equals the stream
+ *                length.
+ * @return 0 on success; -1 if the object is multi-stream, the file
+ *         cannot be opened, or its header or matrix data cannot be
+ *         read.  A mismatch between the matrix width and the stream
+ *         length is reported through E_FATAL.
+ */
+int32
+feat_read_lda(feat_t *feat, const char *ldafile, int32 dim)
+{
+    FILE *fh;
+    int32 byteswap;
+    uint32 chksum, i, m, n;
+    char **argname, **argval;
+
+    assert(feat);
+    if (feat->n_stream != 1) {
+        E_ERROR("LDA incompatible with multi-stream features (n_stream = %d)\n",
+                feat->n_stream);
+        return -1;
+    }
+
+    if ((fh = fopen(ldafile, "rb")) == NULL) {
+        E_ERROR_SYSTEM("Failed to open transform file '%s' for reading", ldafile);
+        return -1;
+    }
+
+    if (bio_readhdr(fh, &argname, &argval, &byteswap) < 0) {
+        E_ERROR("Failed to read header from transform file '%s'\n", ldafile);
+        fclose(fh);
+        return -1;
+    }
+
+    /* A version mismatch is only warned about; loading continues. */
+    for (i = 0; argname[i]; i++) {
+        if (strcmp(argname[i], "version") == 0) {
+            if (strcmp(argval[i], MATRIX_FILE_VERSION) != 0)
+                E_WARN("%s: Version mismatch: %s, expecting %s\n",
+                       ldafile, argval[i], MATRIX_FILE_VERSION);
+        }
+    }
+
+    bio_hdrarg_free(argname, argval);
+    argname = argval = NULL;
+
+    chksum = 0;
+
+    if (feat->lda) {
+        ckd_free_3d((void ***)feat->lda);
+        /* Clear the pointer right away: if the read below fails we
+         * return early, and a stale pointer here would later be
+         * freed again or dereferenced. */
+        feat->lda = NULL;
+    }
+
+    {
+        /* Use a temporary variable to avoid strict-aliasing problems. */
+        void ***outlda;
+
+        if (bio_fread_3d(&outlda, sizeof(float32),
+                         &feat->n_lda, &m, &n,
+                         fh, byteswap, &chksum) < 0) {
+            E_ERROR_SYSTEM("%s: bio_fread_3d(lda) failed\n", ldafile);
+            fclose(fh);
+            return -1;
+        }
+        feat->lda = (void *)outlda;
+    }
+    fclose(fh);
+
+#ifdef FIXED_POINT
+    /* FIXME: This is a fragile hack that depends on mfcc_t and
+     * float32 being the same size (which they are, but...) */
+    for (i = 0; i < feat->n_lda * m * n; ++i) {
+        feat->lda[0][0][i] = FLOAT2MFCC(((float *)feat->lda[0][0])[i]);
+    }
+#endif
+
+    /* Note that SphinxTrain stores the eigenvectors as row vectors. */
+    if (n != feat->stream_len[0])
+        E_FATAL("LDA matrix dimension %d doesn't match feature stream size %d\n", n, feat->stream_len[0]);
+
+    /* Override dim from file if it is 0 or greater than m.  The cast
+     * spells out the unsigned comparison the compiler performs anyway. */
+    if (dim <= 0 || (uint32)dim > m) {
+        dim = m;
+    }
+    /* feat_lda_transform() accumulates its output in a scratch buffer
+     * of stream_len[0] (== n) entries, so never request more output
+     * rows than that even if the file holds more rows than columns. */
+    if ((uint32)dim > n) {
+        dim = n;
+    }
+    feat->out_dim = dim;
+
+    return 0;
+}
+
+/**
+ * Apply the first LDA matrix of a feature computation object to a
+ * block of feature frames, in place.
+ *
+ * For each frame, stream 0 is replaced by its projection onto the
+ * first feat_dimension(fcb) rows of fcb->lda[0]; the remaining
+ * entries, up to fcb->stream_len[0], are set to zero.
+ *
+ * @param fcb        Feature computation object supplying the matrix
+ *                   and the stream length.
+ * @param inout_feat Frames to transform; only stream 0 of each frame
+ *                   is read and overwritten.
+ * @param nfr        Number of frames in @a inout_feat.
+ */
+void
+feat_lda_transform(feat_t *fcb, mfcc_t ***inout_feat, uint32 nfr)
+{
+    mfcc_t *scratch;
+    uint32 frame;
+
+    /* Scratch output vector, reused for every frame. */
+    scratch = ckd_calloc(fcb->stream_len[0], sizeof(*scratch));
+
+    frame = 0;
+    while (frame < nfr) {
+        mfcc_t *vec = inout_feat[frame][0];
+        uint32 row, col;
+
+        /* Do the matrix multiplication inline here since fcb->lda
+         * is transposed (eigenvectors in rows not columns). */
+        /* FIXME: In the future we ought to use the BLAS. */
+        memset(scratch, 0, fcb->stream_len[0] * sizeof(*scratch));
+        for (row = 0; row < feat_dimension(fcb); row++) {
+            for (col = 0; col < fcb->stream_len[0]; col++) {
+                scratch[row] += MFCCMUL(vec[col], fcb->lda[0][row][col]);
+            }
+        }
+
+        /* Write the projected (and zero-padded) vector back. */
+        memcpy(vec, scratch, sizeof(*scratch) * fcb->stream_len[0]);
+        ++frame;
+    }
+
+    ckd_free(scratch);
+}