diff options
Diffstat (limited to 'media/sphinxbase/src/libsphinxbase/feat/agc.c')
-rw-r--r-- | media/sphinxbase/src/libsphinxbase/feat/agc.c | 227 |
1 files changed, 227 insertions, 0 deletions
diff --git a/media/sphinxbase/src/libsphinxbase/feat/agc.c b/media/sphinxbase/src/libsphinxbase/feat/agc.c new file mode 100644 index 000000000..271baf49d --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/feat/agc.c @@ -0,0 +1,227 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * agc.c -- Various forms of automatic gain control (AGC) + * + * ********************************************** + * CMU ARPA Speech Project + * + * Copyright (c) 1996 Carnegie Mellon University. + * ALL RIGHTS RESERVED. + * ********************************************** + * + * HISTORY + * $Log$ + * Revision 1.5 2005/06/21 19:25:41 arthchan2003 + * 1, Fixed doxygen documentation. 2, Added $ keyword. + * + * Revision 1.3 2005/03/30 01:22:46 archan + * Fixed mistakes in last updates. Add + * + * + * 04-Nov-95 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Created. + */ + +#include <string.h> +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include "sphinxbase/err.h" +#include "sphinxbase/ckd_alloc.h" +#include "sphinxbase/agc.h" + +/* NOTE! These must match the enum in agc.h */ +const char *agc_type_str[] = { + "none", + "max", + "emax", + "noise" +}; +static const int n_agc_type_str = sizeof(agc_type_str)/sizeof(agc_type_str[0]); + +agc_type_t +agc_type_from_str(const char *str) +{ + int i; + + for (i = 0; i < n_agc_type_str; ++i) { + if (0 == strcmp(str, agc_type_str[i])) + return (agc_type_t)i; + } + E_FATAL("Unknown AGC type '%s'\n", str); + return AGC_NONE; +} + +agc_t *agc_init(void) +{ + agc_t *agc; + agc = ckd_calloc(1, sizeof(*agc)); + agc->noise_thresh = FLOAT2MFCC(2.0); + + return agc; +} + +void agc_free(agc_t *agc) +{ + ckd_free(agc); +} + +/** + * Normalize c0 for all frames such that max(c0) = 0. + */ +void +agc_max(agc_t *agc, mfcc_t **mfc, int32 n_frame) +{ + int32 i; + + if (n_frame <= 0) + return; + agc->obs_max = mfc[0][0]; + for (i = 1; i < n_frame; i++) { + if (mfc[i][0] > agc->obs_max) { + agc->obs_max = mfc[i][0]; + agc->obs_frame = 1; + } + } + + E_INFO("AGCMax: obs=max= %.2f\n", agc->obs_max); + for (i = 0; i < n_frame; i++) + mfc[i][0] -= agc->obs_max; +} + +void +agc_emax_set(agc_t *agc, float32 m) +{ + agc->max = FLOAT2MFCC(m); + E_INFO("AGCEMax: max= %.2f\n", m); +} + +float32 +agc_emax_get(agc_t *agc) +{ + return MFCC2FLOAT(agc->max); +} + +void +agc_emax(agc_t *agc, mfcc_t **mfc, int32 n_frame) +{ + int i; + + if (n_frame <= 0) + return; + for (i = 0; i < n_frame; ++i) { + if (mfc[i][0] > agc->obs_max) { + agc->obs_max = mfc[i][0]; + agc->obs_frame = 1; + } + mfc[i][0] -= agc->max; + } +} + +/* Update estimated max for next utterance */ +void +agc_emax_update(agc_t *agc) +{ + if (agc->obs_frame) { /* Update only if some data observed */ + agc->obs_max_sum += agc->obs_max; + agc->obs_utt++; + + /* Re-estimate max over past history; decay the history */ + agc->max = agc->obs_max_sum / agc->obs_utt; + if (agc->obs_utt == 16) { + agc->obs_max_sum /= 2; + agc->obs_utt = 8; + } + } + E_INFO("AGCEMax: obs= %.2f, new= %.2f\n", agc->obs_max, agc->max); + + /* Reset the accumulators for the next utterance. */ + agc->obs_frame = 0; + agc->obs_max = FLOAT2MFCC(-1000.0); /* Less than any real C0 value (hopefully!!) */ +} + +void +agc_noise(agc_t *agc, + mfcc_t **cep, + int32 nfr) +{ + mfcc_t min_energy; /* Minimum log-energy */ + mfcc_t noise_level; /* Average noise_level */ + int32 i; /* frame index */ + int32 noise_frames; /* Number of noise frames */ + + /* Determine minimum log-energy in utterance */ + min_energy = cep[0][0]; + for (i = 0; i < nfr; ++i) { + if (cep[i][0] < min_energy) + min_energy = cep[i][0]; + } + + /* Average all frames between min_energy and min_energy + agc->noise_thresh */ + noise_frames = 0; + noise_level = 0; + min_energy += agc->noise_thresh; + for (i = 0; i < nfr; ++i) { + if (cep[i][0] < min_energy) { + noise_level += cep[i][0]; + noise_frames++; + } + } + + if (noise_frames > 0) { + noise_level /= noise_frames; + E_INFO("AGC NOISE: max= %6.3f\n", MFCC2FLOAT(noise_level)); + /* Subtract noise_level from all log_energy values */ + for (i = 0; i < nfr; i++) { + cep[i][0] -= noise_level; + } + } +} + +void +agc_set_threshold(agc_t *agc, float32 threshold) +{ + agc->noise_thresh = FLOAT2MFCC(threshold); +} + +float32 +agc_get_threshold(agc_t *agc) +{ + return FLOAT2MFCC(agc->noise_thresh); +} |