diff options
Diffstat (limited to 'media/sphinxbase/src/libsphinxbase/fe/fe_noise.c')
-rw-r--r-- | media/sphinxbase/src/libsphinxbase/fe/fe_noise.c | 425 |
1 files changed, 0 insertions, 425 deletions
diff --git a/media/sphinxbase/src/libsphinxbase/fe/fe_noise.c b/media/sphinxbase/src/libsphinxbase/fe/fe_noise.c deleted file mode 100644 index 4fb6d21a9..000000000 --- a/media/sphinxbase/src/libsphinxbase/fe/fe_noise.c +++ /dev/null @@ -1,425 +0,0 @@ -/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ -/* ==================================================================== - * Copyright (c) 2013 Carnegie Mellon University. All rights - * reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * This work was supported in part by funding from the Defense Advanced - * Research Projects Agency and the National Science Foundation of the - * United States of America, and the CMU Sphinx Speech Consortium. - * - * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND - * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, - * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY - * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * ==================================================================== - * - */ - -/* This noise removal algorithm is inspired by the following papers - * Computationally Efficient Speech Enchancement by Spectral Minina Tracking - * by G. Doblinger - * - * Power-Normalized Cepstral Coefficients (PNCC) for Robust Speech Recognition - * by C. Kim. - * - * For the recent research and state of art see papers about IMRCA and - * A Minimum-Mean-Square-Error Noise Reduction Algorithm On Mel-Frequency - * Cepstra For Robust Speech Recognition by Dong Yu and others - */ - -#ifdef HAVE_CONFIG_H -#include <config.h> -#endif - -#include <math.h> - -#include "sphinxbase/prim_type.h" -#include "sphinxbase/ckd_alloc.h" -#include "sphinxbase/strfuncs.h" -#include "sphinxbase/err.h" - -#include "fe_noise.h" -#include "fe_internal.h" - -/* Noise supression constants */ -#define SMOOTH_WINDOW 4 -#define LAMBDA_POWER 0.7 -#define LAMBDA_A 0.995 -#define LAMBDA_B 0.5 -#define LAMBDA_T 0.85 -#define MU_T 0.2 -#define MAX_GAIN 20 - -struct noise_stats_s { - /* Smoothed power */ - powspec_t *power; - /* Noise estimate */ - powspec_t *noise; - /* Signal floor estimate */ - powspec_t *floor; - /* Peak for temporal masking */ - powspec_t *peak; - - /* Initialize it next time */ - uint8 undefined; - /* Number of items to process */ - uint32 num_filters; - - /* Precomputed constants */ - powspec_t lambda_power; - powspec_t comp_lambda_power; - powspec_t lambda_a; - powspec_t comp_lambda_a; - powspec_t lambda_b; - powspec_t comp_lambda_b; - powspec_t lambda_t; - powspec_t mu_t; - powspec_t max_gain; - powspec_t inv_max_gain; - - powspec_t smooth_scaling[2 * SMOOTH_WINDOW + 3]; -}; - -static void -fe_lower_envelope(noise_stats_t *noise_stats, powspec_t * buf, powspec_t * floor_buf, int32 num_filt) -{ - int i; - - for (i = 0; i < num_filt; i++) { -#ifndef FIXED_POINT - if (buf[i] >= floor_buf[i]) { - floor_buf[i] = - noise_stats->lambda_a * floor_buf[i] + noise_stats->comp_lambda_a * buf[i]; - } - else { - floor_buf[i] = - noise_stats->lambda_b * floor_buf[i] + noise_stats->comp_lambda_b * buf[i]; - } -#else - if (buf[i] >= floor_buf[i]) { - floor_buf[i] = fe_log_add(noise_stats->lambda_a + floor_buf[i], - noise_stats->comp_lambda_a + buf[i]); - } - else { - floor_buf[i] = fe_log_add(noise_stats->lambda_b + floor_buf[i], - noise_stats->comp_lambda_b + buf[i]); - } -#endif - } -} - -/* temporal masking */ -static void -fe_temp_masking(noise_stats_t *noise_stats, powspec_t * buf, powspec_t * peak, int32 num_filt) -{ - powspec_t cur_in; - int i; - - for (i = 0; i < num_filt; i++) { - cur_in = buf[i]; - -#ifndef FIXED_POINT - peak[i] *= noise_stats->lambda_t; - if (buf[i] < noise_stats->lambda_t * peak[i]) - buf[i] = peak[i] * noise_stats->mu_t; -#else - peak[i] += noise_stats->lambda_t; - if (buf[i] < noise_stats->lambda_t + peak[i]) - buf[i] = peak[i] + noise_stats->mu_t; -#endif - - if (cur_in > peak[i]) - peak[i] = cur_in; - } -} - -/* spectral weight smoothing */ -static void -fe_weight_smooth(noise_stats_t *noise_stats, powspec_t * buf, powspec_t * coefs, int32 num_filt) -{ - int i, j; - int l1, l2; - powspec_t coef; - - for (i = 0; i < num_filt; i++) { - l1 = ((i - SMOOTH_WINDOW) > 0) ? (i - SMOOTH_WINDOW) : 0; - l2 = ((i + SMOOTH_WINDOW) < - (num_filt - 1)) ? (i + SMOOTH_WINDOW) : (num_filt - 1); - -#ifndef FIXED_POINT - coef = 0; - for (j = l1; j <= l2; j++) { - coef += coefs[j]; - } - buf[i] = buf[i] * (coef / (l2 - l1 + 1)); -#else - coef = MIN_FIXLOG; - for (j = l1; j <= l2; j++) { - coef = fe_log_add(coef, coefs[j]); - } - buf[i] = buf[i] + coef - noise_stats->smooth_scaling[l2 - l1 + 1]; -#endif - - } -} - -noise_stats_t * -fe_init_noisestats(int num_filters) -{ - int i; - noise_stats_t *noise_stats; - - noise_stats = (noise_stats_t *) ckd_calloc(1, sizeof(noise_stats_t)); - - noise_stats->power = - (powspec_t *) ckd_calloc(num_filters, sizeof(powspec_t)); - noise_stats->noise = - (powspec_t *) ckd_calloc(num_filters, sizeof(powspec_t)); - noise_stats->floor = - (powspec_t *) ckd_calloc(num_filters, sizeof(powspec_t)); - noise_stats->peak = - (powspec_t *) ckd_calloc(num_filters, sizeof(powspec_t)); - - noise_stats->undefined = TRUE; - noise_stats->num_filters = num_filters; - -#ifndef FIXED_POINT - noise_stats->lambda_power = LAMBDA_POWER; - noise_stats->comp_lambda_power = 1 - LAMBDA_POWER; - noise_stats->lambda_a = LAMBDA_A; - noise_stats->comp_lambda_a = 1 - LAMBDA_A; - noise_stats->lambda_b = LAMBDA_B; - noise_stats->comp_lambda_b = 1 - LAMBDA_B; - noise_stats->lambda_t = LAMBDA_T; - noise_stats->mu_t = MU_T; - noise_stats->max_gain = MAX_GAIN; - noise_stats->inv_max_gain = 1.0 / MAX_GAIN; - - for (i = 1; i < 2 * SMOOTH_WINDOW + 1; i++) { - noise_stats->smooth_scaling[i] = 1.0 / i; - } -#else - noise_stats->lambda_power = FLOAT2FIX(log(LAMBDA_POWER)); - noise_stats->comp_lambda_power = FLOAT2FIX(log(1 - LAMBDA_POWER)); - noise_stats->lambda_a = FLOAT2FIX(log(LAMBDA_A)); - noise_stats->comp_lambda_a = FLOAT2FIX(log(1 - LAMBDA_A)); - noise_stats->lambda_b = FLOAT2FIX(log(LAMBDA_B)); - noise_stats->comp_lambda_b = FLOAT2FIX(log(1 - LAMBDA_B)); - noise_stats->lambda_t = FLOAT2FIX(log(LAMBDA_T)); - noise_stats->mu_t = FLOAT2FIX(log(MU_T)); - noise_stats->max_gain = FLOAT2FIX(log(MAX_GAIN)); - noise_stats->inv_max_gain = FLOAT2FIX(log(1.0 / MAX_GAIN)); - - for (i = 1; i < 2 * SMOOTH_WINDOW + 3; i++) { - noise_stats->smooth_scaling[i] = FLOAT2FIX(log(i)); - } -#endif - - return noise_stats; -} - -void -fe_reset_noisestats(noise_stats_t * noise_stats) -{ - if (noise_stats) - noise_stats->undefined = TRUE; -} - -void -fe_free_noisestats(noise_stats_t * noise_stats) -{ - ckd_free(noise_stats->power); - ckd_free(noise_stats->noise); - ckd_free(noise_stats->floor); - ckd_free(noise_stats->peak); - ckd_free(noise_stats); -} - -/** - * For fixed point we are doing the computation in a fixlog domain, - * so we have to add many processing cases. - */ -void -fe_track_snr(fe_t * fe, int32 *in_speech) -{ - powspec_t *signal; - powspec_t *gain; - noise_stats_t *noise_stats; - powspec_t *mfspec; - int32 i, num_filts; - powspec_t lrt, snr, max_signal, log_signal; - - if (!(fe->remove_noise || fe->remove_silence)) { - *in_speech = TRUE; - return; - } - - noise_stats = fe->noise_stats; - mfspec = fe->mfspec; - num_filts = noise_stats->num_filters; - - signal = (powspec_t *) ckd_calloc(num_filts, sizeof(powspec_t)); - - if (noise_stats->undefined) { - for (i = 0; i < num_filts; i++) { - noise_stats->power[i] = mfspec[i]; - noise_stats->noise[i] = mfspec[i]; -#ifndef FIXED_POINT - noise_stats->floor[i] = mfspec[i] / noise_stats->max_gain; - noise_stats->peak[i] = 0.0; -#else - noise_stats->floor[i] = mfspec[i] - noise_stats->max_gain; - noise_stats->peak[i] = MIN_FIXLOG; -#endif - } - noise_stats->undefined = FALSE; - } - - /* Calculate smoothed power */ - for (i = 0; i < num_filts; i++) { -#ifndef FIXED_POINT - noise_stats->power[i] = - noise_stats->lambda_power * noise_stats->power[i] + noise_stats->comp_lambda_power * mfspec[i]; -#else - noise_stats->power[i] = fe_log_add(noise_stats->lambda_power + noise_stats->power[i], - noise_stats->comp_lambda_power + mfspec[i]); -#endif - } - - /* Noise estimation and vad decision */ - fe_lower_envelope(noise_stats, noise_stats->power, noise_stats->noise, num_filts); - - lrt = FLOAT2FIX(0.0f); - max_signal = FLOAT2FIX(0.0f); - for (i = 0; i < num_filts; i++) { -#ifndef FIXED_POINT - signal[i] = noise_stats->power[i] - noise_stats->noise[i]; - if (signal[i] < 1.0) - signal[i] = 1.0; - snr = log(noise_stats->power[i] / noise_stats->noise[i]); - log_signal = log(signal[i]); -#else - signal[i] = fe_log_sub(noise_stats->power[i], noise_stats->noise[i]); - snr = noise_stats->power[i] - noise_stats->noise[i]; - log_signal = signal[i]; -#endif - if (snr > lrt) { - lrt = snr; - if (log_signal > max_signal) { - max_signal = log_signal; - } - } - } - -#ifndef FIXED_POINT - if (fe->remove_silence && (lrt < fe->vad_threshold || max_signal < fe->vad_threshold)) { -#else - if (fe->remove_silence && (lrt < FLOAT2FIX(fe->vad_threshold) || max_signal < FLOAT2FIX(fe->vad_threshold))) { -#endif - *in_speech = FALSE; - } else { - *in_speech = TRUE; - } - - fe_lower_envelope(noise_stats, signal, noise_stats->floor, num_filts); - - fe_temp_masking(noise_stats, signal, noise_stats->peak, num_filts); - - if (!fe->remove_noise) { - //no need for further calculations if noise cancellation disabled - ckd_free(signal); - return; - } - - for (i = 0; i < num_filts; i++) { - if (signal[i] < noise_stats->floor[i]) - signal[i] = noise_stats->floor[i]; - } - - gain = (powspec_t *) ckd_calloc(num_filts, sizeof(powspec_t)); -#ifndef FIXED_POINT - for (i = 0; i < num_filts; i++) { - if (signal[i] < noise_stats->max_gain * noise_stats->power[i]) - gain[i] = signal[i] / noise_stats->power[i]; - else - gain[i] = noise_stats->max_gain; - if (gain[i] < noise_stats->inv_max_gain) - gain[i] = noise_stats->inv_max_gain; - } -#else - for (i = 0; i < num_filts; i++) { - gain[i] = signal[i] - noise_stats->power[i]; - if (gain[i] > noise_stats->max_gain) - gain[i] = noise_stats->max_gain; - if (gain[i] < noise_stats->inv_max_gain) - gain[i] = noise_stats->inv_max_gain; - } -#endif - - /* Weight smoothing and time frequency normalization */ - fe_weight_smooth(noise_stats, mfspec, gain, num_filts); - - ckd_free(gain); - ckd_free(signal); -} - -void -fe_vad_hangover(fe_t * fe, mfcc_t * fea, int32 is_speech) -{ - /* track vad state and deal with cepstrum prespeech buffer */ - fe->vad_data->state_changed = 0; - if (is_speech) { - fe->vad_data->postspch_num = 0; - if (!fe->vad_data->global_state) { - fe->vad_data->prespch_num++; - fe_prespch_write_cep(fe->vad_data->prespch_buf, fea); - /* check for transition sil->speech */ - if (fe->vad_data->prespch_num >= fe->prespch_len) { - fe->vad_data->prespch_num = 0; - fe->vad_data->global_state = 1; - /* transition silence->speech occurred */ - fe->vad_data->state_changed = 1; - } - } - } else { - fe->vad_data->prespch_num = 0; - fe_prespch_reset_cep(fe->vad_data->prespch_buf); - if (fe->vad_data->global_state) { - fe->vad_data->postspch_num++; - /* check for transition speech->sil */ - if (fe->vad_data->postspch_num >= fe->postspch_len) { - fe->vad_data->postspch_num = 0; - fe->vad_data->global_state = 0; - /* transition speech->silence occurred */ - fe->vad_data->state_changed = 1; - } - } - } - - if (fe->vad_data->store_pcm) { - if (is_speech || fe->vad_data->global_state) - fe_prespch_write_pcm(fe->vad_data->prespch_buf, fe->spch); - if (!is_speech && !fe->vad_data->global_state) - fe_prespch_reset_pcm(fe->vad_data->prespch_buf); - } -} |