diff options
Diffstat (limited to 'media/sphinxbase/src/libsphinxbase/fe/fe_internal.h')
-rw-r--r-- | media/sphinxbase/src/libsphinxbase/fe/fe_internal.h | 216 |
1 files changed, 216 insertions, 0 deletions
diff --git a/media/sphinxbase/src/libsphinxbase/fe/fe_internal.h b/media/sphinxbase/src/libsphinxbase/fe/fe_internal.h
new file mode 100644
index 000000000..f6c943c72
--- /dev/null
+++ b/media/sphinxbase/src/libsphinxbase/fe/fe_internal.h
@@ -0,0 +1,216 @@
+/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
+/* ====================================================================
+ * Copyright (c) 1996-2004 Carnegie Mellon University. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * This work was supported in part by funding from the Defense Advanced
+ * Research Projects Agency and the National Science Foundation of the
+ * United States of America, and the CMU Sphinx Speech Consortium.
+ *
+ * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
+ * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
+ * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ====================================================================
+ *
+ */
+
+#ifndef __FE_INTERNAL_H__ /* NOTE(review): identifiers containing a double underscore are reserved in C; left unchanged here. */
+#define __FE_INTERNAL_H__
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include "sphinxbase/fe.h"
+#include "sphinxbase/fixpoint.h"
+
+#include "fe_noise.h"
+#include "fe_prespch_buf.h"
+#include "fe_type.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+#if 0
+/* Fool Emacs: the brace below is never compiled; it only balances the extern C brace above, presumably so editor indentation is not thrown off. */
+}
+#endif
+
+/* Values for the 'logspec' field (presumably fe_s.log_spec below; confirm). No enumerator is defined for 0. */
+enum {
+    RAW_LOG_SPEC = 1,
+    SMOOTH_LOG_SPEC = 2
+};
+
+/* Values for the 'transform' field (see uint8 transform in struct fe_s below). */
+enum {
+    LEGACY_DCT = 0,
+    DCT_II = 1,
+    DCT_HTK = 2
+};
+
+typedef struct melfb_s melfb_t;
+/** Mel filterbank state used for MFCC computation: filter layout, DCT table, warping, liftering and normalization settings. */
+struct melfb_s {
+    float32 sampling_rate;
+    int32 num_cepstra;
+    int32 num_filters;
+    int32 fft_size;
+    float32 lower_filt_freq;
+    float32 upper_filt_freq;
+    /* DCT coefficients (a table of mfcc_t rows; its dimensions are not stated in this header). */
+    mfcc_t **mel_cosine;
+    /* Filter coefficients. */
+    mfcc_t *filt_coeffs;
+    int16 *spec_start; /* NOTE(review): presumably the first spectrum bin of each filter; confirm in fe_build_melfilters */
+    int16 *filt_start; /* NOTE(review): presumably each filter's offset into filt_coeffs; confirm */
+    int16 *filt_width; /* NOTE(review): presumably the number of coefficients per filter; confirm */
+    /* Luxury mobile home (a pun on the field name). NOTE(review): presumably a flag selecting double-width filters; confirm. */
+    int32 doublewide;
+    char const *warp_type; /* NOTE(review): warp_* fields look like frequency-warping settings, judging by the names; semantics are defined elsewhere */
+    char const *warp_params;
+    uint32 warp_id;
+    /* Precomputed normalization constants for unitary DCT-II/DCT-III */
+    mfcc_t sqrt_inv_n, sqrt_inv_2n;
+    /* Value and coefficients for HTK-style liftering */
+    int32 lifter_val;
+    mfcc_t *lifter;
+    /* Normalize filters to unit area */
+    int32 unit_area;
+    /* Round filter frequencies to DFT points (hurts accuracy, but is
+       useful for legacy purposes) */
+    int32 round_filters;
+};
+
+typedef struct ringbuf_s { /* Ring buffer of powspec_t vectors. NOTE(review): the field roles noted below are inferred from names only; confirm against the code that uses it. */
+    powspec_t** bufs; /* array of buffers */
+    int16 buf_num; /* presumably the number of buffers in bufs */
+    int32 buf_len; /* presumably the length of each buffer */
+    int16 start; /* presumably the index of the oldest record */
+    int16 end; /* presumably the index where the next record goes */
+    int32 recs; /* presumably the number of records currently stored */
+} ringbuf_t;
+
+/* sqrt(1/2), also used for unitary DCT-II/DCT-III; wrapped in FLOAT2MFCC, which is defined outside this header. */
+#define SQRT_HALF FLOAT2MFCC(0.707106781186548)
+
+typedef struct vad_data_s { /* Voice activity detection (VAD) bookkeeping; struct fe_s points at one via its vad_data member. */
+    uint8 global_state;
+    uint8 state_changed;
+    uint8 store_pcm;
+    int16 prespch_num; /* NOTE(review): presumably a running count related to fe_s.prespch_len; confirm */
+    int16 postspch_num; /* NOTE(review): presumably a running count related to fe_s.postspch_len; confirm */
+    prespch_buf_t* prespch_buf; /* type presumably comes from fe_prespch_buf.h, included above */
+} vad_data_t;
+
+/** Structure for the front-end computation. The fe_t name used by the prototypes below is not declared in this header (presumably in sphinxbase/fe.h). */
+struct fe_s {
+    cmd_ln_t *config;
+    int refcount;
+
+    int16 prespch_len; /* NOTE(review): units are not stated here; presumably frames */
+    int16 postspch_len;
+    float32 vad_threshold;
+
+    float32 sampling_rate;
+    int16 frame_rate;
+    int16 frame_shift;
+
+    float32 window_length;
+    int16 frame_size;
+    int16 fft_size; /* note: int16 here, while melfb_s.fft_size above is int32 */
+
+    uint8 fft_order; /* this run of uint8 members holds small settings; transform takes the 'transform' enum values above, log_spec presumably the 'logspec' ones */
+    uint8 feature_dimension;
+    uint8 num_cepstra;
+    uint8 remove_dc;
+    uint8 log_spec;
+    uint8 swap;
+    uint8 dither;
+    uint8 transform;
+    uint8 remove_noise;
+    uint8 remove_silence;
+
+    float32 pre_emphasis_alpha;
+    int32 seed;
+
+    size_t sample_counter; /* NOTE(review): no include visible in this header declares size_t directly; presumably it arrives transitively */
+    uint8 start_flag;
+    uint8 reserved;
+
+    /* Twiddle factors for FFT. */
+    frame_t *ccc, *sss;
+    /* Mel filter parameters. */
+    melfb_t *mel_fb;
+    /* Half of a Hamming Window. */
+    window_t *hamming_window;
+    /* Storage for noise removal */
+    noise_stats_t *noise_stats;
+
+    /* Storage for VAD variables */
+    vad_data_t *vad_data;
+
+    /* Temporary buffers for processing. */
+    /* FIXME: too many of these. */
+    int16 *spch;
+    frame_t *frame;
+    powspec_t *spec, *mfspec;
+    int16 *overflow_samps;
+    int16 num_overflow_samps;
+    int16 prior;
+};
+
+void fe_init_dither(int32 seed); /* NOTE(review): presumably seeds the dithering noise source; inferred from the name, the implementation is not in this header */
+
+/* Apply 1/2 bit noise to a buffer of audio (nsamps samples in buffer). The meaning of the int32 result is not stated here. */
+int32 fe_dither(int16 *buffer, int32 nsamps);
+
+/* Load a frame of data into the fe. */
+int fe_read_frame(fe_t *fe, int16 const *in, int32 len);
+
+/* Shift the input buffer back and read more data. */
+int fe_shift_frame(fe_t *fe, int16 const *in, int32 len);
+
+/* Process a frame of data into features. */
+void fe_write_frame(fe_t *fe, mfcc_t *fea);
+
+/* Initialization functions. */
+int32 fe_build_melfilters(melfb_t *MEL_FB);
+int32 fe_compute_melcosine(melfb_t *MEL_FB);
+void fe_create_hamming(window_t *in, int32 in_len);
+void fe_create_twiddle(fe_t *fe);
+
+fixed32 fe_log_add(fixed32 x, fixed32 y); /* NOTE(review): presumably fixed-point addition in the log domain; inferred from the name and the fixed32 types */
+fixed32 fe_log_sub(fixed32 x, fixed32 y); /* NOTE(review): presumably the matching log-domain subtraction; confirm */
+
+/* Miscellaneous processing functions. */
+void fe_spec2cep(fe_t * fe, const powspec_t * mflogspec, mfcc_t * mfcep);
+void fe_dct2(fe_t *fe, const powspec_t *mflogspec, mfcc_t *mfcep, int htk); /* NOTE(review): htk presumably selects HTK-style scaling (compare DCT_HTK above); confirm */
+void fe_dct3(fe_t *fe, const mfcc_t *mfcep, powspec_t *mflogspec);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __FE_INTERNAL_H__ */ |