summaryrefslogtreecommitdiffstats
path: root/media/sphinxbase/src/libsphinxbase/fe/fe_internal.h
diff options
context:
space:
mode:
authorMatt A. Tobin <mattatobin@localhost.localdomain>2018-02-02 04:16:08 -0500
committerMatt A. Tobin <mattatobin@localhost.localdomain>2018-02-02 04:16:08 -0500
commit5f8de423f190bbb79a62f804151bc24824fa32d8 (patch)
tree10027f336435511475e392454359edea8e25895d /media/sphinxbase/src/libsphinxbase/fe/fe_internal.h
parent49ee0794b5d912db1f95dce6eb52d781dc210db5 (diff)
downloadUXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.gz
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.lz
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.xz
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.zip
Add m-esr52 at 52.6.0
Diffstat (limited to 'media/sphinxbase/src/libsphinxbase/fe/fe_internal.h')
-rw-r--r--media/sphinxbase/src/libsphinxbase/fe/fe_internal.h216
1 files changed, 216 insertions, 0 deletions
diff --git a/media/sphinxbase/src/libsphinxbase/fe/fe_internal.h b/media/sphinxbase/src/libsphinxbase/fe/fe_internal.h
new file mode 100644
index 000000000..f6c943c72
--- /dev/null
+++ b/media/sphinxbase/src/libsphinxbase/fe/fe_internal.h
@@ -0,0 +1,216 @@
+/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
+/* ====================================================================
+ * Copyright (c) 1996-2004 Carnegie Mellon University. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * This work was supported in part by funding from the Defense Advanced
+ * Research Projects Agency and the National Science Foundation of the
+ * United States of America, and the CMU Sphinx Speech Consortium.
+ *
+ * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
+ * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
+ * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ====================================================================
+ *
+ */
+
+#ifndef __FE_INTERNAL_H__
+#define __FE_INTERNAL_H__
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include "sphinxbase/fe.h"
+#include "sphinxbase/fixpoint.h"
+
+#include "fe_noise.h"
+#include "fe_prespch_buf.h"
+#include "fe_type.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+#if 0
+/* Fool Emacs. */
+}
+#endif
+
+/* Values for the 'logspec' field. */
+enum {
+ RAW_LOG_SPEC = 1,
+ SMOOTH_LOG_SPEC = 2
+};
+
+/* Values for the 'transform' field. */
+enum {
+ LEGACY_DCT = 0,
+ DCT_II = 1,
+ DCT_HTK = 2
+};
+
+typedef struct melfb_s melfb_t;
+/** Base Struct to hold all structure for MFCC computation. */
+struct melfb_s {
+ float32 sampling_rate;
+ int32 num_cepstra;
+ int32 num_filters;
+ int32 fft_size;
+ float32 lower_filt_freq;
+ float32 upper_filt_freq;
+ /* DCT coefficients. */
+ mfcc_t **mel_cosine;
+ /* Filter coefficients. */
+ mfcc_t *filt_coeffs;
+ int16 *spec_start;
+ int16 *filt_start;
+ int16 *filt_width;
+ /* Luxury mobile home. */
+ int32 doublewide;
+ char const *warp_type;
+ char const *warp_params;
+ uint32 warp_id;
+ /* Precomputed normalization constants for unitary DCT-II/DCT-III */
+ mfcc_t sqrt_inv_n, sqrt_inv_2n;
+ /* Value and coefficients for HTK-style liftering */
+ int32 lifter_val;
+ mfcc_t *lifter;
+ /* Normalize filters to unit area */
+ int32 unit_area;
+ /* Round filter frequencies to DFT points (hurts accuracy, but is
+ useful for legacy purposes) */
+ int32 round_filters;
+};
+
+typedef struct ringbuf_s {
+ powspec_t** bufs;
+ int16 buf_num;
+ int32 buf_len;
+ int16 start;
+ int16 end;
+ int32 recs;
+} ringbuf_t;
+
+/* sqrt(1/2), also used for unitary DCT-II/DCT-III */
+#define SQRT_HALF FLOAT2MFCC(0.707106781186548)
+
+typedef struct vad_data_s {
+ uint8 global_state;
+ uint8 state_changed;
+ uint8 store_pcm;
+ int16 prespch_num;
+ int16 postspch_num;
+ prespch_buf_t* prespch_buf;
+} vad_data_t;
+
+/** Structure for the front-end computation. */
+struct fe_s {
+ cmd_ln_t *config;
+ int refcount;
+
+ int16 prespch_len;
+ int16 postspch_len;
+ float32 vad_threshold;
+
+ float32 sampling_rate;
+ int16 frame_rate;
+ int16 frame_shift;
+
+ float32 window_length;
+ int16 frame_size;
+ int16 fft_size;
+
+ uint8 fft_order;
+ uint8 feature_dimension;
+ uint8 num_cepstra;
+ uint8 remove_dc;
+ uint8 log_spec;
+ uint8 swap;
+ uint8 dither;
+ uint8 transform;
+ uint8 remove_noise;
+ uint8 remove_silence;
+
+ float32 pre_emphasis_alpha;
+ int32 seed;
+
+ size_t sample_counter;
+ uint8 start_flag;
+ uint8 reserved;
+
+ /* Twiddle factors for FFT. */
+ frame_t *ccc, *sss;
+ /* Mel filter parameters. */
+ melfb_t *mel_fb;
+ /* Half of a Hamming Window. */
+ window_t *hamming_window;
+ /* Storage for noise removal */
+ noise_stats_t *noise_stats;
+
+ /* Storage for VAD variables */
+ vad_data_t *vad_data;
+
+ /* Temporary buffers for processing. */
+ /* FIXME: too many of these. */
+ int16 *spch;
+ frame_t *frame;
+ powspec_t *spec, *mfspec;
+ int16 *overflow_samps;
+ int16 num_overflow_samps;
+ int16 prior;
+};
+
+void fe_init_dither(int32 seed);
+
+/* Apply 1/2 bit noise to a buffer of audio. */
+int32 fe_dither(int16 *buffer, int32 nsamps);
+
+/* Load a frame of data into the fe. */
+int fe_read_frame(fe_t *fe, int16 const *in, int32 len);
+
+/* Shift the input buffer back and read more data. */
+int fe_shift_frame(fe_t *fe, int16 const *in, int32 len);
+
+/* Process a frame of data into features. */
+void fe_write_frame(fe_t *fe, mfcc_t *fea);
+
+/* Initialization functions. */
+int32 fe_build_melfilters(melfb_t *MEL_FB);
+int32 fe_compute_melcosine(melfb_t *MEL_FB);
+void fe_create_hamming(window_t *in, int32 in_len);
+void fe_create_twiddle(fe_t *fe);
+
+fixed32 fe_log_add(fixed32 x, fixed32 y);
+fixed32 fe_log_sub(fixed32 x, fixed32 y);
+
+/* Miscellaneous processing functions. */
+void fe_spec2cep(fe_t * fe, const powspec_t * mflogspec, mfcc_t * mfcep);
+void fe_dct2(fe_t *fe, const powspec_t *mflogspec, mfcc_t *mfcep, int htk);
+void fe_dct3(fe_t *fe, const mfcc_t *mfcep, powspec_t *mflogspec);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __FE_INTERNAL_H__ */