diff options
author | Matt A. Tobin <mattatobin@localhost.localdomain> | 2018-02-02 04:16:08 -0500 |
---|---|---|
committer | Matt A. Tobin <mattatobin@localhost.localdomain> | 2018-02-02 04:16:08 -0500 |
commit | 5f8de423f190bbb79a62f804151bc24824fa32d8 (patch) | |
tree | 10027f336435511475e392454359edea8e25895d /media/sphinxbase/src/libsphinxbase/fe/fe_interface.c | |
parent | 49ee0794b5d912db1f95dce6eb52d781dc210db5 (diff) | |
download | UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.gz UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.lz UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.xz UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.zip |
Add m-esr52 at 52.6.0
Diffstat (limited to 'media/sphinxbase/src/libsphinxbase/fe/fe_interface.c')
-rw-r--r-- | media/sphinxbase/src/libsphinxbase/fe/fe_interface.c | 776 |
1 files changed, 776 insertions, 0 deletions
diff --git a/media/sphinxbase/src/libsphinxbase/fe/fe_interface.c b/media/sphinxbase/src/libsphinxbase/fe/fe_interface.c new file mode 100644 index 000000000..cd2e1e2db --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/fe/fe_interface.c @@ -0,0 +1,776 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1996-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +#include <stdio.h> +#include <string.h> +#include <math.h> +#include <stdlib.h> +#include <assert.h> +#ifdef _WIN32_WCE +#include <windows.h> +#else +#include <time.h> +#endif + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include "sphinxbase/prim_type.h" +#include "sphinxbase/byteorder.h" +#include "sphinxbase/fixpoint.h" +#include "sphinxbase/genrand.h" +#include "sphinxbase/err.h" +#include "sphinxbase/cmd_ln.h" +#include "sphinxbase/ckd_alloc.h" + +#include "fe_internal.h" +#include "fe_warp.h" + +static const arg_t fe_args[] = { + waveform_to_cepstral_command_line_macro(), + { NULL, 0, NULL, NULL } +}; + +int +fe_parse_general_params(cmd_ln_t *config, fe_t * fe) +{ + int j, frate; + + fe->config = config; + fe->sampling_rate = cmd_ln_float32_r(config, "-samprate"); + frate = cmd_ln_int32_r(config, "-frate"); + if (frate > MAX_INT16 || frate > fe->sampling_rate || frate < 1) { + E_ERROR + ("Frame rate %d can not be bigger than sample rate %.02f\n", + frate, fe->sampling_rate); + return -1; + } + + fe->frame_rate = (int16)frate; + if (cmd_ln_boolean_r(config, "-dither")) { + fe->dither = 1; + fe->seed = cmd_ln_int32_r(config, "-seed"); + } +#ifdef WORDS_BIGENDIAN + fe->swap = strcmp("big", cmd_ln_str_r(config, "-input_endian")) == 0 ? 0 : 1; +#else + fe->swap = strcmp("little", cmd_ln_str_r(config, "-input_endian")) == 0 ? 0 : 1; +#endif + fe->window_length = cmd_ln_float32_r(config, "-wlen"); + fe->pre_emphasis_alpha = cmd_ln_float32_r(config, "-alpha"); + + fe->num_cepstra = (uint8)cmd_ln_int32_r(config, "-ncep"); + fe->fft_size = (int16)cmd_ln_int32_r(config, "-nfft"); + + /* Check FFT size, compute FFT order (log_2(n)) */ + for (j = fe->fft_size, fe->fft_order = 0; j > 1; j >>= 1, fe->fft_order++) { + if (((j % 2) != 0) || (fe->fft_size <= 0)) { + E_ERROR("fft: number of points must be a power of 2 (is %d)\n", + fe->fft_size); + return -1; + } + } + /* Verify that FFT size is greater or equal to window length. */ + if (fe->fft_size < (int)(fe->window_length * fe->sampling_rate)) { + E_ERROR("FFT: Number of points must be greater or equal to frame size (%d samples)\n", + (int)(fe->window_length * fe->sampling_rate)); + return -1; + } + + fe->prespch_len = (int16)cmd_ln_int32_r(config, "-vad_prespeech"); + fe->postspch_len = (int16)cmd_ln_int32_r(config, "-vad_postspeech"); + fe->vad_threshold = cmd_ln_float32_r(config, "-vad_threshold"); + + fe->remove_dc = cmd_ln_boolean_r(config, "-remove_dc"); + fe->remove_noise = cmd_ln_boolean_r(config, "-remove_noise"); + fe->remove_silence = cmd_ln_boolean_r(config, "-remove_silence"); + + if (0 == strcmp(cmd_ln_str_r(config, "-transform"), "dct")) + fe->transform = DCT_II; + else if (0 == strcmp(cmd_ln_str_r(config, "-transform"), "legacy")) + fe->transform = LEGACY_DCT; + else if (0 == strcmp(cmd_ln_str_r(config, "-transform"), "htk")) + fe->transform = DCT_HTK; + else { + E_ERROR("Invalid transform type (values are 'dct', 'legacy', 'htk')\n"); + return -1; + } + + if (cmd_ln_boolean_r(config, "-logspec")) + fe->log_spec = RAW_LOG_SPEC; + if (cmd_ln_boolean_r(config, "-smoothspec")) + fe->log_spec = SMOOTH_LOG_SPEC; + + return 0; +} + +static int +fe_parse_melfb_params(cmd_ln_t *config, fe_t *fe, melfb_t * mel) +{ + mel->sampling_rate = fe->sampling_rate; + mel->fft_size = fe->fft_size; + mel->num_cepstra = fe->num_cepstra; + mel->num_filters = cmd_ln_int32_r(config, "-nfilt"); + + if (fe->log_spec) + fe->feature_dimension = mel->num_filters; + else + fe->feature_dimension = fe->num_cepstra; + + mel->upper_filt_freq = cmd_ln_float32_r(config, "-upperf"); + mel->lower_filt_freq = cmd_ln_float32_r(config, "-lowerf"); + + mel->doublewide = cmd_ln_boolean_r(config, "-doublebw"); + + mel->warp_type = cmd_ln_str_r(config, "-warp_type"); + mel->warp_params = cmd_ln_str_r(config, "-warp_params"); + mel->lifter_val = cmd_ln_int32_r(config, "-lifter"); + + mel->unit_area = cmd_ln_boolean_r(config, "-unit_area"); + mel->round_filters = cmd_ln_boolean_r(config, "-round_filters"); + + if (fe_warp_set(mel, mel->warp_type) != FE_SUCCESS) { + E_ERROR("Failed to initialize the warping function.\n"); + return -1; + } + fe_warp_set_parameters(mel, mel->warp_params, mel->sampling_rate); + return 0; +} + +void +fe_print_current(fe_t const *fe) +{ + E_INFO("Current FE Parameters:\n"); + E_INFO("\tSampling Rate: %f\n", fe->sampling_rate); + E_INFO("\tFrame Size: %d\n", fe->frame_size); + E_INFO("\tFrame Shift: %d\n", fe->frame_shift); + E_INFO("\tFFT Size: %d\n", fe->fft_size); + E_INFO("\tLower Frequency: %g\n", + fe->mel_fb->lower_filt_freq); + E_INFO("\tUpper Frequency: %g\n", + fe->mel_fb->upper_filt_freq); + E_INFO("\tNumber of filters: %d\n", fe->mel_fb->num_filters); + E_INFO("\tNumber of Overflow Samps: %d\n", fe->num_overflow_samps); + E_INFO("\tStart Utt Status: %d\n", fe->start_flag); + E_INFO("Will %sremove DC offset at frame level\n", + fe->remove_dc ? "" : "not "); + if (fe->dither) { + E_INFO("Will add dither to audio\n"); + E_INFO("Dither seeded with %d\n", fe->seed); + } + else { + E_INFO("Will not add dither to audio\n"); + } + if (fe->mel_fb->lifter_val) { + E_INFO("Will apply sine-curve liftering, period %d\n", + fe->mel_fb->lifter_val); + } + E_INFO("Will %snormalize filters to unit area\n", + fe->mel_fb->unit_area ? "" : "not "); + E_INFO("Will %sround filter frequencies to DFT points\n", + fe->mel_fb->round_filters ? "" : "not "); + E_INFO("Will %suse double bandwidth in mel filter\n", + fe->mel_fb->doublewide ? "" : "not "); +} + +fe_t * +fe_init_auto() +{ + return fe_init_auto_r(cmd_ln_get()); +} + +fe_t * +fe_init_auto_r(cmd_ln_t *config) +{ + fe_t *fe; + int prespch_frame_len; + + fe = (fe_t*)ckd_calloc(1, sizeof(*fe)); + fe->refcount = 1; + + /* transfer params to front end */ + if (fe_parse_general_params(cmd_ln_retain(config), fe) < 0) { + fe_free(fe); + return NULL; + } + + /* compute remaining fe parameters */ + /* We add 0.5 so approximate the float with the closest + * integer. E.g., 2.3 is truncate to 2, whereas 3.7 becomes 4 + */ + fe->frame_shift = (int32) (fe->sampling_rate / fe->frame_rate + 0.5); + fe->frame_size = (int32) (fe->window_length * fe->sampling_rate + 0.5); + fe->prior = 0; + + fe_start_stream(fe); + + assert (fe->frame_shift > 1); + + if (fe->frame_size > (fe->fft_size)) { + E_ERROR + ("Number of FFT points has to be a power of 2 higher than %d, it is %d\n", + fe->frame_size, fe->fft_size); + fe_free(fe); + return NULL; + } + + if (fe->dither) + fe_init_dither(fe->seed); + + /* establish buffers for overflow samps and hamming window */ + fe->overflow_samps = ckd_calloc(fe->frame_size, sizeof(int16)); + fe->hamming_window = ckd_calloc(fe->frame_size/2, sizeof(window_t)); + + /* create hamming window */ + fe_create_hamming(fe->hamming_window, fe->frame_size); + + /* init and fill appropriate filter structure */ + fe->mel_fb = ckd_calloc(1, sizeof(*fe->mel_fb)); + + /* transfer params to mel fb */ + fe_parse_melfb_params(config, fe, fe->mel_fb); + + if (fe->mel_fb->upper_filt_freq > fe->sampling_rate / 2 + 1.0) { + E_ERROR("Upper frequency %.1f is higher than samprate/2 (%.1f)\n", + fe->mel_fb->upper_filt_freq, fe->sampling_rate / 2); + fe_free(fe); + return NULL; + } + + fe_build_melfilters(fe->mel_fb); + + fe_compute_melcosine(fe->mel_fb); + if (fe->remove_noise || fe->remove_silence) + fe->noise_stats = fe_init_noisestats(fe->mel_fb->num_filters); + + fe->vad_data = (vad_data_t*)ckd_calloc(1, sizeof(*fe->vad_data)); + prespch_frame_len = fe->log_spec != RAW_LOG_SPEC ? fe->num_cepstra : fe->mel_fb->num_filters; + fe->vad_data->prespch_buf = fe_prespch_init(fe->prespch_len + 1, prespch_frame_len, fe->frame_shift); + + /* Create temporary FFT, spectrum and mel-spectrum buffers. */ + /* FIXME: Gosh there are a lot of these. */ + fe->spch = ckd_calloc(fe->frame_size, sizeof(*fe->spch)); + fe->frame = ckd_calloc(fe->fft_size, sizeof(*fe->frame)); + fe->spec = ckd_calloc(fe->fft_size, sizeof(*fe->spec)); + fe->mfspec = ckd_calloc(fe->mel_fb->num_filters, sizeof(*fe->mfspec)); + + /* create twiddle factors */ + fe->ccc = ckd_calloc(fe->fft_size / 4, sizeof(*fe->ccc)); + fe->sss = ckd_calloc(fe->fft_size / 4, sizeof(*fe->sss)); + fe_create_twiddle(fe); + + if (cmd_ln_boolean_r(config, "-verbose")) { + fe_print_current(fe); + } + + /*** Initialize the overflow buffers ***/ + fe_start_utt(fe); + return fe; +} + +arg_t const * +fe_get_args(void) +{ + return fe_args; +} + +const cmd_ln_t * +fe_get_config(fe_t *fe) +{ + return fe->config; +} + +void +fe_init_dither(int32 seed) +{ + if (seed < 0) { + E_INFO("You are using the internal mechanism to generate the seed.\n"); +#ifdef _WIN32_WCE + s3_rand_seed(GetTickCount()); +#else + s3_rand_seed((long) time(0)); +#endif + } else { + E_INFO("You are using %d as the seed.\n", seed); + s3_rand_seed(seed); + } +} + +static void +fe_reset_vad_data(vad_data_t * vad_data) +{ + vad_data->global_state = 0; + vad_data->state_changed = 0; + vad_data->prespch_num = 0; + vad_data->postspch_num = 0; + fe_prespch_reset_cep(vad_data->prespch_buf); +} + +int32 +fe_start_utt(fe_t * fe) +{ + fe->num_overflow_samps = 0; + memset(fe->overflow_samps, 0, fe->frame_size * sizeof(int16)); + fe->start_flag = 1; + fe->prior = 0; + fe_reset_vad_data(fe->vad_data); + return 0; +} + +void +fe_start_stream(fe_t *fe) +{ + fe->sample_counter = 0; + fe_reset_noisestats(fe->noise_stats); +} + +int +fe_get_output_size(fe_t *fe) +{ + return (int)fe->feature_dimension; +} + +void +fe_get_input_size(fe_t *fe, int *out_frame_shift, + int *out_frame_size) +{ + if (out_frame_shift) + *out_frame_shift = fe->frame_shift; + if (out_frame_size) + *out_frame_size = fe->frame_size; +} + +uint8 +fe_get_vad_state(fe_t *fe) +{ + return fe->vad_data->global_state; +} + +int +fe_process_frames(fe_t *fe, + int16 const **inout_spch, + size_t *inout_nsamps, + mfcc_t **buf_cep, + int32 *inout_nframes, + int32 *out_frameidx) +{ + int outidx, n_overflow, orig_n_overflow; + int16 const *orig_spch; + size_t orig_nsamps; + + /* In the special case where there is no output buffer, return the + * maximum number of frames which would be generated. */ + if (buf_cep == NULL) { + if (*inout_nsamps + fe->num_overflow_samps < (size_t)fe->frame_size) + *inout_nframes = 0; + else + *inout_nframes = 1 + + ((*inout_nsamps + fe->num_overflow_samps - fe->frame_size) + / fe->frame_shift); + if (fe->vad_data->global_state) + *inout_nframes += fe_prespch_ncep(fe->vad_data->prespch_buf); + return *inout_nframes; + } + + if (out_frameidx) + *out_frameidx = 0; + + /* Are there not enough samples to make at least 1 frame? */ + if (*inout_nsamps + fe->num_overflow_samps < (size_t)fe->frame_size) { + if (*inout_nsamps > 0) { + /* Append them to the overflow buffer. */ + memcpy(fe->overflow_samps + fe->num_overflow_samps, + *inout_spch, *inout_nsamps * (sizeof(int16))); + fe->num_overflow_samps += *inout_nsamps; + /* Update input-output pointers and counters. */ + *inout_spch += *inout_nsamps; + *inout_nsamps = 0; + } + /* We produced no frames of output, sorry! */ + *inout_nframes = 0; + return 0; + } + + /* Can't write a frame? Then do nothing! */ + if (*inout_nframes < 1) { + *inout_nframes = 0; + return 0; + } + + /* Index of output frame. */ + outidx = 0; + + /* Try to read from prespeech buffer */ + if (fe->vad_data->global_state) { + while ((*inout_nframes) > 0 && fe_prespch_read_cep(fe->vad_data->prespch_buf, buf_cep[outidx]) > 0) { + outidx++; + (*inout_nframes)--; + } + if ((*inout_nframes) < 1) { + /* mfcc buffer is filled from prespeech buffer */ + *inout_nframes = outidx; + return 0; + } + + /* Sets the start frame for the returned data so that caller can update timings */ + if (out_frameidx && fe->vad_data->state_changed) { + *out_frameidx = fe->sample_counter / fe->frame_shift - fe->prespch_len; + } + } + + /* Keep track of the original start of the buffer. */ + orig_spch = *inout_spch; + orig_nsamps = *inout_nsamps; + orig_n_overflow = fe->num_overflow_samps; + + /* Start processing, taking care of any incoming overflow. */ + if (fe->num_overflow_samps) { + int offset = fe->frame_size - fe->num_overflow_samps; + + /* Append start of spch to overflow samples to make a full frame. */ + memcpy(fe->overflow_samps + fe->num_overflow_samps, + *inout_spch, offset * sizeof(**inout_spch)); + fe_read_frame(fe, fe->overflow_samps, fe->frame_size); + /* Update input-output pointers and counters. */ + *inout_spch += offset; + *inout_nsamps -= offset; + fe->num_overflow_samps -= fe->frame_shift; + } else { + fe_read_frame(fe, *inout_spch, fe->frame_size); + /* Update input-output pointers and counters. */ + *inout_spch += fe->frame_size; + *inout_nsamps -= fe->frame_size; + } + + fe_write_frame(fe, buf_cep[outidx]); + + if (!fe->vad_data->state_changed && fe->vad_data->global_state) { + outidx++; + (*inout_nframes)--; + } + if (fe->vad_data->state_changed && fe->vad_data->global_state) { + /* previous frame triggered vad into speech state + * dumping prespeech buffer */ + while ((*inout_nframes) > 0 && fe_prespch_read_cep(fe->vad_data->prespch_buf, buf_cep[outidx]) > 0) { + outidx++; + (*inout_nframes)--; + } + + /* Sets the start frame for the returned data so that caller can update timings */ + if (out_frameidx) { + *out_frameidx = (fe->sample_counter + orig_nsamps - *inout_nsamps) / fe->frame_shift - fe->prespch_len; + } + } + + /* Process all remaining frames. */ + while (*inout_nframes > 0 && *inout_nsamps >= (size_t)fe->frame_shift) { + fe_shift_frame(fe, *inout_spch, fe->frame_shift); + fe_write_frame(fe, buf_cep[outidx]); + if (!fe->vad_data->state_changed && fe->vad_data->global_state) { + (*inout_nframes)--; + outidx++; + } + /* Update input-output pointers and counters. */ + *inout_spch += fe->frame_shift; + *inout_nsamps -= fe->frame_shift; + /* Amount of data behind the original input which is still needed. */ + if (fe->num_overflow_samps > 0) + fe->num_overflow_samps -= fe->frame_shift; + + if (fe->vad_data->state_changed && fe->vad_data->global_state) { + /* previous frame triggered vad into speech state */ + while (*inout_nframes > 0 && fe_prespch_read_cep(fe->vad_data->prespch_buf, buf_cep[outidx]) != 0) { + (*inout_nframes)--; + outidx++; + } + } + } + + /* How many relevant overflow samples are there left? */ + if (fe->num_overflow_samps <= 0) { + /* Maximum number of overflow samples past *inout_spch to save. */ + n_overflow = *inout_nsamps; + if (n_overflow > fe->frame_shift) + n_overflow = fe->frame_shift; + fe->num_overflow_samps = fe->frame_size - fe->frame_shift; + /* Make sure this isn't an illegal read! */ + if (fe->num_overflow_samps > *inout_spch - orig_spch) + fe->num_overflow_samps = *inout_spch - orig_spch; + fe->num_overflow_samps += n_overflow; + if (fe->num_overflow_samps > 0) { + memcpy(fe->overflow_samps, + *inout_spch - (fe->frame_size - fe->frame_shift), + fe->num_overflow_samps * sizeof(**inout_spch)); + /* Update the input pointer to cover this stuff. */ + *inout_spch += n_overflow; + *inout_nsamps -= n_overflow; + } + } else { + /* There is still some relevant data left in the overflow buffer. */ + /* Shift existing data to the beginning. */ + memmove(fe->overflow_samps, + fe->overflow_samps + orig_n_overflow - fe->num_overflow_samps, + fe->num_overflow_samps * sizeof(*fe->overflow_samps)); + /* Copy in whatever we had in the original speech buffer. */ + n_overflow = *inout_spch - orig_spch + *inout_nsamps; + if (n_overflow > fe->frame_size - fe->num_overflow_samps) + n_overflow = fe->frame_size - fe->num_overflow_samps; + memcpy(fe->overflow_samps + fe->num_overflow_samps, + orig_spch, n_overflow * sizeof(*orig_spch)); + fe->num_overflow_samps += n_overflow; + /* Advance the input pointers. */ + if (n_overflow > *inout_spch - orig_spch) { + n_overflow -= (*inout_spch - orig_spch); + *inout_spch += n_overflow; + *inout_nsamps -= n_overflow; + } + } + + /* Finally update the frame counter with the number of frames + * and global sample counter with number of samples we procesed*/ + *inout_nframes = outidx; /* FIXME: Not sure why I wrote it this way... */ + fe->sample_counter += orig_nsamps - *inout_nsamps; + return 0; +} + +int +fe_process_frames_ext(fe_t *fe, + int16 const **inout_spch, + size_t *inout_nsamps, + mfcc_t **buf_cep, + int32 *inout_nframes, + int16 **voiced_spch, + int32 *voiced_spch_nsamps, + int32 *out_frameidx) +{ + int proc_result; + + fe_prespch_extend_pcm(fe->vad_data->prespch_buf, *inout_nframes); + + fe->vad_data->store_pcm = TRUE; + proc_result = fe_process_frames(fe, inout_spch, inout_nsamps, buf_cep, inout_nframes, out_frameidx); + fe->vad_data->store_pcm = FALSE; + + if (fe->vad_data->global_state) + fe_prespch_read_pcm(fe->vad_data->prespch_buf, voiced_spch, voiced_spch_nsamps); + else + *voiced_spch_nsamps = 0; + + return proc_result; +} + +int +fe_process_utt(fe_t * fe, int16 const * spch, size_t nsamps, + mfcc_t *** cep_block, int32 * nframes) +{ + mfcc_t **cep; + int rv; + + /* Figure out how many frames we will need. */ + fe_process_frames(fe, NULL, &nsamps, NULL, nframes, NULL); + /* Create the output buffer (it has to exist, even if there are no output frames). */ + if (*nframes) + cep = (mfcc_t **)ckd_calloc_2d(*nframes, fe->feature_dimension, sizeof(**cep)); + else + cep = (mfcc_t **)ckd_calloc_2d(1, fe->feature_dimension, sizeof(**cep)); + /* Now just call fe_process_frames() with the allocated buffer. */ + rv = fe_process_frames(fe, &spch, &nsamps, cep, nframes, NULL); + *cep_block = cep; + + return rv; +} + + +int32 +fe_end_utt(fe_t * fe, mfcc_t * cepvector, int32 * nframes) +{ + /* Process any remaining data. */ + *nframes = 0; + if (fe->num_overflow_samps > 0) { + fe_read_frame(fe, fe->overflow_samps, fe->num_overflow_samps); + fe_write_frame(fe, cepvector); + if (!fe->vad_data->state_changed && fe->vad_data->global_state) + (*nframes)++; + } + + /* reset overflow buffers... */ + fe->num_overflow_samps = 0; + fe->start_flag = 0; + + return 0; +} + +fe_t * +fe_retain(fe_t *fe) +{ + ++fe->refcount; + return fe; +} + +int +fe_free(fe_t * fe) +{ + if (fe == NULL) + return 0; + if (--fe->refcount > 0) + return fe->refcount; + + /* kill FE instance - free everything... */ + if (fe->mel_fb) { + if (fe->mel_fb->mel_cosine) + fe_free_2d((void *) fe->mel_fb->mel_cosine); + ckd_free(fe->mel_fb->lifter); + ckd_free(fe->mel_fb->spec_start); + ckd_free(fe->mel_fb->filt_start); + ckd_free(fe->mel_fb->filt_width); + ckd_free(fe->mel_fb->filt_coeffs); + ckd_free(fe->mel_fb); + } + ckd_free(fe->spch); + ckd_free(fe->frame); + ckd_free(fe->ccc); + ckd_free(fe->sss); + ckd_free(fe->spec); + ckd_free(fe->mfspec); + ckd_free(fe->overflow_samps); + ckd_free(fe->hamming_window); + + if (fe->noise_stats) + fe_free_noisestats(fe->noise_stats); + + if (fe->vad_data) { + fe_prespch_free(fe->vad_data->prespch_buf); + ckd_free(fe->vad_data); + } + + cmd_ln_free_r(fe->config); + ckd_free(fe); + + return 0; +} + +/** + * Convert a block of mfcc_t to float32 (can be done in-place) + **/ +int32 +fe_mfcc_to_float(fe_t * fe, + mfcc_t ** input, float32 ** output, int32 nframes) +{ + int32 i; + +#ifndef FIXED_POINT + if ((void *) input == (void *) output) + return nframes * fe->feature_dimension; +#endif + for (i = 0; i < nframes * fe->feature_dimension; ++i) + output[0][i] = MFCC2FLOAT(input[0][i]); + + return i; +} + +/** + * Convert a block of float32 to mfcc_t (can be done in-place) + **/ +int32 +fe_float_to_mfcc(fe_t * fe, + float32 ** input, mfcc_t ** output, int32 nframes) +{ + int32 i; + +#ifndef FIXED_POINT + if ((void *) input == (void *) output) + return nframes * fe->feature_dimension; +#endif + for (i = 0; i < nframes * fe->feature_dimension; ++i) + output[0][i] = FLOAT2MFCC(input[0][i]); + + return i; +} + +int32 +fe_logspec_to_mfcc(fe_t * fe, const mfcc_t * fr_spec, mfcc_t * fr_cep) +{ +#ifdef FIXED_POINT + fe_spec2cep(fe, fr_spec, fr_cep); +#else /* ! FIXED_POINT */ + powspec_t *powspec; + int32 i; + + powspec = ckd_malloc(fe->mel_fb->num_filters * sizeof(powspec_t)); + for (i = 0; i < fe->mel_fb->num_filters; ++i) + powspec[i] = (powspec_t) fr_spec[i]; + fe_spec2cep(fe, powspec, fr_cep); + ckd_free(powspec); +#endif /* ! FIXED_POINT */ + return 0; +} + +int32 +fe_logspec_dct2(fe_t * fe, const mfcc_t * fr_spec, mfcc_t * fr_cep) +{ +#ifdef FIXED_POINT + fe_dct2(fe, fr_spec, fr_cep, 0); +#else /* ! FIXED_POINT */ + powspec_t *powspec; + int32 i; + + powspec = ckd_malloc(fe->mel_fb->num_filters * sizeof(powspec_t)); + for (i = 0; i < fe->mel_fb->num_filters; ++i) + powspec[i] = (powspec_t) fr_spec[i]; + fe_dct2(fe, powspec, fr_cep, 0); + ckd_free(powspec); +#endif /* ! FIXED_POINT */ + return 0; +} + +int32 +fe_mfcc_dct3(fe_t * fe, const mfcc_t * fr_cep, mfcc_t * fr_spec) +{ +#ifdef FIXED_POINT + fe_dct3(fe, fr_cep, fr_spec); +#else /* ! FIXED_POINT */ + powspec_t *powspec; + int32 i; + + powspec = ckd_malloc(fe->mel_fb->num_filters * sizeof(powspec_t)); + fe_dct3(fe, fr_cep, powspec); + for (i = 0; i < fe->mel_fb->num_filters; ++i) + fr_spec[i] = (mfcc_t) powspec[i]; + ckd_free(powspec); +#endif /* ! FIXED_POINT */ + return 0; +} |