diff options
Diffstat (limited to 'media/sphinxbase/src/libsphinxbase/fe')
18 files changed, 4981 insertions, 0 deletions
diff --git a/media/sphinxbase/src/libsphinxbase/fe/fe_interface.c b/media/sphinxbase/src/libsphinxbase/fe/fe_interface.c new file mode 100644 index 000000000..cd2e1e2db --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/fe/fe_interface.c @@ -0,0 +1,776 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1996-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +#include <stdio.h> +#include <string.h> +#include <math.h> +#include <stdlib.h> +#include <assert.h> +#ifdef _WIN32_WCE +#include <windows.h> +#else +#include <time.h> +#endif + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include "sphinxbase/prim_type.h" +#include "sphinxbase/byteorder.h" +#include "sphinxbase/fixpoint.h" +#include "sphinxbase/genrand.h" +#include "sphinxbase/err.h" +#include "sphinxbase/cmd_ln.h" +#include "sphinxbase/ckd_alloc.h" + +#include "fe_internal.h" +#include "fe_warp.h" + +static const arg_t fe_args[] = { + waveform_to_cepstral_command_line_macro(), + { NULL, 0, NULL, NULL } +}; + +int +fe_parse_general_params(cmd_ln_t *config, fe_t * fe) +{ + int j, frate; + + fe->config = config; + fe->sampling_rate = cmd_ln_float32_r(config, "-samprate"); + frate = cmd_ln_int32_r(config, "-frate"); + if (frate > MAX_INT16 || frate > fe->sampling_rate || frate < 1) { + E_ERROR + ("Frame rate %d can not be bigger than sample rate %.02f\n", + frate, fe->sampling_rate); + return -1; + } + + fe->frame_rate = (int16)frate; + if (cmd_ln_boolean_r(config, "-dither")) { + fe->dither = 1; + fe->seed = cmd_ln_int32_r(config, "-seed"); + } +#ifdef WORDS_BIGENDIAN + fe->swap = strcmp("big", cmd_ln_str_r(config, "-input_endian")) == 0 ? 0 : 1; +#else + fe->swap = strcmp("little", cmd_ln_str_r(config, "-input_endian")) == 0 ? 
0 : 1; +#endif + fe->window_length = cmd_ln_float32_r(config, "-wlen"); + fe->pre_emphasis_alpha = cmd_ln_float32_r(config, "-alpha"); + + fe->num_cepstra = (uint8)cmd_ln_int32_r(config, "-ncep"); + fe->fft_size = (int16)cmd_ln_int32_r(config, "-nfft"); + + /* Check FFT size, compute FFT order (log_2(n)) */ + for (j = fe->fft_size, fe->fft_order = 0; j > 1; j >>= 1, fe->fft_order++) { + if (((j % 2) != 0) || (fe->fft_size <= 0)) { + E_ERROR("fft: number of points must be a power of 2 (is %d)\n", + fe->fft_size); + return -1; + } + } + /* Verify that FFT size is greater or equal to window length. */ + if (fe->fft_size < (int)(fe->window_length * fe->sampling_rate)) { + E_ERROR("FFT: Number of points must be greater or equal to frame size (%d samples)\n", + (int)(fe->window_length * fe->sampling_rate)); + return -1; + } + + fe->prespch_len = (int16)cmd_ln_int32_r(config, "-vad_prespeech"); + fe->postspch_len = (int16)cmd_ln_int32_r(config, "-vad_postspeech"); + fe->vad_threshold = cmd_ln_float32_r(config, "-vad_threshold"); + + fe->remove_dc = cmd_ln_boolean_r(config, "-remove_dc"); + fe->remove_noise = cmd_ln_boolean_r(config, "-remove_noise"); + fe->remove_silence = cmd_ln_boolean_r(config, "-remove_silence"); + + if (0 == strcmp(cmd_ln_str_r(config, "-transform"), "dct")) + fe->transform = DCT_II; + else if (0 == strcmp(cmd_ln_str_r(config, "-transform"), "legacy")) + fe->transform = LEGACY_DCT; + else if (0 == strcmp(cmd_ln_str_r(config, "-transform"), "htk")) + fe->transform = DCT_HTK; + else { + E_ERROR("Invalid transform type (values are 'dct', 'legacy', 'htk')\n"); + return -1; + } + + if (cmd_ln_boolean_r(config, "-logspec")) + fe->log_spec = RAW_LOG_SPEC; + if (cmd_ln_boolean_r(config, "-smoothspec")) + fe->log_spec = SMOOTH_LOG_SPEC; + + return 0; +} + +static int +fe_parse_melfb_params(cmd_ln_t *config, fe_t *fe, melfb_t * mel) +{ + mel->sampling_rate = fe->sampling_rate; + mel->fft_size = fe->fft_size; + mel->num_cepstra = fe->num_cepstra; + 
mel->num_filters = cmd_ln_int32_r(config, "-nfilt"); + + if (fe->log_spec) + fe->feature_dimension = mel->num_filters; + else + fe->feature_dimension = fe->num_cepstra; + + mel->upper_filt_freq = cmd_ln_float32_r(config, "-upperf"); + mel->lower_filt_freq = cmd_ln_float32_r(config, "-lowerf"); + + mel->doublewide = cmd_ln_boolean_r(config, "-doublebw"); + + mel->warp_type = cmd_ln_str_r(config, "-warp_type"); + mel->warp_params = cmd_ln_str_r(config, "-warp_params"); + mel->lifter_val = cmd_ln_int32_r(config, "-lifter"); + + mel->unit_area = cmd_ln_boolean_r(config, "-unit_area"); + mel->round_filters = cmd_ln_boolean_r(config, "-round_filters"); + + if (fe_warp_set(mel, mel->warp_type) != FE_SUCCESS) { + E_ERROR("Failed to initialize the warping function.\n"); + return -1; + } + fe_warp_set_parameters(mel, mel->warp_params, mel->sampling_rate); + return 0; +} + +void +fe_print_current(fe_t const *fe) +{ + E_INFO("Current FE Parameters:\n"); + E_INFO("\tSampling Rate: %f\n", fe->sampling_rate); + E_INFO("\tFrame Size: %d\n", fe->frame_size); + E_INFO("\tFrame Shift: %d\n", fe->frame_shift); + E_INFO("\tFFT Size: %d\n", fe->fft_size); + E_INFO("\tLower Frequency: %g\n", + fe->mel_fb->lower_filt_freq); + E_INFO("\tUpper Frequency: %g\n", + fe->mel_fb->upper_filt_freq); + E_INFO("\tNumber of filters: %d\n", fe->mel_fb->num_filters); + E_INFO("\tNumber of Overflow Samps: %d\n", fe->num_overflow_samps); + E_INFO("\tStart Utt Status: %d\n", fe->start_flag); + E_INFO("Will %sremove DC offset at frame level\n", + fe->remove_dc ? "" : "not "); + if (fe->dither) { + E_INFO("Will add dither to audio\n"); + E_INFO("Dither seeded with %d\n", fe->seed); + } + else { + E_INFO("Will not add dither to audio\n"); + } + if (fe->mel_fb->lifter_val) { + E_INFO("Will apply sine-curve liftering, period %d\n", + fe->mel_fb->lifter_val); + } + E_INFO("Will %snormalize filters to unit area\n", + fe->mel_fb->unit_area ? 
"" : "not "); + E_INFO("Will %sround filter frequencies to DFT points\n", + fe->mel_fb->round_filters ? "" : "not "); + E_INFO("Will %suse double bandwidth in mel filter\n", + fe->mel_fb->doublewide ? "" : "not "); +} + +fe_t * +fe_init_auto() +{ + return fe_init_auto_r(cmd_ln_get()); +} + +fe_t * +fe_init_auto_r(cmd_ln_t *config) +{ + fe_t *fe; + int prespch_frame_len; + + fe = (fe_t*)ckd_calloc(1, sizeof(*fe)); + fe->refcount = 1; + + /* transfer params to front end */ + if (fe_parse_general_params(cmd_ln_retain(config), fe) < 0) { + fe_free(fe); + return NULL; + } + + /* compute remaining fe parameters */ + /* We add 0.5 so approximate the float with the closest + * integer. E.g., 2.3 is truncate to 2, whereas 3.7 becomes 4 + */ + fe->frame_shift = (int32) (fe->sampling_rate / fe->frame_rate + 0.5); + fe->frame_size = (int32) (fe->window_length * fe->sampling_rate + 0.5); + fe->prior = 0; + + fe_start_stream(fe); + + assert (fe->frame_shift > 1); + + if (fe->frame_size > (fe->fft_size)) { + E_ERROR + ("Number of FFT points has to be a power of 2 higher than %d, it is %d\n", + fe->frame_size, fe->fft_size); + fe_free(fe); + return NULL; + } + + if (fe->dither) + fe_init_dither(fe->seed); + + /* establish buffers for overflow samps and hamming window */ + fe->overflow_samps = ckd_calloc(fe->frame_size, sizeof(int16)); + fe->hamming_window = ckd_calloc(fe->frame_size/2, sizeof(window_t)); + + /* create hamming window */ + fe_create_hamming(fe->hamming_window, fe->frame_size); + + /* init and fill appropriate filter structure */ + fe->mel_fb = ckd_calloc(1, sizeof(*fe->mel_fb)); + + /* transfer params to mel fb */ + fe_parse_melfb_params(config, fe, fe->mel_fb); + + if (fe->mel_fb->upper_filt_freq > fe->sampling_rate / 2 + 1.0) { + E_ERROR("Upper frequency %.1f is higher than samprate/2 (%.1f)\n", + fe->mel_fb->upper_filt_freq, fe->sampling_rate / 2); + fe_free(fe); + return NULL; + } + + fe_build_melfilters(fe->mel_fb); + + fe_compute_melcosine(fe->mel_fb); + if 
(fe->remove_noise || fe->remove_silence) + fe->noise_stats = fe_init_noisestats(fe->mel_fb->num_filters); + + fe->vad_data = (vad_data_t*)ckd_calloc(1, sizeof(*fe->vad_data)); + prespch_frame_len = fe->log_spec != RAW_LOG_SPEC ? fe->num_cepstra : fe->mel_fb->num_filters; + fe->vad_data->prespch_buf = fe_prespch_init(fe->prespch_len + 1, prespch_frame_len, fe->frame_shift); + + /* Create temporary FFT, spectrum and mel-spectrum buffers. */ + /* FIXME: Gosh there are a lot of these. */ + fe->spch = ckd_calloc(fe->frame_size, sizeof(*fe->spch)); + fe->frame = ckd_calloc(fe->fft_size, sizeof(*fe->frame)); + fe->spec = ckd_calloc(fe->fft_size, sizeof(*fe->spec)); + fe->mfspec = ckd_calloc(fe->mel_fb->num_filters, sizeof(*fe->mfspec)); + + /* create twiddle factors */ + fe->ccc = ckd_calloc(fe->fft_size / 4, sizeof(*fe->ccc)); + fe->sss = ckd_calloc(fe->fft_size / 4, sizeof(*fe->sss)); + fe_create_twiddle(fe); + + if (cmd_ln_boolean_r(config, "-verbose")) { + fe_print_current(fe); + } + + /*** Initialize the overflow buffers ***/ + fe_start_utt(fe); + return fe; +} + +arg_t const * +fe_get_args(void) +{ + return fe_args; +} + +const cmd_ln_t * +fe_get_config(fe_t *fe) +{ + return fe->config; +} + +void +fe_init_dither(int32 seed) +{ + if (seed < 0) { + E_INFO("You are using the internal mechanism to generate the seed.\n"); +#ifdef _WIN32_WCE + s3_rand_seed(GetTickCount()); +#else + s3_rand_seed((long) time(0)); +#endif + } else { + E_INFO("You are using %d as the seed.\n", seed); + s3_rand_seed(seed); + } +} + +static void +fe_reset_vad_data(vad_data_t * vad_data) +{ + vad_data->global_state = 0; + vad_data->state_changed = 0; + vad_data->prespch_num = 0; + vad_data->postspch_num = 0; + fe_prespch_reset_cep(vad_data->prespch_buf); +} + +int32 +fe_start_utt(fe_t * fe) +{ + fe->num_overflow_samps = 0; + memset(fe->overflow_samps, 0, fe->frame_size * sizeof(int16)); + fe->start_flag = 1; + fe->prior = 0; + fe_reset_vad_data(fe->vad_data); + return 0; +} + +void 
+fe_start_stream(fe_t *fe) +{ + fe->sample_counter = 0; + fe_reset_noisestats(fe->noise_stats); +} + +int +fe_get_output_size(fe_t *fe) +{ + return (int)fe->feature_dimension; +} + +void +fe_get_input_size(fe_t *fe, int *out_frame_shift, + int *out_frame_size) +{ + if (out_frame_shift) + *out_frame_shift = fe->frame_shift; + if (out_frame_size) + *out_frame_size = fe->frame_size; +} + +uint8 +fe_get_vad_state(fe_t *fe) +{ + return fe->vad_data->global_state; +} + +int +fe_process_frames(fe_t *fe, + int16 const **inout_spch, + size_t *inout_nsamps, + mfcc_t **buf_cep, + int32 *inout_nframes, + int32 *out_frameidx) +{ + int outidx, n_overflow, orig_n_overflow; + int16 const *orig_spch; + size_t orig_nsamps; + + /* In the special case where there is no output buffer, return the + * maximum number of frames which would be generated. */ + if (buf_cep == NULL) { + if (*inout_nsamps + fe->num_overflow_samps < (size_t)fe->frame_size) + *inout_nframes = 0; + else + *inout_nframes = 1 + + ((*inout_nsamps + fe->num_overflow_samps - fe->frame_size) + / fe->frame_shift); + if (fe->vad_data->global_state) + *inout_nframes += fe_prespch_ncep(fe->vad_data->prespch_buf); + return *inout_nframes; + } + + if (out_frameidx) + *out_frameidx = 0; + + /* Are there not enough samples to make at least 1 frame? */ + if (*inout_nsamps + fe->num_overflow_samps < (size_t)fe->frame_size) { + if (*inout_nsamps > 0) { + /* Append them to the overflow buffer. */ + memcpy(fe->overflow_samps + fe->num_overflow_samps, + *inout_spch, *inout_nsamps * (sizeof(int16))); + fe->num_overflow_samps += *inout_nsamps; + /* Update input-output pointers and counters. */ + *inout_spch += *inout_nsamps; + *inout_nsamps = 0; + } + /* We produced no frames of output, sorry! */ + *inout_nframes = 0; + return 0; + } + + /* Can't write a frame? Then do nothing! */ + if (*inout_nframes < 1) { + *inout_nframes = 0; + return 0; + } + + /* Index of output frame. 
*/ + outidx = 0; + + /* Try to read from prespeech buffer */ + if (fe->vad_data->global_state) { + while ((*inout_nframes) > 0 && fe_prespch_read_cep(fe->vad_data->prespch_buf, buf_cep[outidx]) > 0) { + outidx++; + (*inout_nframes)--; + } + if ((*inout_nframes) < 1) { + /* mfcc buffer is filled from prespeech buffer */ + *inout_nframes = outidx; + return 0; + } + + /* Sets the start frame for the returned data so that caller can update timings */ + if (out_frameidx && fe->vad_data->state_changed) { + *out_frameidx = fe->sample_counter / fe->frame_shift - fe->prespch_len; + } + } + + /* Keep track of the original start of the buffer. */ + orig_spch = *inout_spch; + orig_nsamps = *inout_nsamps; + orig_n_overflow = fe->num_overflow_samps; + + /* Start processing, taking care of any incoming overflow. */ + if (fe->num_overflow_samps) { + int offset = fe->frame_size - fe->num_overflow_samps; + + /* Append start of spch to overflow samples to make a full frame. */ + memcpy(fe->overflow_samps + fe->num_overflow_samps, + *inout_spch, offset * sizeof(**inout_spch)); + fe_read_frame(fe, fe->overflow_samps, fe->frame_size); + /* Update input-output pointers and counters. */ + *inout_spch += offset; + *inout_nsamps -= offset; + fe->num_overflow_samps -= fe->frame_shift; + } else { + fe_read_frame(fe, *inout_spch, fe->frame_size); + /* Update input-output pointers and counters. 
*/ + *inout_spch += fe->frame_size; + *inout_nsamps -= fe->frame_size; + } + + fe_write_frame(fe, buf_cep[outidx]); + + if (!fe->vad_data->state_changed && fe->vad_data->global_state) { + outidx++; + (*inout_nframes)--; + } + if (fe->vad_data->state_changed && fe->vad_data->global_state) { + /* previous frame triggered vad into speech state + * dumping prespeech buffer */ + while ((*inout_nframes) > 0 && fe_prespch_read_cep(fe->vad_data->prespch_buf, buf_cep[outidx]) > 0) { + outidx++; + (*inout_nframes)--; + } + + /* Sets the start frame for the returned data so that caller can update timings */ + if (out_frameidx) { + *out_frameidx = (fe->sample_counter + orig_nsamps - *inout_nsamps) / fe->frame_shift - fe->prespch_len; + } + } + + /* Process all remaining frames. */ + while (*inout_nframes > 0 && *inout_nsamps >= (size_t)fe->frame_shift) { + fe_shift_frame(fe, *inout_spch, fe->frame_shift); + fe_write_frame(fe, buf_cep[outidx]); + if (!fe->vad_data->state_changed && fe->vad_data->global_state) { + (*inout_nframes)--; + outidx++; + } + /* Update input-output pointers and counters. */ + *inout_spch += fe->frame_shift; + *inout_nsamps -= fe->frame_shift; + /* Amount of data behind the original input which is still needed. */ + if (fe->num_overflow_samps > 0) + fe->num_overflow_samps -= fe->frame_shift; + + if (fe->vad_data->state_changed && fe->vad_data->global_state) { + /* previous frame triggered vad into speech state */ + while (*inout_nframes > 0 && fe_prespch_read_cep(fe->vad_data->prespch_buf, buf_cep[outidx]) != 0) { + (*inout_nframes)--; + outidx++; + } + } + } + + /* How many relevant overflow samples are there left? */ + if (fe->num_overflow_samps <= 0) { + /* Maximum number of overflow samples past *inout_spch to save. */ + n_overflow = *inout_nsamps; + if (n_overflow > fe->frame_shift) + n_overflow = fe->frame_shift; + fe->num_overflow_samps = fe->frame_size - fe->frame_shift; + /* Make sure this isn't an illegal read! 
*/ + if (fe->num_overflow_samps > *inout_spch - orig_spch) + fe->num_overflow_samps = *inout_spch - orig_spch; + fe->num_overflow_samps += n_overflow; + if (fe->num_overflow_samps > 0) { + memcpy(fe->overflow_samps, + *inout_spch - (fe->frame_size - fe->frame_shift), + fe->num_overflow_samps * sizeof(**inout_spch)); + /* Update the input pointer to cover this stuff. */ + *inout_spch += n_overflow; + *inout_nsamps -= n_overflow; + } + } else { + /* There is still some relevant data left in the overflow buffer. */ + /* Shift existing data to the beginning. */ + memmove(fe->overflow_samps, + fe->overflow_samps + orig_n_overflow - fe->num_overflow_samps, + fe->num_overflow_samps * sizeof(*fe->overflow_samps)); + /* Copy in whatever we had in the original speech buffer. */ + n_overflow = *inout_spch - orig_spch + *inout_nsamps; + if (n_overflow > fe->frame_size - fe->num_overflow_samps) + n_overflow = fe->frame_size - fe->num_overflow_samps; + memcpy(fe->overflow_samps + fe->num_overflow_samps, + orig_spch, n_overflow * sizeof(*orig_spch)); + fe->num_overflow_samps += n_overflow; + /* Advance the input pointers. */ + if (n_overflow > *inout_spch - orig_spch) { + n_overflow -= (*inout_spch - orig_spch); + *inout_spch += n_overflow; + *inout_nsamps -= n_overflow; + } + } + + /* Finally update the frame counter with the number of frames + * and global sample counter with number of samples we procesed*/ + *inout_nframes = outidx; /* FIXME: Not sure why I wrote it this way... 
*/ + fe->sample_counter += orig_nsamps - *inout_nsamps; + return 0; +} + +int +fe_process_frames_ext(fe_t *fe, + int16 const **inout_spch, + size_t *inout_nsamps, + mfcc_t **buf_cep, + int32 *inout_nframes, + int16 **voiced_spch, + int32 *voiced_spch_nsamps, + int32 *out_frameidx) +{ + int proc_result; + + fe_prespch_extend_pcm(fe->vad_data->prespch_buf, *inout_nframes); + + fe->vad_data->store_pcm = TRUE; + proc_result = fe_process_frames(fe, inout_spch, inout_nsamps, buf_cep, inout_nframes, out_frameidx); + fe->vad_data->store_pcm = FALSE; + + if (fe->vad_data->global_state) + fe_prespch_read_pcm(fe->vad_data->prespch_buf, voiced_spch, voiced_spch_nsamps); + else + *voiced_spch_nsamps = 0; + + return proc_result; +} + +int +fe_process_utt(fe_t * fe, int16 const * spch, size_t nsamps, + mfcc_t *** cep_block, int32 * nframes) +{ + mfcc_t **cep; + int rv; + + /* Figure out how many frames we will need. */ + fe_process_frames(fe, NULL, &nsamps, NULL, nframes, NULL); + /* Create the output buffer (it has to exist, even if there are no output frames). */ + if (*nframes) + cep = (mfcc_t **)ckd_calloc_2d(*nframes, fe->feature_dimension, sizeof(**cep)); + else + cep = (mfcc_t **)ckd_calloc_2d(1, fe->feature_dimension, sizeof(**cep)); + /* Now just call fe_process_frames() with the allocated buffer. */ + rv = fe_process_frames(fe, &spch, &nsamps, cep, nframes, NULL); + *cep_block = cep; + + return rv; +} + + +int32 +fe_end_utt(fe_t * fe, mfcc_t * cepvector, int32 * nframes) +{ + /* Process any remaining data. */ + *nframes = 0; + if (fe->num_overflow_samps > 0) { + fe_read_frame(fe, fe->overflow_samps, fe->num_overflow_samps); + fe_write_frame(fe, cepvector); + if (!fe->vad_data->state_changed && fe->vad_data->global_state) + (*nframes)++; + } + + /* reset overflow buffers... 
*/ + fe->num_overflow_samps = 0; + fe->start_flag = 0; + + return 0; +} + +fe_t * +fe_retain(fe_t *fe) +{ + ++fe->refcount; + return fe; +} + +int +fe_free(fe_t * fe) +{ + if (fe == NULL) + return 0; + if (--fe->refcount > 0) + return fe->refcount; + + /* kill FE instance - free everything... */ + if (fe->mel_fb) { + if (fe->mel_fb->mel_cosine) + fe_free_2d((void *) fe->mel_fb->mel_cosine); + ckd_free(fe->mel_fb->lifter); + ckd_free(fe->mel_fb->spec_start); + ckd_free(fe->mel_fb->filt_start); + ckd_free(fe->mel_fb->filt_width); + ckd_free(fe->mel_fb->filt_coeffs); + ckd_free(fe->mel_fb); + } + ckd_free(fe->spch); + ckd_free(fe->frame); + ckd_free(fe->ccc); + ckd_free(fe->sss); + ckd_free(fe->spec); + ckd_free(fe->mfspec); + ckd_free(fe->overflow_samps); + ckd_free(fe->hamming_window); + + if (fe->noise_stats) + fe_free_noisestats(fe->noise_stats); + + if (fe->vad_data) { + fe_prespch_free(fe->vad_data->prespch_buf); + ckd_free(fe->vad_data); + } + + cmd_ln_free_r(fe->config); + ckd_free(fe); + + return 0; +} + +/** + * Convert a block of mfcc_t to float32 (can be done in-place) + **/ +int32 +fe_mfcc_to_float(fe_t * fe, + mfcc_t ** input, float32 ** output, int32 nframes) +{ + int32 i; + +#ifndef FIXED_POINT + if ((void *) input == (void *) output) + return nframes * fe->feature_dimension; +#endif + for (i = 0; i < nframes * fe->feature_dimension; ++i) + output[0][i] = MFCC2FLOAT(input[0][i]); + + return i; +} + +/** + * Convert a block of float32 to mfcc_t (can be done in-place) + **/ +int32 +fe_float_to_mfcc(fe_t * fe, + float32 ** input, mfcc_t ** output, int32 nframes) +{ + int32 i; + +#ifndef FIXED_POINT + if ((void *) input == (void *) output) + return nframes * fe->feature_dimension; +#endif + for (i = 0; i < nframes * fe->feature_dimension; ++i) + output[0][i] = FLOAT2MFCC(input[0][i]); + + return i; +} + +int32 +fe_logspec_to_mfcc(fe_t * fe, const mfcc_t * fr_spec, mfcc_t * fr_cep) +{ +#ifdef FIXED_POINT + fe_spec2cep(fe, fr_spec, fr_cep); +#else /* ! 
FIXED_POINT */ + powspec_t *powspec; + int32 i; + + powspec = ckd_malloc(fe->mel_fb->num_filters * sizeof(powspec_t)); + for (i = 0; i < fe->mel_fb->num_filters; ++i) + powspec[i] = (powspec_t) fr_spec[i]; + fe_spec2cep(fe, powspec, fr_cep); + ckd_free(powspec); +#endif /* ! FIXED_POINT */ + return 0; +} + +int32 +fe_logspec_dct2(fe_t * fe, const mfcc_t * fr_spec, mfcc_t * fr_cep) +{ +#ifdef FIXED_POINT + fe_dct2(fe, fr_spec, fr_cep, 0); +#else /* ! FIXED_POINT */ + powspec_t *powspec; + int32 i; + + powspec = ckd_malloc(fe->mel_fb->num_filters * sizeof(powspec_t)); + for (i = 0; i < fe->mel_fb->num_filters; ++i) + powspec[i] = (powspec_t) fr_spec[i]; + fe_dct2(fe, powspec, fr_cep, 0); + ckd_free(powspec); +#endif /* ! FIXED_POINT */ + return 0; +} + +int32 +fe_mfcc_dct3(fe_t * fe, const mfcc_t * fr_cep, mfcc_t * fr_spec) +{ +#ifdef FIXED_POINT + fe_dct3(fe, fr_cep, fr_spec); +#else /* ! FIXED_POINT */ + powspec_t *powspec; + int32 i; + + powspec = ckd_malloc(fe->mel_fb->num_filters * sizeof(powspec_t)); + fe_dct3(fe, fr_cep, powspec); + for (i = 0; i < fe->mel_fb->num_filters; ++i) + fr_spec[i] = (mfcc_t) powspec[i]; + ckd_free(powspec); +#endif /* ! FIXED_POINT */ + return 0; +} diff --git a/media/sphinxbase/src/libsphinxbase/fe/fe_internal.h b/media/sphinxbase/src/libsphinxbase/fe/fe_internal.h new file mode 100644 index 000000000..f6c943c72 --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/fe/fe_internal.h @@ -0,0 +1,216 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1996-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +#ifndef __FE_INTERNAL_H__ +#define __FE_INTERNAL_H__ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include "sphinxbase/fe.h" +#include "sphinxbase/fixpoint.h" + +#include "fe_noise.h" +#include "fe_prespch_buf.h" +#include "fe_type.h" + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. */ +} +#endif + +/* Values for the 'logspec' field. */ +enum { + RAW_LOG_SPEC = 1, + SMOOTH_LOG_SPEC = 2 +}; + +/* Values for the 'transform' field. */ +enum { + LEGACY_DCT = 0, + DCT_II = 1, + DCT_HTK = 2 +}; + +typedef struct melfb_s melfb_t; +/** Base Struct to hold all structure for MFCC computation. 
*/ +struct melfb_s { + float32 sampling_rate; + int32 num_cepstra; + int32 num_filters; + int32 fft_size; + float32 lower_filt_freq; + float32 upper_filt_freq; + /* DCT coefficients. */ + mfcc_t **mel_cosine; + /* Filter coefficients. */ + mfcc_t *filt_coeffs; + int16 *spec_start; + int16 *filt_start; + int16 *filt_width; + /* Luxury mobile home. */ + int32 doublewide; + char const *warp_type; + char const *warp_params; + uint32 warp_id; + /* Precomputed normalization constants for unitary DCT-II/DCT-III */ + mfcc_t sqrt_inv_n, sqrt_inv_2n; + /* Value and coefficients for HTK-style liftering */ + int32 lifter_val; + mfcc_t *lifter; + /* Normalize filters to unit area */ + int32 unit_area; + /* Round filter frequencies to DFT points (hurts accuracy, but is + useful for legacy purposes) */ + int32 round_filters; +}; + +typedef struct ringbuf_s { + powspec_t** bufs; + int16 buf_num; + int32 buf_len; + int16 start; + int16 end; + int32 recs; +} ringbuf_t; + +/* sqrt(1/2), also used for unitary DCT-II/DCT-III */ +#define SQRT_HALF FLOAT2MFCC(0.707106781186548) + +typedef struct vad_data_s { + uint8 global_state; + uint8 state_changed; + uint8 store_pcm; + int16 prespch_num; + int16 postspch_num; + prespch_buf_t* prespch_buf; +} vad_data_t; + +/** Structure for the front-end computation. */ +struct fe_s { + cmd_ln_t *config; + int refcount; + + int16 prespch_len; + int16 postspch_len; + float32 vad_threshold; + + float32 sampling_rate; + int16 frame_rate; + int16 frame_shift; + + float32 window_length; + int16 frame_size; + int16 fft_size; + + uint8 fft_order; + uint8 feature_dimension; + uint8 num_cepstra; + uint8 remove_dc; + uint8 log_spec; + uint8 swap; + uint8 dither; + uint8 transform; + uint8 remove_noise; + uint8 remove_silence; + + float32 pre_emphasis_alpha; + int32 seed; + + size_t sample_counter; + uint8 start_flag; + uint8 reserved; + + /* Twiddle factors for FFT. */ + frame_t *ccc, *sss; + /* Mel filter parameters. 
*/ + melfb_t *mel_fb; + /* Half of a Hamming Window. */ + window_t *hamming_window; + /* Storage for noise removal */ + noise_stats_t *noise_stats; + + /* Storage for VAD variables */ + vad_data_t *vad_data; + + /* Temporary buffers for processing. */ + /* FIXME: too many of these. */ + int16 *spch; + frame_t *frame; + powspec_t *spec, *mfspec; + int16 *overflow_samps; + int16 num_overflow_samps; + int16 prior; +}; + +void fe_init_dither(int32 seed); + +/* Apply 1/2 bit noise to a buffer of audio. */ +int32 fe_dither(int16 *buffer, int32 nsamps); + +/* Load a frame of data into the fe. */ +int fe_read_frame(fe_t *fe, int16 const *in, int32 len); + +/* Shift the input buffer back and read more data. */ +int fe_shift_frame(fe_t *fe, int16 const *in, int32 len); + +/* Process a frame of data into features. */ +void fe_write_frame(fe_t *fe, mfcc_t *fea); + +/* Initialization functions. */ +int32 fe_build_melfilters(melfb_t *MEL_FB); +int32 fe_compute_melcosine(melfb_t *MEL_FB); +void fe_create_hamming(window_t *in, int32 in_len); +void fe_create_twiddle(fe_t *fe); + +fixed32 fe_log_add(fixed32 x, fixed32 y); +fixed32 fe_log_sub(fixed32 x, fixed32 y); + +/* Miscellaneous processing functions. */ +void fe_spec2cep(fe_t * fe, const powspec_t * mflogspec, mfcc_t * mfcep); +void fe_dct2(fe_t *fe, const powspec_t *mflogspec, mfcc_t *mfcep, int htk); +void fe_dct3(fe_t *fe, const mfcc_t *mfcep, powspec_t *mflogspec); + +#ifdef __cplusplus +} +#endif + +#endif /* __FE_INTERNAL_H__ */ diff --git a/media/sphinxbase/src/libsphinxbase/fe/fe_noise.c b/media/sphinxbase/src/libsphinxbase/fe/fe_noise.c new file mode 100644 index 000000000..4fb6d21a9 --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/fe/fe_noise.c @@ -0,0 +1,425 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2013 Carnegie Mellon University. All rights + * reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +/* This noise removal algorithm is inspired by the following papers + * Computationally Efficient Speech Enhancement by Spectral Minima Tracking + * by G. Doblinger + * + * Power-Normalized Cepstral Coefficients (PNCC) for Robust Speech Recognition + * by C. Kim. 
+ * + * For the recent research and state of art see papers about IMRCA and + * A Minimum-Mean-Square-Error Noise Reduction Algorithm On Mel-Frequency + * Cepstra For Robust Speech Recognition by Dong Yu and others + */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <math.h> + +#include "sphinxbase/prim_type.h" +#include "sphinxbase/ckd_alloc.h" +#include "sphinxbase/strfuncs.h" +#include "sphinxbase/err.h" + +#include "fe_noise.h" +#include "fe_internal.h" + +/* Noise supression constants */ +#define SMOOTH_WINDOW 4 +#define LAMBDA_POWER 0.7 +#define LAMBDA_A 0.995 +#define LAMBDA_B 0.5 +#define LAMBDA_T 0.85 +#define MU_T 0.2 +#define MAX_GAIN 20 + +struct noise_stats_s { + /* Smoothed power */ + powspec_t *power; + /* Noise estimate */ + powspec_t *noise; + /* Signal floor estimate */ + powspec_t *floor; + /* Peak for temporal masking */ + powspec_t *peak; + + /* Initialize it next time */ + uint8 undefined; + /* Number of items to process */ + uint32 num_filters; + + /* Precomputed constants */ + powspec_t lambda_power; + powspec_t comp_lambda_power; + powspec_t lambda_a; + powspec_t comp_lambda_a; + powspec_t lambda_b; + powspec_t comp_lambda_b; + powspec_t lambda_t; + powspec_t mu_t; + powspec_t max_gain; + powspec_t inv_max_gain; + + powspec_t smooth_scaling[2 * SMOOTH_WINDOW + 3]; +}; + +static void +fe_lower_envelope(noise_stats_t *noise_stats, powspec_t * buf, powspec_t * floor_buf, int32 num_filt) +{ + int i; + + for (i = 0; i < num_filt; i++) { +#ifndef FIXED_POINT + if (buf[i] >= floor_buf[i]) { + floor_buf[i] = + noise_stats->lambda_a * floor_buf[i] + noise_stats->comp_lambda_a * buf[i]; + } + else { + floor_buf[i] = + noise_stats->lambda_b * floor_buf[i] + noise_stats->comp_lambda_b * buf[i]; + } +#else + if (buf[i] >= floor_buf[i]) { + floor_buf[i] = fe_log_add(noise_stats->lambda_a + floor_buf[i], + noise_stats->comp_lambda_a + buf[i]); + } + else { + floor_buf[i] = fe_log_add(noise_stats->lambda_b + floor_buf[i], + 
noise_stats->comp_lambda_b + buf[i]); + } +#endif + } +} + +/* temporal masking */ +static void +fe_temp_masking(noise_stats_t *noise_stats, powspec_t * buf, powspec_t * peak, int32 num_filt) +{ + powspec_t cur_in; + int i; + + for (i = 0; i < num_filt; i++) { + cur_in = buf[i]; + +#ifndef FIXED_POINT + peak[i] *= noise_stats->lambda_t; + if (buf[i] < noise_stats->lambda_t * peak[i]) + buf[i] = peak[i] * noise_stats->mu_t; +#else + peak[i] += noise_stats->lambda_t; + if (buf[i] < noise_stats->lambda_t + peak[i]) + buf[i] = peak[i] + noise_stats->mu_t; +#endif + + if (cur_in > peak[i]) + peak[i] = cur_in; + } +} + +/* spectral weight smoothing */ +static void +fe_weight_smooth(noise_stats_t *noise_stats, powspec_t * buf, powspec_t * coefs, int32 num_filt) +{ + int i, j; + int l1, l2; + powspec_t coef; + + for (i = 0; i < num_filt; i++) { + l1 = ((i - SMOOTH_WINDOW) > 0) ? (i - SMOOTH_WINDOW) : 0; + l2 = ((i + SMOOTH_WINDOW) < + (num_filt - 1)) ? (i + SMOOTH_WINDOW) : (num_filt - 1); + +#ifndef FIXED_POINT + coef = 0; + for (j = l1; j <= l2; j++) { + coef += coefs[j]; + } + buf[i] = buf[i] * (coef / (l2 - l1 + 1)); +#else + coef = MIN_FIXLOG; + for (j = l1; j <= l2; j++) { + coef = fe_log_add(coef, coefs[j]); + } + buf[i] = buf[i] + coef - noise_stats->smooth_scaling[l2 - l1 + 1]; +#endif + + } +} + +noise_stats_t * +fe_init_noisestats(int num_filters) +{ + int i; + noise_stats_t *noise_stats; + + noise_stats = (noise_stats_t *) ckd_calloc(1, sizeof(noise_stats_t)); + + noise_stats->power = + (powspec_t *) ckd_calloc(num_filters, sizeof(powspec_t)); + noise_stats->noise = + (powspec_t *) ckd_calloc(num_filters, sizeof(powspec_t)); + noise_stats->floor = + (powspec_t *) ckd_calloc(num_filters, sizeof(powspec_t)); + noise_stats->peak = + (powspec_t *) ckd_calloc(num_filters, sizeof(powspec_t)); + + noise_stats->undefined = TRUE; + noise_stats->num_filters = num_filters; + +#ifndef FIXED_POINT + noise_stats->lambda_power = LAMBDA_POWER; + noise_stats->comp_lambda_power 
= 1 - LAMBDA_POWER; + noise_stats->lambda_a = LAMBDA_A; + noise_stats->comp_lambda_a = 1 - LAMBDA_A; + noise_stats->lambda_b = LAMBDA_B; + noise_stats->comp_lambda_b = 1 - LAMBDA_B; + noise_stats->lambda_t = LAMBDA_T; + noise_stats->mu_t = MU_T; + noise_stats->max_gain = MAX_GAIN; + noise_stats->inv_max_gain = 1.0 / MAX_GAIN; + + for (i = 1; i < 2 * SMOOTH_WINDOW + 1; i++) { + noise_stats->smooth_scaling[i] = 1.0 / i; + } +#else + noise_stats->lambda_power = FLOAT2FIX(log(LAMBDA_POWER)); + noise_stats->comp_lambda_power = FLOAT2FIX(log(1 - LAMBDA_POWER)); + noise_stats->lambda_a = FLOAT2FIX(log(LAMBDA_A)); + noise_stats->comp_lambda_a = FLOAT2FIX(log(1 - LAMBDA_A)); + noise_stats->lambda_b = FLOAT2FIX(log(LAMBDA_B)); + noise_stats->comp_lambda_b = FLOAT2FIX(log(1 - LAMBDA_B)); + noise_stats->lambda_t = FLOAT2FIX(log(LAMBDA_T)); + noise_stats->mu_t = FLOAT2FIX(log(MU_T)); + noise_stats->max_gain = FLOAT2FIX(log(MAX_GAIN)); + noise_stats->inv_max_gain = FLOAT2FIX(log(1.0 / MAX_GAIN)); + + for (i = 1; i < 2 * SMOOTH_WINDOW + 3; i++) { + noise_stats->smooth_scaling[i] = FLOAT2FIX(log(i)); + } +#endif + + return noise_stats; +} + +void +fe_reset_noisestats(noise_stats_t * noise_stats) +{ + if (noise_stats) + noise_stats->undefined = TRUE; +} + +void +fe_free_noisestats(noise_stats_t * noise_stats) +{ + ckd_free(noise_stats->power); + ckd_free(noise_stats->noise); + ckd_free(noise_stats->floor); + ckd_free(noise_stats->peak); + ckd_free(noise_stats); +} + +/** + * For fixed point we are doing the computation in a fixlog domain, + * so we have to add many processing cases. 
+ */ +void +fe_track_snr(fe_t * fe, int32 *in_speech) +{ + powspec_t *signal; + powspec_t *gain; + noise_stats_t *noise_stats; + powspec_t *mfspec; + int32 i, num_filts; + powspec_t lrt, snr, max_signal, log_signal; + + if (!(fe->remove_noise || fe->remove_silence)) { + *in_speech = TRUE; + return; + } + + noise_stats = fe->noise_stats; + mfspec = fe->mfspec; + num_filts = noise_stats->num_filters; + + signal = (powspec_t *) ckd_calloc(num_filts, sizeof(powspec_t)); + + if (noise_stats->undefined) { + for (i = 0; i < num_filts; i++) { + noise_stats->power[i] = mfspec[i]; + noise_stats->noise[i] = mfspec[i]; +#ifndef FIXED_POINT + noise_stats->floor[i] = mfspec[i] / noise_stats->max_gain; + noise_stats->peak[i] = 0.0; +#else + noise_stats->floor[i] = mfspec[i] - noise_stats->max_gain; + noise_stats->peak[i] = MIN_FIXLOG; +#endif + } + noise_stats->undefined = FALSE; + } + + /* Calculate smoothed power */ + for (i = 0; i < num_filts; i++) { +#ifndef FIXED_POINT + noise_stats->power[i] = + noise_stats->lambda_power * noise_stats->power[i] + noise_stats->comp_lambda_power * mfspec[i]; +#else + noise_stats->power[i] = fe_log_add(noise_stats->lambda_power + noise_stats->power[i], + noise_stats->comp_lambda_power + mfspec[i]); +#endif + } + + /* Noise estimation and vad decision */ + fe_lower_envelope(noise_stats, noise_stats->power, noise_stats->noise, num_filts); + + lrt = FLOAT2FIX(0.0f); + max_signal = FLOAT2FIX(0.0f); + for (i = 0; i < num_filts; i++) { +#ifndef FIXED_POINT + signal[i] = noise_stats->power[i] - noise_stats->noise[i]; + if (signal[i] < 1.0) + signal[i] = 1.0; + snr = log(noise_stats->power[i] / noise_stats->noise[i]); + log_signal = log(signal[i]); +#else + signal[i] = fe_log_sub(noise_stats->power[i], noise_stats->noise[i]); + snr = noise_stats->power[i] - noise_stats->noise[i]; + log_signal = signal[i]; +#endif + if (snr > lrt) { + lrt = snr; + if (log_signal > max_signal) { + max_signal = log_signal; + } + } + } + +#ifndef FIXED_POINT + if 
(fe->remove_silence && (lrt < fe->vad_threshold || max_signal < fe->vad_threshold)) { +#else + if (fe->remove_silence && (lrt < FLOAT2FIX(fe->vad_threshold) || max_signal < FLOAT2FIX(fe->vad_threshold))) { +#endif + *in_speech = FALSE; + } else { + *in_speech = TRUE; + } + + fe_lower_envelope(noise_stats, signal, noise_stats->floor, num_filts); + + fe_temp_masking(noise_stats, signal, noise_stats->peak, num_filts); + + if (!fe->remove_noise) { + //no need for further calculations if noise cancellation disabled + ckd_free(signal); + return; + } + + for (i = 0; i < num_filts; i++) { + if (signal[i] < noise_stats->floor[i]) + signal[i] = noise_stats->floor[i]; + } + + gain = (powspec_t *) ckd_calloc(num_filts, sizeof(powspec_t)); +#ifndef FIXED_POINT + for (i = 0; i < num_filts; i++) { + if (signal[i] < noise_stats->max_gain * noise_stats->power[i]) + gain[i] = signal[i] / noise_stats->power[i]; + else + gain[i] = noise_stats->max_gain; + if (gain[i] < noise_stats->inv_max_gain) + gain[i] = noise_stats->inv_max_gain; + } +#else + for (i = 0; i < num_filts; i++) { + gain[i] = signal[i] - noise_stats->power[i]; + if (gain[i] > noise_stats->max_gain) + gain[i] = noise_stats->max_gain; + if (gain[i] < noise_stats->inv_max_gain) + gain[i] = noise_stats->inv_max_gain; + } +#endif + + /* Weight smoothing and time frequency normalization */ + fe_weight_smooth(noise_stats, mfspec, gain, num_filts); + + ckd_free(gain); + ckd_free(signal); +} + +void +fe_vad_hangover(fe_t * fe, mfcc_t * fea, int32 is_speech) +{ + /* track vad state and deal with cepstrum prespeech buffer */ + fe->vad_data->state_changed = 0; + if (is_speech) { + fe->vad_data->postspch_num = 0; + if (!fe->vad_data->global_state) { + fe->vad_data->prespch_num++; + fe_prespch_write_cep(fe->vad_data->prespch_buf, fea); + /* check for transition sil->speech */ + if (fe->vad_data->prespch_num >= fe->prespch_len) { + fe->vad_data->prespch_num = 0; + fe->vad_data->global_state = 1; + /* transition silence->speech 
occurred */ + fe->vad_data->state_changed = 1; + } + } + } else { + fe->vad_data->prespch_num = 0; + fe_prespch_reset_cep(fe->vad_data->prespch_buf); + if (fe->vad_data->global_state) { + fe->vad_data->postspch_num++; + /* check for transition speech->sil */ + if (fe->vad_data->postspch_num >= fe->postspch_len) { + fe->vad_data->postspch_num = 0; + fe->vad_data->global_state = 0; + /* transition speech->silence occurred */ + fe->vad_data->state_changed = 1; + } + } + } + + if (fe->vad_data->store_pcm) { + if (is_speech || fe->vad_data->global_state) + fe_prespch_write_pcm(fe->vad_data->prespch_buf, fe->spch); + if (!is_speech && !fe->vad_data->global_state) + fe_prespch_reset_pcm(fe->vad_data->prespch_buf); + } +} diff --git a/media/sphinxbase/src/libsphinxbase/fe/fe_noise.h b/media/sphinxbase/src/libsphinxbase/fe/fe_noise.h new file mode 100644 index 000000000..b633a4cec --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/fe/fe_noise.h @@ -0,0 +1,66 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2013 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. 
+ * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +#ifndef FE_NOISE_H +#define FE_NOISE_H + +#include "sphinxbase/fe.h" +#include "sphinxbase/fixpoint.h" +#include "fe_type.h" + +typedef struct noise_stats_s noise_stats_t; + +/* Creates noisestats object */ +noise_stats_t *fe_init_noisestats(int num_filters); + +/* Resets collected noise statistics */ +void fe_reset_noisestats(noise_stats_t * noise_stats); + +/* Frees allocated data */ +void fe_free_noisestats(noise_stats_t * noise_stats); + +/** + * Process frame, update noise statistics, remove noise components if needed, + * and return local vad decision. + */ +void fe_track_snr(fe_t *fe, int32 *in_speech); + +/** + * Updates global state based on local VAD state smoothing the estimate. 
+ */ +void fe_vad_hangover(fe_t *fe, mfcc_t *fea, int32 is_speech); + +#endif /* FE_NOISE_H */ diff --git a/media/sphinxbase/src/libsphinxbase/fe/fe_prespch_buf.c b/media/sphinxbase/src/libsphinxbase/fe/fe_prespch_buf.c new file mode 100644 index 000000000..028c09ac5 --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/fe/fe_prespch_buf.c @@ -0,0 +1,182 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== +* Copyright (c) 2013 Carnegie Mellon University. All rights +* reserved. +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions +* are met: +* +* 1. Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* +* 2. Redistributions in binary form must reproduce the above copyright +* notice, this list of conditions and the following disclaimer in +* the documentation and/or other materials provided with the +* distribution. +* +* This work was supported in part by funding from the Defense Advanced +* Research Projects Agency and the National Science Foundation of the +* United States of America, and the CMU Sphinx Speech Consortium. +* +* THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND +* ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +* PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY +* NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +* +* ==================================================================== +* +*/ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <stdio.h> +#include <string.h> +#include <assert.h> + +#include "sphinxbase/ckd_alloc.h" +#include "sphinxbase/err.h" + +#include "fe_prespch_buf.h" + +struct prespch_buf_s { + /* saved mfcc frames */ + mfcc_t **cep_buf; + /* saved pcm audio */ + int16 *pcm_buf; + /* flag for pcm buffer initialization */ + int16 cep_write_ptr; + /* read pointer for cep buffer */ + int16 cep_read_ptr; + /* write pointer for pcm buffer */ + int16 pcm_write_ptr; + /* frames amount in cep buffer */ + int16 num_frames_cep; + /* frames amount in pcm buffer */ + int16 num_frames_pcm; + /* filters amount */ + int16 num_cepstra; + /* amount of fresh samples in frame */ + int16 num_samples; +}; + +prespch_buf_t * +fe_prespch_init(int num_frames, int num_cepstra, int num_samples) +{ + prespch_buf_t *prespch_buf; + + prespch_buf = (prespch_buf_t *) ckd_calloc(1, sizeof(prespch_buf_t)); + + prespch_buf->num_cepstra = num_cepstra; + prespch_buf->num_frames_cep = num_frames; + prespch_buf->num_samples = num_samples; + prespch_buf->num_frames_pcm = 0; + prespch_buf->cep_write_ptr = 0; + prespch_buf->cep_read_ptr = 0; + prespch_buf->pcm_write_ptr = 0; + + prespch_buf->cep_buf = (mfcc_t **) + ckd_calloc_2d(num_frames, num_cepstra, + sizeof(**prespch_buf->cep_buf)); + + return prespch_buf; +} + +void 
+fe_prespch_extend_pcm(prespch_buf_t* prespch_buf, int num_frames_pcm) +{ + num_frames_pcm += prespch_buf->num_frames_cep + 1; + if (num_frames_pcm > prespch_buf->num_frames_pcm) { + prespch_buf->num_frames_pcm = num_frames_pcm; + prespch_buf->pcm_buf = (int16 *) ckd_realloc(prespch_buf->pcm_buf, prespch_buf->num_frames_pcm * prespch_buf->num_samples * sizeof(int16)); + } +} + +int +fe_prespch_read_cep(prespch_buf_t * prespch_buf, mfcc_t * fea) +{ + if (prespch_buf->cep_read_ptr >= prespch_buf->num_frames_cep) + return 0; + if (prespch_buf->cep_read_ptr >= prespch_buf->cep_write_ptr) + return 0; + memcpy(fea, prespch_buf->cep_buf[prespch_buf->cep_read_ptr], + sizeof(mfcc_t) * prespch_buf->num_cepstra); + prespch_buf->cep_read_ptr++; + return 1; +} + +void +fe_prespch_write_cep(prespch_buf_t * prespch_buf, mfcc_t * fea) +{ + assert(prespch_buf->cep_write_ptr < prespch_buf->num_frames_cep); + memcpy(prespch_buf->cep_buf[prespch_buf->cep_write_ptr], fea, + sizeof(mfcc_t) * prespch_buf->num_cepstra); + prespch_buf->cep_write_ptr++; +} + +void +fe_prespch_read_pcm(prespch_buf_t * prespch_buf, int16 ** samples, + int32 * samples_num) +{ + if (!prespch_buf->pcm_buf) { + /* pcm prespch buffer isn't initialized yet */ + samples = NULL; + *samples_num = 0; + return; + } + *samples = prespch_buf->pcm_buf; + *samples_num = prespch_buf->pcm_write_ptr * prespch_buf->num_samples; + prespch_buf->pcm_write_ptr = 0; +} + +void +fe_prespch_write_pcm(prespch_buf_t * prespch_buf, int16 * samples) +{ + int32 sample_ptr; + + assert(prespch_buf->pcm_write_ptr < prespch_buf->num_frames_pcm); + sample_ptr = prespch_buf->pcm_write_ptr * prespch_buf->num_samples; + memcpy(&prespch_buf->pcm_buf[sample_ptr], samples, + prespch_buf->num_samples * sizeof(int16)); + prespch_buf->pcm_write_ptr++; +} + +void +fe_prespch_reset_cep(prespch_buf_t * prespch_buf) +{ + prespch_buf->cep_read_ptr = 0; + prespch_buf->cep_write_ptr = 0; +} + +void +fe_prespch_reset_pcm(prespch_buf_t * prespch_buf) +{ + 
prespch_buf->pcm_write_ptr = 0; +} + +void +fe_prespch_free(prespch_buf_t * prespch_buf) +{ + if (!prespch_buf) + return; + if (prespch_buf->cep_buf) + ckd_free_2d((void **) prespch_buf->cep_buf); + if (prespch_buf->pcm_buf) + ckd_free(prespch_buf->pcm_buf); + ckd_free(prespch_buf); +} + +int32 +fe_prespch_ncep(prespch_buf_t * prespch_buf) +{ + return prespch_buf->cep_write_ptr - prespch_buf->cep_read_ptr; +} diff --git a/media/sphinxbase/src/libsphinxbase/fe/fe_prespch_buf.h b/media/sphinxbase/src/libsphinxbase/fe/fe_prespch_buf.h new file mode 100644 index 000000000..d349ddfdb --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/fe/fe_prespch_buf.h @@ -0,0 +1,79 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2013 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +/* Buffer that maintains both features and raw audio for the VAD implementation */ + +#ifndef FE_INTERNAL_H +#define FE_INTERNAL_H + +#include "sphinxbase/fe.h" + +typedef struct prespch_buf_s prespch_buf_t; + +/* Creates prespeech buffer */ +prespch_buf_t *fe_prespch_init(int num_frames, int num_cepstra, + int num_samples); + +/* Extends pcm prespeech buffer with specified amount of frames */ +void fe_prespch_extend_pcm(prespch_buf_t* prespch_buf, int num_frames_pcm); + +/* Reads mfcc frame from prespeech buffer */ +int fe_prespch_read_cep(prespch_buf_t * prespch_buf, mfcc_t * fea); + +/* Writes mfcc frame to prespeech buffer */ +void fe_prespch_write_cep(prespch_buf_t * prespch_buf, mfcc_t * fea); + +/* Reads pcm frame from prespeech buffer */ +void fe_prespch_read_pcm(prespch_buf_t * prespch_buf, int16 ** samples, + int32 * samples_num); + +/* Writes pcm frame to prespeech buffer */ +void fe_prespch_write_pcm(prespch_buf_t * prespch_buf, int16 * samples); + +/* Resets read/write pointers for cepstrum buffer */ +void fe_prespch_reset_cep(prespch_buf_t * prespch_buf); + +/* Resets read/write pointer for pcm audio buffer */ +void fe_prespch_reset_pcm(prespch_buf_t * prespch_buf); + +/* Releases prespeech buffer */ +void fe_prespch_free(prespch_buf_t * prespch_buf); + +/* Returns number of accumulated frames */ +int32 
fe_prespch_ncep(prespch_buf_t * prespch_buf); + +#endif /* FE_INTERNAL_H */ diff --git a/media/sphinxbase/src/libsphinxbase/fe/fe_sigproc.c b/media/sphinxbase/src/libsphinxbase/fe/fe_sigproc.c new file mode 100644 index 000000000..577640f62 --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/fe/fe_sigproc.c @@ -0,0 +1,1377 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1996-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +#include <stdio.h> +#include <math.h> +#include <string.h> +#include <stdlib.h> +#include <assert.h> + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#ifdef _MSC_VER +#pragma warning (disable: 4244) +#endif + +/** + * Windows math.h does not contain M_PI + */ +#ifndef M_PI +#define M_PI 3.14159265358979323846 +#endif + +#include "sphinxbase/prim_type.h" +#include "sphinxbase/ckd_alloc.h" +#include "sphinxbase/byteorder.h" +#include "sphinxbase/fixpoint.h" +#include "sphinxbase/fe.h" +#include "sphinxbase/genrand.h" +#include "sphinxbase/err.h" + +#include "fe_internal.h" +#include "fe_warp.h" + +/* Use extra precision for cosines, Hamming window, pre-emphasis + * coefficient, twiddle factors. */ +#ifdef FIXED_POINT +#define FLOAT2COS(x) FLOAT2FIX_ANY(x,30) +#define COSMUL(x,y) FIXMUL_ANY(x,y,30) +#else +#define FLOAT2COS(x) (x) +#define COSMUL(x,y) ((x)*(y)) +#endif + +#ifdef FIXED_POINT + +/* Internal log-addition table for natural log with radix point at 8 + * bits. Each entry is 256 * log(1 + e^{-n/256}). 
This is used in the + * log-add computation: + * + * e^z = e^x + e^y + * e^z = e^x(1 + e^{y-x}) = e^y(1 + e^{x-y}) + * z = x + log(1 + e^{y-x}) = y + log(1 + e^{x-y}) + * + * So when y > x, z = y + logadd_table[-(x-y)] + * when x > y, z = x + logadd_table[-(y-x)] + */ +static const unsigned char fe_logadd_table[] = { + 177, 177, 176, 176, 175, 175, 174, 174, 173, 173, + 172, 172, 172, 171, 171, 170, 170, 169, 169, 168, + 168, 167, 167, 166, 166, 165, 165, 164, 164, 163, + 163, 162, 162, 161, 161, 161, 160, 160, 159, 159, + 158, 158, 157, 157, 156, 156, 155, 155, 155, 154, + 154, 153, 153, 152, 152, 151, 151, 151, 150, 150, + 149, 149, 148, 148, 147, 147, 147, 146, 146, 145, + 145, 144, 144, 144, 143, 143, 142, 142, 141, 141, + 141, 140, 140, 139, 139, 138, 138, 138, 137, 137, + 136, 136, 136, 135, 135, 134, 134, 134, 133, 133, + 132, 132, 131, 131, 131, 130, 130, 129, 129, 129, + 128, 128, 128, 127, 127, 126, 126, 126, 125, 125, + 124, 124, 124, 123, 123, 123, 122, 122, 121, 121, + 121, 120, 120, 119, 119, 119, 118, 118, 118, 117, + 117, 117, 116, 116, 115, 115, 115, 114, 114, 114, + 113, 113, 113, 112, 112, 112, 111, 111, 110, 110, + 110, 109, 109, 109, 108, 108, 108, 107, 107, 107, + 106, 106, 106, 105, 105, 105, 104, 104, 104, 103, + 103, 103, 102, 102, 102, 101, 101, 101, 100, 100, + 100, 99, 99, 99, 98, 98, 98, 97, 97, 97, + 96, 96, 96, 96, 95, 95, 95, 94, 94, 94, + 93, 93, 93, 92, 92, 92, 92, 91, 91, 91, + 90, 90, 90, 89, 89, 89, 89, 88, 88, 88, + 87, 87, 87, 87, 86, 86, 86, 85, 85, 85, + 85, 84, 84, 84, 83, 83, 83, 83, 82, 82, + 82, 82, 81, 81, 81, 80, 80, 80, 80, 79, + 79, 79, 79, 78, 78, 78, 78, 77, 77, 77, + 77, 76, 76, 76, 75, 75, 75, 75, 74, 74, + 74, 74, 73, 73, 73, 73, 72, 72, 72, 72, + 71, 71, 71, 71, 71, 70, 70, 70, 70, 69, + 69, 69, 69, 68, 68, 68, 68, 67, 67, 67, + 67, 67, 66, 66, 66, 66, 65, 65, 65, 65, + 64, 64, 64, 64, 64, 63, 63, 63, 63, 63, + 62, 62, 62, 62, 61, 61, 61, 61, 61, 60, + 60, 60, 60, 60, 59, 59, 59, 59, 59, 58, + 58, 58, 58, 58, 
57, 57, 57, 57, 57, 56, + 56, 56, 56, 56, 55, 55, 55, 55, 55, 54, + 54, 54, 54, 54, 53, 53, 53, 53, 53, 52, + 52, 52, 52, 52, 52, 51, 51, 51, 51, 51, + 50, 50, 50, 50, 50, 50, 49, 49, 49, 49, + 49, 49, 48, 48, 48, 48, 48, 48, 47, 47, + 47, 47, 47, 47, 46, 46, 46, 46, 46, 46, + 45, 45, 45, 45, 45, 45, 44, 44, 44, 44, + 44, 44, 43, 43, 43, 43, 43, 43, 43, 42, + 42, 42, 42, 42, 42, 41, 41, 41, 41, 41, + 41, 41, 40, 40, 40, 40, 40, 40, 40, 39, + 39, 39, 39, 39, 39, 39, 38, 38, 38, 38, + 38, 38, 38, 37, 37, 37, 37, 37, 37, 37, + 37, 36, 36, 36, 36, 36, 36, 36, 35, 35, + 35, 35, 35, 35, 35, 35, 34, 34, 34, 34, + 34, 34, 34, 34, 33, 33, 33, 33, 33, 33, + 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, + 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, + 30, 30, 30, 30, 30, 30, 30, 30, 30, 29, + 29, 29, 29, 29, 29, 29, 29, 29, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 
10, 10, 10, 10, 10, 10, 10, 10, 10, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 0 +}; + +static const int fe_logadd_table_size = + sizeof(fe_logadd_table) / sizeof(fe_logadd_table[0]); + +fixed32 +fe_log_add(fixed32 x, fixed32 y) +{ + fixed32 d, r; + + if (x > y) { + d = (x - y) >> (DEFAULT_RADIX - 8); + r = x; + } + else { + d = (y - x) >> (DEFAULT_RADIX - 8); + r = y; + } + + if (r <= MIN_FIXLOG) + return MIN_FIXLOG; + else if (d > fe_logadd_table_size - 1) + return r; + else { + r += ((fixed32) fe_logadd_table[d] << (DEFAULT_RADIX - 8)); +/* printf("%d - %d = %d | %f - %f = %f | %f - %f = %f\n", + x, y, r, FIX2FLOAT(x), FIX2FLOAT(y), FIX2FLOAT(r), + exp(FIX2FLOAT(x)), exp(FIX2FLOAT(y)), exp(FIX2FLOAT(r))); +*/ + return r; + } +} + +/* + * log_sub for spectral subtraction, similar to logadd but we had + * to smooth function around zero with fixlog in order to improve + * table interpolation properties + * + * The table is created with the file included into distribution + * + * e^z = e^x - e^y + * e^z = e^x (1 - e^(-(x - y))) + * z = x + log(1 - e^(-(x - y))) + * z = x + fixlog(a) + (log(1 - e^(- a)) - log(a)) + * + * Input radix is 8 output radix is 10 + */ +static const uint16 fe_logsub_table[] = { +1, 3, 5, 7, 9, 11, 13, 15, 17, 19, +21, 23, 25, 27, 29, 31, 33, 35, 37, 39, +41, 43, 45, 47, 49, 51, 53, 55, 56, 58, +60, 62, 64, 66, 68, 70, 72, 74, 76, 78, +80, 82, 84, 86, 88, 90, 92, 94, 95, 97, +99, 101, 103, 105, 107, 109, 111, 113, 115, 117, +119, 121, 122, 124, 126, 128, 130, 132, 134, 136, +138, 140, 142, 143, 145, 147, 149, 151, 153, 155, +157, 159, 161, 162, 164, 166, 168, 170, 
172, 174, +176, 178, 179, 181, 183, 185, 187, 189, 191, 193, +194, 196, 198, 200, 202, 204, 206, 207, 209, 211, +213, 215, 217, 219, 220, 222, 224, 226, 228, 230, +232, 233, 235, 237, 239, 241, 243, 244, 246, 248, +250, 252, 254, 255, 257, 259, 261, 263, 265, 266, +268, 270, 272, 274, 275, 277, 279, 281, 283, 284, +286, 288, 290, 292, 294, 295, 297, 299, 301, 302, +304, 306, 308, 310, 311, 313, 315, 317, 319, 320, +322, 324, 326, 327, 329, 331, 333, 335, 336, 338, +340, 342, 343, 345, 347, 349, 350, 352, 354, 356, +357, 359, 361, 363, 364, 366, 368, 370, 371, 373, +375, 377, 378, 380, 382, 384, 385, 387, 389, 391, +392, 394, 396, 397, 399, 401, 403, 404, 406, 408, +410, 411, 413, 415, 416, 418, 420, 422, 423, 425, +427, 428, 430, 432, 433, 435, 437, 439, 440, 442, +444, 445, 447, 449, 450, 452, 454, 455, 457, 459, +460, 462, 464, 465, 467, 469, 471, 472, 474, 476, +477, 479, 481, 482, 484, 486, 487, 489, 490, 492, +494, 495, 497, 499, 500, 502, 504, 505, 507, 509, +510, 512, 514, 515, 517, 518, 520, 522, 523, 525, +527, 528, 530, 532, 533, 535, 536, 538, 540, 541, +543, 544, 546, 548, 549, 551, 553, 554, 556, 557, +559, 561, 562, 564, 565, 567, 569, 570, 572, 573, +575, 577, 578, 580, 581, 583, 585, 586, 588, 589, +591, 592, 594, 596, 597, 599, 600, 602, 603, 605, +607, 608, 610, 611, 613, 614, 616, 618, 619, 621, +622, 624, 625, 627, 628, 630, 632, 633, 635, 636, +638, 639, 641, 642, 644, 645, 647, 649, 650, 652, +653, 655, 656, 658, 659, 661, 662, 664, 665, 667, +668, 670, 671, 673, 674, 676, 678, 679, 681, 682, +684, 685, 687, 688, 690, 691, 693, 694, 696, 697, +699, 700, 702, 703, 705, 706, 708, 709, 711, 712, +714, 715, 717, 718, 719, 721, 722, 724, 725, 727, +728, 730, 731, 733, 734, 736, 737, 739, 740, 742, +743, 745, 746, 747, 749, 750, 752, 753, 755, 756, +758, 759, 761, 762, 763, 765, 766, 768, 769, 771, +772, 774, 775, 776, 778, 779, 781, 782, 784, 785, +786, 788, 789, 791, 792, 794, 795, 796, 798, 799, +801, 802, 804, 805, 806, 808, 809, 811, 812, 813, 
+815, 816, 818, 819, 820, 822, 823, 825, 826, 827, +829, 830, 832, 833, 834, 836, 837, 839, 840, 841, +843, 844, 846, 847, 848, 850, 851, 852, 854, 855, +857, 858, 859, 861, 862, 863, 865, 866, 868, 869, +870, 872, 873, 874, 876, 877, 878, 880, 881, 883, +884, 885, 887, 888, 889, 891, 892, 893, 895, 896, +897, 899, 900, 901, 903, 904, 905, 907, 908, 909, +911, 912, 913, 915, 916, 917, 919, 920, 921, 923, +924, 925, 927, 928, 929, 931, 932, 933, 935, 936, +937, 939, 940, 941, 942, 944, 945, 946, 948, 949, +950, 952, 953, 954, 956, 957, 958, 959, 961, 962, +963, 965, 966, 967, 968, 970, 971, 972, 974, 975, +976, 977, 979, 980, 981, 983, 984, 985, 986, 988, +989, 990, 991, 993, 994, 995, 997, 998, 999, 1000, +1002, 1003, 1004, 1005, 1007, 1008, 1009, 1010, 1012, 1013, +1014, 1015, 1017, 1018, 1019, 1020, 1022, 1023, 1024, 1025, +1027, 1028, 1029, 1030, 1032, 1033, 1034, 1035, 1037, 1038, +1039, 1040, 1041, 1043, 1044, 1045, 1046, 1048, 1049, 1050, +1051, 1052, 1054, 1055, 1056, 1057, 1059, 1060, 1061, 1062, +1063, 1065, 1066, 1067, 1068, 1069, 1071, 1072, 1073, 1074, +1076, 1077, 1078, 1079, 1080, 1082, 1083, 1084, 1085, 1086, +1087, 1089, 1090, 1091, 1092, 1093, 1095, 1096, 1097, 1098, +1099, 1101, 1102, 1103, 1104, 1105, 1106, 1108, 1109, 1110, +1111, 1112, 1114, 1115, 1116, 1117, 1118, 1119, 1121, 1122, +1123, 1124, 1125, 1126, 1128, 1129, 1130, 1131, 1132, 1133, +1135, 1136, 1137, 1138, 1139, 1140, 1141, 1143, 1144, 1145, +1146, 1147, 1148, 1149, 1151, 1152, 1153, 1154, 1155, 1156, +1157, 1159, 1160, 1161, 1162, 1163, 1164, 1165, 1167, 1168, +1169, 1170, 1171, 1172, 1173, 1174, 1176, 1177, 1178, 1179, +1180, 1181, 1182, 1183, 1185, 1186, 1187, 1188, 1189, 1190, +1191, 1192, 1193, 1195, 1196, 1197, 1198, 1199, 1200, 1201, +1202, 1203, 1205, 1206, 1207, 1208, 1209, 1210, 1211, 1212, +1213, 1214, 1216, 1217, 1218, 1219, 1220, 1221, 1222, 1223, +1224, 1225, 1226, 1228, 1229, 1230, 1231, 1232, 1233, 1234, +1235, 1236, 1237, 1238, 1239, 1240, 1242, 1243, 1244, 1245, 
+1246, 1247, 1248, 1249, 1250, 1251, 1252, 1253, 1254, 1255, +1256, 1258, 1259, 1260, 1261, 1262, 1263, 1264, 1265, 1266, +1267, 1268, 1269, 1270, 1271, 1272, 1273, 1274, 1275, 1277, +1278, 1279, 1280, 1281, 1282, 1283, 1284, 1285, 1286, 1287, +1288, 1289, 1290, 1291, 1292, 1293, 1294, 1295, 1296, 1297, +1298, 1299, 1300, 1301, 1302, 1303, 1305, 1306, 1307, 1308, +1309, 1310, 1311, 1312, 1313, 1314, 1315, 1316, 1317, 1318, +1319, 1320, 1321, 1322, 1323, 1324, 1325, 1326, 1327, 1328, +1329, 1330, 1331, 1332, 1333, 1334, 1335, 1336, 1337, 1338, +1339, 1340, 1341, 1342, 1343, 1344, 1345, 1346, 1347, 1348, +1349, 1350, 1351, 1352, 1353, 1354, 1355, 1356, 1357, 1358, +1359, 1360, 1361, 1362, 1363, 1364, 1365, 1366, 1367, 1368, +1369, 1370, 1371, 1372, 1372, 1373, 1374, 1375, 1376, 1377, +1378, 1379, 1380, 1381, 1382, 1383, 1384, 1385, 1386, 1387, +1388, 1389, 1390, 1391, 1392, 1393, 1394, 1395, 1396, 1397, +1398, 1399, 1399, 1400, 1401, 1402, 1403, 1404, 1405, 1406, +1407, 1408, 1409, 1410, 1411, 1412, 1413, 1414, 1415, 1416, +1417, 1418, 1418, 1419, 1420, 1421, 1422, 1423, 1424, 1425, +1426, 1427, 1428, 1429, 1430, 1431, 1432, 1432, 1433, 1434, +1435, 1436, 1437, 1438, 1439, 1440, 1441, 1442, 1443, 1444, +1444, 1445, 1446, 1447, 1448, 1449, 1450, 1451, 1452, 1453, +1454, 1455, 1455, 1456, 1457, 1458, 1459, 1460, 1461, 1462, +1463, 1464, 1465, 1466, 1466, 1467, 1468, 1469, 1470, 1471, +1472, 1473, 1474, 1475, 1475, 1476, 1477, 1478, 1479, 1480, +1481, 1482, 1483, 1483, 1484, 1485, 1486, 1487, 1488, 1489, +1490, 1491, 1491, 1492, 1493, 1494, 1495, 1496, 1497, 1498, +1499, 1499, 1500, 1501, 1502, 1503, 1504, 1505, 1506, 1506, +1507, 1508, 1509, 1510, 1511, 1512, 1513, 1513, 1514, 1515, +1516, 1517, 1518, 1519, 1520, 1520, 1521, 1522, 1523, 1524, +1525, 1526, 1526, 1527, 1528, 1529, 1530, 1531, 1532, 1532, +1533, 1534, 1535, 1536, 1537, 1538, 1538, 1539, 1540, 1541, +1542, 1543, 1544, 1544, 1545, 1546, 1547, 1548, 1549, 1550, +1550, 1551, 1552, 1553, 1554, 1555, 1555, 
1556, 1557, 1558, +1559, 1560, 1560, 1561, 1562, 1563, 1564, 1565, 1565, 1566, +1567, 1568, 1569, 1570, 1570, 1571, 1572, 1573, 1574, 1575, +1575, 1576, 1577, 1578, 1579, 1580, 1580, 1581, 1582, 1583, +1584, 1584, 1585, 1586, 1587, 1588, 1589, 1589, 1590, 1591, +1592, 1593, 1593, 1594, 1595, 1596, 1597, 1598, 1598, 1599, +1600, 1601, 1602, 1602, 1603, 1604, 1605, 1606, 1606, 1607, +1608, 1609, 1610, 1610, 1611, 1612, 1613, 1614, 1614, 1615, +1616, 1617, 1618, 1618, 1619, 1620, 1621, 1622, 1622, 1623, +1624, 1625, 1626, 1626, 1627, 1628, 1629, 1630, 1630, 1631, +1632, 1633, 1634, 1634, 1635, 1636, 1637, 1637, 1638, 1639, +1640, 1641, 1641, 1642, 1643, 1644, 1645, 1645, 1646, 1647, +1648, 1648, 1649, 1650, 1651, 1652, 1652, 1653, 1654, 1655, +1655, 1656, 1657, 1658, 1658, 1659, 1660, 1661, 1662, 1662, +1663, 1664, 1665, 1665, 1666, 1667, 1668, 1668, 1669, 1670, +1671, 1671, 1672, 1673, 1674, 1675, 1675, 1676, 1677, 1678, +1678, 1679, 1680, 1681, 1681, 1682, 1683, 1684, 1684, 1685, +1686, 1687, 1687, 1688, 1689, 1690, 1690, 1691, 1692, 1693, +1693, 1694, 1695, 1696, 1696, 1697, 1698, 1699, 1699, 1700, +1701, 1702, 1702, 1703, 1704, 1705, 1705, 1706, 1707, 1707, +1708, 1709, 1710, 1710, 1711, 1712, 1713, 1713, 1714, 1715, +1716, 1716, 1717, 1718, 1718, 1719, 1720, 1721, 1721, 1722, +1723, 1724, 1724, 1725, 1726, 1727, 1727, 1728, 1729, 1729, +1730, 1731, 1732, 1732, 1733, 1734, 1734, 1735, 1736, 1737, +1737, 1738, 1739, 1740, 1740, 1741, 1742, 1742, 1743, 1744, +1745, 1745, 1746, 1747, 1747, 1748, 1749, 1749, 1750, 1751, +1752, 1752, 1753, 1754, 1754, 1755, 1756, 1757, 1757, 1758, +1759, 1759, 1760, 1761, 1762, 1762, 1763, 1764, 1764, 1765, +1766, 1766, 1767, 1768, 1769, 1769, 1770, 1771, 1771, 1772, +1773, 1773, 1774, 1775, 1776, 1776, 1777, 1778, 1778, 1779, +1780, 1780, 1781, 1782, 1782, 1783, 1784, 1784, 1785, 1786, +1787, 1787, 1788, 1789, 1789, 1790, 1791, 1791, 1792, 1793, +1793, 1794, 1795, 1795, 1796, 1797, 1798, 1798, 1799, 1800, +1800, 1801, 1802, 1802, 
1803, 1804, 1804, 1805, 1806, 1806, +1807, 1808, 1808, 1809, 1810, 1810, 1811, 1812, 1812, 1813, +1814, 1814, 1815, 1816, 1816, 1817, 1818, 1818, 1819, 1820, +1820, 1821, 1822, 1822, 1823, 1824, 1824, 1825, 1826, 1826, +1827, 1828, 1828, 1829, 1830, 1830, 1831, 1832, 1832, 1833, +1834, 1834, 1835, 1836, 1836, 1837, 1838, 1838, 1839, 1840, +1840, 1841, 1842, 1842, 1843, 1844, 1844, 1845, 1845, 1846, +1847, 1847, 1848, 1849, 1849, 1850, 1851, 1851, 1852, 1853, +1853, 1854, 1855, 1855, 1856, 1857, 1857, 1858, 1858, 1859, +1860, 1860, 1861, 1862, 1862, 1863, 1864, 1864, 1865, 1866, +1866, 1867, 1867, 1868, 1869, 1869, 1870, 1871, 1871, 1872, +1873, 1873, 1874, 1874, 1875, 1876, 1876, 1877, 1878, 1878, +1879, 1879, 1880, 1881, 1881, 1882, 1883, 1883, 1884, 1885, +1885, 1886, 1886, 1887, 1888, 1888, 1889, 1890, 1890, 1891, +1891, 1892, 1893, 1893, 1894, 1895, 1895, 1896, 1896, 1897, +1898, 1898, 1899, 1900, 1900, 1901, 1901, 1902, 1903, 1903, +1904, 1904, 1905, 1906, 1906, 1907, 1908, 1908, 1909, 1909, +1910, 1911, 1911, 1912, 1912, 1913, 1914, 1914, 1915, 1916, +1916, 1917, 1917, 1918, 1919, 1919, 1920, 1920, 1921, 1922, +1922, 1923, 1923, 1924, 1925, 1925, 1926, 1926, 1927, 1928, +1928, 1929, 1929, 1930, 1931, 1931, 1932, 1932, 1933, 1934, +1934, 1935, 1935, 1936, 1937, 1937, 1938, 1938, 1939, 1940, +1940, 1941, 1941, 1942, 1943, 1943, 1944, 1944, 1945, 1946, +1946, 1947, 1947, 1948, 1949, 1949, 1950, 1950, 1951, 1952, +1952, 1953, 1953, 1954, 1955, 1955, 1956, 1956, 1957, 1957, +1958, 1959, 1959, 1960, 1960, 1961, 1962, 1962, 1963, 1963, +1964, 1964, 1965, 1966, 1966, 1967, 1967, 1968, 1969, 1969, +1970, 1970, 1971, 1971, 1972, 1973, 1973, 1974, 1974, 1975, +1976, 1976, 1977, 1977, 1978, 1978, 1979, 1980, 1980, 1981, +1981, 1982, 1982, 1983, 1984, 1984, 1985, 1985, 1986, 1986, +1987, 1988, 1988, 1989, 1989, 1990, 1990, 1991, 1992, 1992, +1993, 1993, 1994, 1994, 1995, 1996, 1996, 1997, 1997, 1998, +1998, 1999, 1999, 2000, 2001, 2001, 2002, 2002, 2003, 2003, +2004, 
2005, 2005, 2006, 2006, 2007, 2007, 2008, 2008, 2009, +2010, 2010, 2011, 2011, 2012, 2012, 2013, 2014, 2014, 2015, +2015, 2016, 2016, 2017, 2017, 2018, 2019, 2019, 2020, 2020, +2021, 2021, 2022, 2022, 2023, 2023, 2024, 2025, 2025, 2026, +2026, 2027, 2027, 2028, 2028, 2029, 2030, 2030, 2031, 2031, +2032, 2032, 2033, 2033, 2034, 2034, 2035, 2036, 2036, 2037, +2037, 2038, 2038, 2039, 2039, 2040, 2040, 2041, 2042, 2042, +2043, 2043, 2044, 2044, 2045, 2045, 2046, 2046, 2047, 2048, +2048, 2049, 2049, 2050, 2050, 2051, 2051, 2052, 2052, 2053, +2053, 2054, 2054, 2055, 2056, 2056, 2057, 2057, 2058, 2058, +2059, 2059, 2060, 2060, 2061, 2061, 2062, 2062, 2063, 2064, +2064, 2065, 2065, 2066, 2066, 2067, 2067, 2068, 2068, 2069, +2069, 2070, 2070, 2071, 2072, 2072, 2073, 2073, 2074, 2074, +2075, 2075, 2076, 2076, 2077, 2077, 2078, 2078, 2079, 2079, +2080, 2080, 2081 +}; + /* Number of entries in fe_logsub_table. */ +static const int fe_logsub_table_size = + sizeof(fe_logsub_table) / sizeof(fe_logsub_table[0]); + /** + * Fixed-point log-domain subtraction. + * + * Returns MIN_FIXLOG when x is below MIN_FIXLOG or y >= x, and x itself + * when the scaled difference indexes past the end of fe_logsub_table. + * Otherwise returns x + FIXLN(x-y) minus the rescaled table entry; the + * disabled printf below compares that against x + log(-expm1(y - x)), + * so the result is presumably meant to approximate log(exp(x) - exp(y)). + */ +fixed32 +fe_log_sub(fixed32 x, fixed32 y) +{ + fixed32 d, r; + + if (x < MIN_FIXLOG || y >= x) + return MIN_FIXLOG; + + d = (x - y) >> (DEFAULT_RADIX - 8); /* index into fe_logsub_table */ + + if (d > fe_logsub_table_size - 1) + return x; + + r = fe_logsub_table[d] << (DEFAULT_RADIX - 10); /* rescale the table entry */ +/* + printf("diff=%d\n", + x + FIXLN(x-y) - r - + (x + FLOAT2FIX(logf(-expm1f(FIX2FLOAT(y - x)))))); +*/ + return x + FIXLN(x-y) - r; +} + /** + * Natural logarithm of x converted with FLOAT2FIX; returns MIN_FIXLOG + * for x <= 0. + */ +static fixed32 +fe_log(float32 x) +{ + if (x <= 0) { + return MIN_FIXLOG; + } + else { + return FLOAT2FIX(log(x)); + } +} +#endif + /** + * Pass frequency x through fe_warp_unwarped_to_warped() and convert the + * result to the mel scale: 2595 * log10(1 + f / 700). + */ +static float32 +fe_mel(melfb_t * mel, float32 x) +{ + float32 warped = fe_warp_unwarped_to_warped(mel, x); + + return (float32) (2595.0 * log10(1.0 + warped / 700.0)); +} + /** + * Inverse of fe_mel(): convert mel value x back with + * 700 * (10^(x / 2595) - 1), then pass it through + * fe_warp_warped_to_unwarped(). + */ +static float32 +fe_melinv(melfb_t * mel, float32 x) +{ + float32 warped = (float32) (700.0 * (pow(10.0, x / 2595.0) - 1.0)); + return fe_warp_warped_to_unwarped(mel, warped); +} + /** + * Build the mel filterbank described by mel_fb. + * + * Allocates and fills spec_start, filt_start and filt_width (one entry + * per filter) and the flattened filt_coeffs array. Returns FE_SUCCESS, + * or FE_INVALID_PARAM_ERROR when doublewide filters would reach below 0 + * or above sampling_rate / 2. + */ +int32 +fe_build_melfilters(melfb_t * mel_fb) +{ + float32 melmin, melmax, melbw, fftfreq; + int n_coeffs, i, j; + + + /* Filter coefficient
matrix, in flattened form. */ + mel_fb->spec_start = + ckd_calloc(mel_fb->num_filters, sizeof(*mel_fb->spec_start)); + mel_fb->filt_start = + ckd_calloc(mel_fb->num_filters, sizeof(*mel_fb->filt_start)); + mel_fb->filt_width = + ckd_calloc(mel_fb->num_filters, sizeof(*mel_fb->filt_width)); + + /* First calculate the widths of each filter. */ + /* Minimum and maximum frequencies in mel scale. */ + melmin = fe_mel(mel_fb, mel_fb->lower_filt_freq); + melmax = fe_mel(mel_fb, mel_fb->upper_filt_freq); + + /* Width of filters in mel scale */ + melbw = (melmax - melmin) / (mel_fb->num_filters + 1); + if (mel_fb->doublewide) { /* extend the range by one bandwidth on each side */ + melmin -= melbw; + melmax += melbw; + if ((fe_melinv(mel_fb, melmin) < 0) || + (fe_melinv(mel_fb, melmax) > mel_fb->sampling_rate / 2)) { + E_WARN + ("Out of Range: low filter edge = %f (%f)\n", + fe_melinv(mel_fb, melmin), 0.0); + E_WARN + (" high filter edge = %f (%f)\n", + fe_melinv(mel_fb, melmax), mel_fb->sampling_rate / 2); + return FE_INVALID_PARAM_ERROR; + } + } + + /* DFT point spacing */ + fftfreq = mel_fb->sampling_rate / (float32) mel_fb->fft_size; + + /* Count and place filter coefficients. */ + n_coeffs = 0; + for (i = 0; i < mel_fb->num_filters; ++i) { + float32 freqs[3]; + + /* Left, center, right frequencies in Hertz */ + for (j = 0; j < 3; ++j) { + if (mel_fb->doublewide) + freqs[j] = fe_melinv(mel_fb, (i + j * 2) * melbw + melmin); /* edges two bandwidths apart */ + else + freqs[j] = fe_melinv(mel_fb, (i + j) * melbw + melmin); + /* Round them to DFT points if requested */ + if (mel_fb->round_filters) + freqs[j] = ((int) (freqs[j] / fftfreq + 0.5)) * fftfreq; + } + + /* spec_start is the start of this filter in the power spectrum. */ + mel_fb->spec_start[i] = -1; /* -1: no DFT point assigned yet */ + /* Scan the DFT points 0..fft_size/2 for the first one at or above + * freqs[0] and the first one beyond freqs[2] (or the last point). + * There must be a better way... */ + for (j = 0; j < mel_fb->fft_size / 2 + 1; ++j) { + float32 hz = j * fftfreq; + if (hz < freqs[0]) + continue; + else if (hz > freqs[2] || j == mel_fb->fft_size / 2) { + /* filt_width is the width in DFT points of this filter.
*/ + mel_fb->filt_width[i] = j - mel_fb->spec_start[i]; + /* filt_start is the start of this filter in the filt_coeffs array. */ + mel_fb->filt_start[i] = n_coeffs; + n_coeffs += mel_fb->filt_width[i]; + break; + } + if (mel_fb->spec_start[i] == -1) + mel_fb->spec_start[i] = j; + } + } + + /* Now go back and allocate the coefficient array. */ + mel_fb->filt_coeffs = + ckd_malloc(n_coeffs * sizeof(*mel_fb->filt_coeffs)); + + /* And now generate the coefficients. */ + n_coeffs = 0; /* reused as the write index into filt_coeffs */ + for (i = 0; i < mel_fb->num_filters; ++i) { + float32 freqs[3]; + + /* Left, center, right frequencies in Hertz (same computation as + * in the counting pass above) */ + for (j = 0; j < 3; ++j) { + if (mel_fb->doublewide) + freqs[j] = fe_melinv(mel_fb, (i + j * 2) * melbw + melmin); + else + freqs[j] = fe_melinv(mel_fb, (i + j) * melbw + melmin); + /* Round them to DFT points if requested */ + if (mel_fb->round_filters) + freqs[j] = ((int) (freqs[j] / fftfreq + 0.5)) * fftfreq; + } + + for (j = 0; j < mel_fb->filt_width[i]; ++j) { + float32 hz, loslope, hislope; + + hz = (mel_fb->spec_start[i] + j) * fftfreq; /* frequency of this DFT point */ + if (hz < freqs[0] || hz > freqs[2]) { + E_FATAL + ("Failed to create filterbank, frequency range does not match.
" + "Sample rate %f, FFT size %d, lowerf %f < freq %f > upperf %f.\n", + mel_fb->sampling_rate, mel_fb->fft_size, freqs[0], hz, + freqs[2]); + } + loslope = (hz - freqs[0]) / (freqs[1] - freqs[0]); /* rising edge, left to center */ + hislope = (freqs[2] - hz) / (freqs[2] - freqs[1]); /* falling edge, center to right */ + if (mel_fb->unit_area) { + loslope *= 2 / (freqs[2] - freqs[0]); + hislope *= 2 / (freqs[2] - freqs[0]); + } + if (loslope < hislope) { /* keep the smaller of the two slopes */ +#ifdef FIXED_POINT + mel_fb->filt_coeffs[n_coeffs] = fe_log(loslope); +#else + mel_fb->filt_coeffs[n_coeffs] = loslope; +#endif + } + else { +#ifdef FIXED_POINT + mel_fb->filt_coeffs[n_coeffs] = fe_log(hislope); +#else + mel_fb->filt_coeffs[n_coeffs] = hislope; +#endif + } + ++n_coeffs; + } + } + + return FE_SUCCESS; +} + /** + * Precompute the DCT cosine table mel_cosine[num_cepstra][num_filters], + * the sqrt_inv_n and sqrt_inv_2n normalization constants and, when + * lifter_val is non-zero, the liftering weights. Always returns 0. + */ +int32 +fe_compute_melcosine(melfb_t * mel_fb) +{ + + float64 freqstep; + int32 i, j; + + mel_fb->mel_cosine = + (mfcc_t **) ckd_calloc_2d(mel_fb->num_cepstra, + mel_fb->num_filters, sizeof(mfcc_t)); + + freqstep = M_PI / mel_fb->num_filters; + /* NOTE: The first row vector is actually unnecessary but we leave + * it in to avoid confusion. */ + for (i = 0; i < mel_fb->num_cepstra; i++) { + for (j = 0; j < mel_fb->num_filters; j++) { + float64 cosine; + + cosine = cos(freqstep * i * (j + 0.5)); + mel_fb->mel_cosine[i][j] = FLOAT2COS(cosine); + } + } + + /* Also precompute normalization constants for unitary DCT.
*/ + mel_fb->sqrt_inv_n = FLOAT2COS(sqrt(1.0 / mel_fb->num_filters)); + mel_fb->sqrt_inv_2n = FLOAT2COS(sqrt(2.0 / mel_fb->num_filters)); + + /* And liftering weights. ckd_calloc() (already used for the other + * melfb_t arrays in this file) checks the allocation; a NULL from + * plain calloc() would be dereferenced in the loop below. + * NOTE(review): if lifter_val has an integer type, lifter_val / 2 + * truncates for odd values -- confirm that is intended. */ + if (mel_fb->lifter_val) { + mel_fb->lifter = + ckd_calloc(mel_fb->num_cepstra, sizeof(*mel_fb->lifter)); + for (i = 0; i < mel_fb->num_cepstra; ++i) { + mel_fb->lifter[i] = FLOAT2MFCC(1 + mel_fb->lifter_val / 2 + * sin(i * M_PI / + mel_fb->lifter_val)); + } + } + + return (0); +} + /** + * Pre-emphasis filter: out[i] = in[i] - factor * in[i - 1], with prior + * standing in for the sample before in[0]. + * + * FIXED16 uses a 15-bit scaled factor and shifts the result back down; + * FIXED_POINT leaves the result scaled by DEFAULT_RADIX bits. in[0] is + * read unconditionally, so len is assumed to be at least 1. + */ +static void +fe_pre_emphasis(int16 const *in, frame_t * out, int32 len, + float32 factor, int16 prior) +{ + int i; + +#if defined(FIXED16) + int16 fxd_alpha = (int16) (factor * 0x8000); + int32 tmp1, tmp2; + + tmp1 = (int32) in[0] << 15; + tmp2 = (int32) prior *fxd_alpha; + out[0] = (int16) ((tmp1 - tmp2) >> 15); + for (i = 1; i < len; ++i) { + tmp1 = (int32) in[i] << 15; + tmp2 = (int32) in[i - 1] * fxd_alpha; + out[i] = (int16) ((tmp1 - tmp2) >> 15); + } +#elif defined(FIXED_POINT) + fixed32 fxd_alpha = FLOAT2FIX(factor); + out[0] = ((fixed32) in[0] << DEFAULT_RADIX) - (prior * fxd_alpha); + for (i = 1; i < len; ++i) + out[i] = ((fixed32) in[i] << DEFAULT_RADIX) + - (fixed32) in[i - 1] * fxd_alpha; +#else + out[0] = (frame_t) in[0] - (frame_t) prior *factor; + for (i = 1; i < len; i++) + out[i] = (frame_t) in[i] - (frame_t) in[i - 1] * factor; +#endif +} + /** + * Copy len samples from in to out without pre-emphasis: a plain memcpy + * for FIXED16, shifted left by DEFAULT_RADIX for FIXED_POINT, and a + * cast to frame_t otherwise. + */ +static void +fe_short_to_frame(int16 const *in, frame_t * out, int32 len) +{ + int i; + +#if defined(FIXED16) + memcpy(out, in, len * sizeof(*out)); +#elif defined(FIXED_POINT) + for (i = 0; i < len; i++) + out[i] = (int32) in[i] << DEFAULT_RADIX; +#else /* FIXED_POINT */ + for (i = 0; i < len; i++) + out[i] = (frame_t) in[i]; +#endif /* FIXED_POINT */ +} + /** + * Fill in[0 .. in_len / 2 - 1] with the Hamming window coefficients + * 0.54 - 0.46 * cos(2 * pi * i / (in_len - 1)). + */ +void +fe_create_hamming(window_t * in, int32 in_len) +{ + int i; + + /* Symmetric, so we only create the first half of it.
*/ + for (i = 0; i < in_len / 2; i++) { + float64 hamm; + hamm = (0.54 - 0.46 * cos(2 * M_PI * i / + ((float64) in_len - 1.0))); +#ifdef FIXED16 + in[i] = (int16) (hamm * 0x8000); +#else + in[i] = FLOAT2COS(hamm); +#endif + } +} + /** + * Window the frame in place. + * + * When remove_dc is non-zero the mean of in[0 .. in_len - 1] is + * subtracted first. window holds only the first half of the symmetric + * window, so window[i] is applied to both in[i] and in[in_len - 1 - i]. + * NOTE(review): for odd in_len the middle sample is not multiplied by + * any window coefficient -- confirm that is intended. + */ +static void +fe_hamming_window(frame_t * in, window_t * window, int32 in_len, + int32 remove_dc) +{ + int i; + + if (remove_dc) { +#ifdef FIXED16 + int32 mean = 0; /* Use int32 to avoid possibility of overflow */ +#else + frame_t mean = 0; +#endif + + for (i = 0; i < in_len; i++) + mean += in[i]; + mean /= in_len; + for (i = 0; i < in_len; i++) + in[i] -= (frame_t) mean; + } + +#ifdef FIXED16 + for (i = 0; i < in_len / 2; i++) { + int32 tmp1, tmp2; + + tmp1 = (int32) in[i] * window[i]; + tmp2 = (int32) in[in_len - 1 - i] * window[i]; + in[i] = (int16) (tmp1 >> 15); + in[in_len - 1 - i] = (int16) (tmp2 >> 15); + } +#else + for (i = 0; i < in_len / 2; i++) { + in[i] = COSMUL(in[i], window[i]); + in[in_len - 1 - i] = COSMUL(in[in_len - 1 - i], window[i]); + } +#endif +} + /** + * Turn the first len samples of fe->spch into a windowed frame in + * fe->frame: pre-emphasize (or plainly convert when pre_emphasis_alpha + * is 0), zero-pad up to fft_size, then apply the Hamming window over + * frame_size samples. Returns len unchanged. + */ +static int +fe_spch_to_frame(fe_t * fe, int len) +{ + /* Copy to the frame buffer. */ + if (fe->pre_emphasis_alpha != 0.0) { + fe_pre_emphasis(fe->spch, fe->frame, len, + fe->pre_emphasis_alpha, fe->prior); + /* Remember the sample that will precede the next frame. + * NOTE(review): len == 0 would read spch[-1] below -- confirm + * callers never pass an empty frame. */ + if (len >= fe->frame_shift) + fe->prior = fe->spch[fe->frame_shift - 1]; + else + fe->prior = fe->spch[len - 1]; + } + else + fe_short_to_frame(fe->spch, fe->frame, len); + + /* Zero pad up to FFT size. */ + memset(fe->frame + len, 0, (fe->fft_size - len) * sizeof(*fe->frame)); + + /* Window. */ + fe_hamming_window(fe->frame, fe->hamming_window, fe->frame_size, + fe->remove_dc); + + return len; +} + /** + * Load up to frame_size samples from in into fe->spch (len is clamped + * to frame_size), byte-swap and dither them if configured, and convert + * them to a windowed frame. Returns the value of fe_spch_to_frame(). + */ +int +fe_read_frame(fe_t * fe, int16 const *in, int32 len) +{ + int i; + + if (len > fe->frame_size) + len = fe->frame_size; + + /* Read it into the raw speech buffer. */ + memcpy(fe->spch, in, len * sizeof(*in)); + /* Swap and dither if necessary.
*/ + if (fe->swap) + for (i = 0; i < len; ++i) + SWAP_INT16(&fe->spch[i]); + if (fe->dither) + for (i = 0; i < len; ++i) + fe->spch[i] += (int16) ((!(s3_rand_int31() % 4)) ? 1 : 0); /* adds 1 when the random draw is a multiple of 4 */ + + return fe_spch_to_frame(fe, len); +} + /** + * Advance the raw speech buffer by one frame shift: drop the oldest + * frame_shift samples of fe->spch, append up to frame_shift new samples + * from in (len is clamped), byte-swap and dither the new samples if + * configured, then convert the buffer to a windowed frame. + * Returns the value of fe_spch_to_frame(), i.e. offset + len. + */ +int +fe_shift_frame(fe_t * fe, int16 const *in, int32 len) +{ + int offset, i; + + if (len > fe->frame_shift) + len = fe->frame_shift; + offset = fe->frame_size - fe->frame_shift; /* number of old samples kept */ + + /* Shift data into the raw speech buffer. */ + memmove(fe->spch, fe->spch + fe->frame_shift, + offset * sizeof(*fe->spch)); + memcpy(fe->spch + offset, in, len * sizeof(*fe->spch)); + /* Swap and dither if necessary. */ + if (fe->swap) + for (i = 0; i < len; ++i) + SWAP_INT16(&fe->spch[offset + i]); + if (fe->dither) + for (i = 0; i < len; ++i) + fe->spch[offset + i] + += (int16) ((!(s3_rand_int31() % 4)) ? 1 : 0); + + return fe_spch_to_frame(fe, offset + len); +} + +/** + * Create arrays of twiddle factors: ccc[i] = cos(2 * pi * i / fft_size) + * and sss[i] = sin(2 * pi * i / fft_size) for i in [0, fft_size / 4), + * stored as 15-bit scaled int16 for FIXED16 and via FLOAT2COS for + * FIXED_POINT. + */ +void +fe_create_twiddle(fe_t * fe) +{ + int i; + + for (i = 0; i < fe->fft_size / 4; ++i) { + float64 a = 2 * M_PI * i / fe->fft_size; +#ifdef FIXED16 + fe->ccc[i] = (int16) (cos(a) * 0x8000); + fe->sss[i] = (int16) (sin(a) * 0x8000); +#elif defined(FIXED_POINT) + fe->ccc[i] = FLOAT2COS(cos(a)); + fe->sss[i] = FLOAT2COS(sin(a)); +#else + fe->ccc[i] = cos(a); + fe->sss[i] = sin(a); +#endif + } +} + + +/* Translated from the FORTRAN (obviously) from "Real-Valued Fast + * Fourier Transform Algorithms" by Henrik V. Sorensen et al., IEEE + * Transactions on Acoustics, Speech, and Signal Processing, vol. 35, + * no.6. The 16-bit version does a version of "block floating + * point" in order to avoid rounding errors. + * + * Both versions transform fe->frame in place over fft_size points. + */ +#if defined(FIXED16) +static int +fe_fft_real(fe_t * fe) +{ + int i, j, k, m, n, lz; + frame_t *x, xt, max; + + x = fe->frame; + m = fe->fft_order; + n = fe->fft_size; + + /* Bit-reverse the input.
*/ + j = 0; + for (i = 0; i < n - 1; ++i) { + if (i < j) { + xt = x[j]; + x[j] = x[i]; + x[i] = xt; + } + k = n / 2; + while (k <= j) { + j -= k; + k /= 2; + } + j += k; + } + /* Determine how many bits of dynamic range are in the input. */ + max = 0; + for (i = 0; i < n; ++i) + if (abs(x[i]) > max) + max = abs(x[i]); + /* The FFT has a gain of M bits, so we need to attenuate the input + * by M bits minus the number of leading zeroes in the input's + * range in order to avoid overflows. */ + for (lz = 0; lz < m; ++lz) + if (max & (1 << (15 - lz))) + break; + + /* Basic butterflies (2-point FFT, real twiddle factors): + * x[i] = x[i] + 1 * x[i+1] + * x[i+1] = x[i] + -1 * x[i+1] + */ + /* The quantization error introduced by attenuating the input at + * any given stage of the FFT has a cascading effect, so we hold + * off on it until it's absolutely necessary. */ + for (i = 0; i < n; i += 2) { + int atten = (lz == 0); + xt = x[i] >> atten; + x[i] = xt + (x[i + 1] >> atten); + x[i + 1] = xt - (x[i + 1] >> atten); + } + + /* The rest of the butterflies, in stages k = 1..m-1 */ + for (k = 1; k < m; ++k) { + int n1, n2, n4; + /* Start attenuating once we hit the number of leading zeros. */ + int atten = (k >= lz); + + n4 = k - 1; /* log2 of a quarter of the stride */ + n2 = k; /* log2 of half the stride */ + n1 = k + 1; /* log2 of the stride */ + /* Stride over each (1 << (k+1)) points */ + for (i = 0; i < n; i += (1 << n1)) { + /* Basic butterfly with real twiddle factors: + * x[i] = x[i] + 1 * x[i + (1<<k)] + * x[i + (1<<k)] = x[i] + -1 * x[i + (1<<k)] + */ + xt = x[i] >> atten; + x[i] = xt + (x[i + (1 << n2)] >> atten); + x[i + (1 << n2)] = xt - (x[i + (1 << n2)] >> atten); + + /* The other ones with real twiddle factors: + * x[i + (1<<k) + (1<<(k-1))] + * = 0 * x[i + (1<<k-1)] + -1 * x[i + (1<<k) + (1<<k-1)] + * x[i + (1<<(k-1))] + * = 1 * x[i + (1<<k-1)] + 0 * x[i + (1<<k) + (1<<k-1)] + */ + x[i + (1 << n2) + (1 << n4)] = + -x[i + (1 << n2) + (1 << n4)] >> atten; + x[i + (1 << n4)] = x[i + (1 << n4)] >> atten; + + /* Butterflies with complex twiddle factors.
+ * There are (1<<(k-1)) - 1 of them (j = 1 .. (1<<(k-1)) - 1). + */ + for (j = 1; j < (1 << n4); ++j) { + frame_t cc, ss, t1, t2; + int i1, i2, i3, i4; + + i1 = i + j; + i2 = i + (1 << n2) - j; + i3 = i + (1 << n2) + j; + i4 = i + (1 << n2) + (1 << n2) - j; + + /* + * cc = real(W[j * n / (1<<(k+1))]) + * ss = imag(W[j * n / (1<<(k+1))]) + */ + cc = fe->ccc[j << (m - n1)]; + ss = fe->sss[j << (m - n1)]; + + /* There are some symmetry properties which allow us + * to get away with only four multiplications here. */ + { + int32 tmp1, tmp2; + tmp1 = (int32) x[i3] * cc + (int32) x[i4] * ss; + tmp2 = (int32) x[i3] * ss - (int32) x[i4] * cc; + t1 = (int16) (tmp1 >> 15) >> atten; + t2 = (int16) (tmp2 >> 15) >> atten; + } + + /* Order matters: x[i2] and x[i1] are read before they + * are overwritten. */ + x[i4] = (x[i2] >> atten) - t2; + x[i3] = (-x[i2] >> atten) - t2; + x[i2] = (x[i1] >> atten) - t1; + x[i1] = (x[i1] >> atten) + t1; + } + } + } + + /* Return the residual scaling factor. */ + return lz; +} +#else /* !FIXED16 */ +static int +fe_fft_real(fe_t * fe) +{ + int i, j, k, m, n; + frame_t *x, xt; + + x = fe->frame; + m = fe->fft_order; + n = fe->fft_size; + + /* Bit-reverse the input.
*/ + j = 0; + for (i = 0; i < n - 1; ++i) { + if (i < j) { + xt = x[j]; + x[j] = x[i]; + x[i] = xt; + } + k = n / 2; + while (k <= j) { + j -= k; + k /= 2; + } + j += k; + } + + /* Basic butterflies (2-point FFT, real twiddle factors): + * x[i] = x[i] + 1 * x[i+1] + * x[i+1] = x[i] + -1 * x[i+1] + */ + for (i = 0; i < n; i += 2) { + xt = x[i]; + x[i] = (xt + x[i + 1]); + x[i + 1] = (xt - x[i + 1]); + } + + /* The rest of the butterflies, in stages k = 1..m-1 */ + for (k = 1; k < m; ++k) { + int n1, n2, n4; + + n4 = k - 1; /* log2 of a quarter of the stride */ + n2 = k; /* log2 of half the stride */ + n1 = k + 1; /* log2 of the stride */ + /* Stride over each (1 << (k+1)) points */ + for (i = 0; i < n; i += (1 << n1)) { + /* Basic butterfly with real twiddle factors: + * x[i] = x[i] + 1 * x[i + (1<<k)] + * x[i + (1<<k)] = x[i] + -1 * x[i + (1<<k)] + */ + xt = x[i]; + x[i] = (xt + x[i + (1 << n2)]); + x[i + (1 << n2)] = (xt - x[i + (1 << n2)]); + + /* The other ones with real twiddle factors: + * x[i + (1<<k) + (1<<(k-1))] + * = 0 * x[i + (1<<k-1)] + -1 * x[i + (1<<k) + (1<<k-1)] + * x[i + (1<<(k-1))] + * = 1 * x[i + (1<<k-1)] + 0 * x[i + (1<<k) + (1<<k-1)] + */ + x[i + (1 << n2) + (1 << n4)] = -x[i + (1 << n2) + (1 << n4)]; + x[i + (1 << n4)] = x[i + (1 << n4)]; /* no-op, kept to mirror the formula above */ + + /* Butterflies with complex twiddle factors. + * There are (1<<(k-1)) - 1 of them (j = 1 .. (1<<(k-1)) - 1). + */ + for (j = 1; j < (1 << n4); ++j) { + frame_t cc, ss, t1, t2; + int i1, i2, i3, i4; + + i1 = i + j; + i2 = i + (1 << n2) - j; + i3 = i + (1 << n2) + j; + i4 = i + (1 << n2) + (1 << n2) - j; + + /* + * cc = real(W[j * n / (1<<(k+1))]) + * ss = imag(W[j * n / (1<<(k+1))]) + */ + cc = fe->ccc[j << (m - n1)]; + ss = fe->sss[j << (m - n1)]; + + /* There are some symmetry properties which allow us + * to get away with only four multiplications here. */ + t1 = COSMUL(x[i3], cc) + COSMUL(x[i4], ss); + t2 = COSMUL(x[i3], ss) - COSMUL(x[i4], cc); + + /* Order matters: x[i2] and x[i1] are read before they + * are overwritten. */ + x[i4] = (x[i2] - t2); + x[i3] = (-x[i2] - t2); + x[i2] = (x[i1] - t1); + x[i1] = (x[i1] + t1); + } + } + } + + /* This isn't used, but return it for completeness.
*/ + return m; +} +#endif /* !FIXED16 */ + /** + * Run the in-place real FFT on fe->frame and write the power spectrum + * to fe->spec[0 .. fft_size / 2]. + * + * Floating point stores the squared magnitude. The fixed-point builds + * store twice the log of each part's absolute value (after shifting by + * the remaining scale bits) and combine the two parts with fe_log_add(). + */ +static void +fe_spec_magnitude(fe_t * fe) +{ + frame_t *fft; + powspec_t *spec; + int32 j, scale, fftsize; + + /* Do FFT and get the scaling factor back (only actually used in + * fixed-point). Note the scaling factor is expressed in bits. */ + scale = fe_fft_real(fe); + + /* Convenience pointers to make things less awkward below. */ + fft = fe->frame; + spec = fe->spec; + fftsize = fe->fft_size; + + /* We need to scale things up the rest of the way to N. */ + scale = fe->fft_order - scale; + + /* The first point (DC coefficient) has no imaginary part */ + { +#ifdef FIXED16 + spec[0] = fixlog(abs(fft[0]) << scale) * 2; +#elif defined(FIXED_POINT) + spec[0] = FIXLN(abs(fft[0]) << scale) * 2; +#else + spec[0] = fft[0] * fft[0]; +#endif + } + + for (j = 1; j <= fftsize / 2; j++) { /* real part at fft[j], imaginary part at fft[fftsize - j] */ +#ifdef FIXED16 + int32 rr = fixlog(abs(fft[j]) << scale) * 2; + int32 ii = fixlog(abs(fft[fftsize - j]) << scale) * 2; + spec[j] = fe_log_add(rr, ii); +#elif defined(FIXED_POINT) + int32 rr = FIXLN(abs(fft[j]) << scale) * 2; + int32 ii = FIXLN(abs(fft[fftsize - j]) << scale) * 2; + spec[j] = fe_log_add(rr, ii); +#else + spec[j] = fft[j] * fft[j] + fft[fftsize - j] * fft[fftsize - j]; +#endif + } +} + /** + * Apply the mel filterbank to fe->spec, writing one value per filter + * to fe->mfspec. Each filter covers filt_width points of the spectrum + * starting at spec_start, weighted by filt_coeffs from filt_start on. + */ +static void +fe_mel_spec(fe_t * fe) +{ + int whichfilt; + powspec_t *spec, *mfspec; + + /* Convenience pointers.
*/ + spec = fe->spec; + mfspec = fe->mfspec; + for (whichfilt = 0; whichfilt < fe->mel_fb->num_filters; whichfilt++) { + int spec_start, filt_start, i; + + spec_start = fe->mel_fb->spec_start[whichfilt]; + filt_start = fe->mel_fb->filt_start[whichfilt]; + +#ifdef FIXED_POINT /* log domain: weights are added, terms are summed with fe_log_add() */ + mfspec[whichfilt] = + spec[spec_start] + fe->mel_fb->filt_coeffs[filt_start]; + for (i = 1; i < fe->mel_fb->filt_width[whichfilt]; i++) { + mfspec[whichfilt] = fe_log_add(mfspec[whichfilt], + spec[spec_start + i] + + fe->mel_fb-> + filt_coeffs[filt_start + i]); + } +#else /* !FIXED_POINT */ + mfspec[whichfilt] = 0; + for (i = 0; i < fe->mel_fb->filt_width[whichfilt]; i++) + mfspec[whichfilt] += + spec[spec_start + i] * fe->mel_fb->filt_coeffs[filt_start + + i]; +#endif /* !FIXED_POINT */ + } + +} + /* Added to each mel bin before taking its log in fe_mel_cep(). */ +#define LOG_FLOOR 1e-4 + /** + * Convert the mel spectrum in fe->mfspec into output features in mfcep. + * + * In floating point the mel spectrum is first replaced by its log. + * The output is then the log spectrum itself (RAW_LOG_SPEC), a smoothed + * log spectrum (SMOOTH_LOG_SPEC), or cepstra from the transform chosen + * by fe->transform. + */ +static void +fe_mel_cep(fe_t * fe, mfcc_t * mfcep) +{ + int32 i; + powspec_t *mfspec; + + /* Convenience pointer. */ + mfspec = fe->mfspec; + + for (i = 0; i < fe->mel_fb->num_filters; ++i) { +#ifndef FIXED_POINT /* It's already in log domain for fixed point */ + mfspec[i] = log(mfspec[i] + LOG_FLOOR); +#endif /* !FIXED_POINT */ + } + + /* If we are doing LOG_SPEC, then just copy the log spectrum out. */ + if (fe->log_spec == RAW_LOG_SPEC) { + for (i = 0; i < fe->feature_dimension; i++) { + mfcep[i] = (mfcc_t) mfspec[i]; + } + } + /* For smoothed spectrum, do DCT-II followed by (its inverse) DCT-III */ + else if (fe->log_spec == SMOOTH_LOG_SPEC) { + /* FIXME: This is probably broken for fixed-point.
*/ + fe_dct2(fe, mfspec, mfcep, 0); + fe_dct3(fe, mfcep, mfspec); + for (i = 0; i < fe->feature_dimension; i++) { + mfcep[i] = (mfcc_t) mfspec[i]; + } + } + else if (fe->transform == DCT_II) + fe_dct2(fe, mfspec, mfcep, FALSE); + else if (fe->transform == DCT_HTK) + fe_dct2(fe, mfspec, mfcep, TRUE); + else + fe_spec2cep(fe, mfspec, mfcep); /* any other transform value */ + + return; +} + /** + * Cosine transform of the log mel spectrum into num_cepstra cepstra, + * normalized by num_filters. The first filter (j == 0) is weighted by + * half relative to the others, as the beta comments below spell out. + */ +void +fe_spec2cep(fe_t * fe, const powspec_t * mflogspec, mfcc_t * mfcep) +{ + int32 i, j, beta; + + /* Compute C0 separately (its basis vector is 1) to avoid + * costly multiplications. */ + mfcep[0] = mflogspec[0] / 2; /* beta = 0.5 */ + for (j = 1; j < fe->mel_fb->num_filters; j++) + mfcep[0] += mflogspec[j]; /* beta = 1.0 */ + mfcep[0] /= (frame_t) fe->mel_fb->num_filters; + + for (i = 1; i < fe->num_cepstra; ++i) { + mfcep[i] = 0; + for (j = 0; j < fe->mel_fb->num_filters; j++) { + if (j == 0) + beta = 1; /* 0.5 */ + else + beta = 2; /* 1.0 */ + mfcep[i] += COSMUL(mflogspec[j], + fe->mel_fb->mel_cosine[i][j]) * beta; + } + /* Note that this actually normalizes by num_filters, like the + * original Sphinx front-end, due to the doubled 'beta' factor + * above. */ + mfcep[i] /= (frame_t) fe->mel_fb->num_filters * 2; + } +} + /** + * DCT-II of the log mel spectrum into num_cepstra cepstra. + * + * C0 is scaled by sqrt_inv_2n when htk is non-zero and by sqrt_inv_n + * otherwise; every other coefficient is scaled by sqrt_inv_2n. + */ +void +fe_dct2(fe_t * fe, const powspec_t * mflogspec, mfcc_t * mfcep, int htk) +{ + int32 i, j; + + /* Compute C0 separately (its basis vector is 1) to avoid + * costly multiplications.
*/ + mfcep[0] = mflogspec[0]; + for (j = 1; j < fe->mel_fb->num_filters; j++) + mfcep[0] += mflogspec[j]; + if (htk) + mfcep[0] = COSMUL(mfcep[0], fe->mel_fb->sqrt_inv_2n); + else /* sqrt(1/N) = sqrt(2/N) * 1/sqrt(2) */ + mfcep[0] = COSMUL(mfcep[0], fe->mel_fb->sqrt_inv_n); + + for (i = 1; i < fe->num_cepstra; ++i) { + mfcep[i] = 0; + for (j = 0; j < fe->mel_fb->num_filters; j++) { + mfcep[i] += COSMUL(mflogspec[j], fe->mel_fb->mel_cosine[i][j]); + } + mfcep[i] = COSMUL(mfcep[i], fe->mel_fb->sqrt_inv_2n); + } +} + /** + * Multiply each of the num_cepstra cepstra in place by its liftering + * weight. Does nothing when lifter_val is 0. + */ +void +fe_lifter(fe_t * fe, mfcc_t * mfcep) +{ + int32 i; + + if (fe->mel_fb->lifter_val == 0) + return; + + for (i = 0; i < fe->num_cepstra; ++i) { + mfcep[i] = MFCCMUL(mfcep[i], fe->mel_fb->lifter[i]); + } +} + /** + * DCT-III: rebuild num_filters log mel spectrum values from + * num_cepstra cepstra. C0 is weighted by SQRT_HALF and each sum is + * scaled by sqrt_inv_2n. + */ +void +fe_dct3(fe_t * fe, const mfcc_t * mfcep, powspec_t * mflogspec) +{ + int32 i, j; + + for (i = 0; i < fe->mel_fb->num_filters; ++i) { + mflogspec[i] = COSMUL(mfcep[0], SQRT_HALF); + for (j = 1; j < fe->num_cepstra; j++) { + mflogspec[i] += COSMUL(mfcep[j], fe->mel_fb->mel_cosine[j][i]); + } + mflogspec[i] = COSMUL(mflogspec[i], fe->mel_fb->sqrt_inv_2n); + } +} + /** + * Process the frame currently held in fe and write its features to fea. + * Calls, in order: fe_spec_magnitude, fe_mel_spec, fe_track_snr, + * fe_mel_cep, fe_lifter and fe_vad_hangover; the is_speech flag set by + * fe_track_snr() is handed to fe_vad_hangover(). + */ +void +fe_write_frame(fe_t * fe, mfcc_t * fea) +{ + int32 is_speech; + + fe_spec_magnitude(fe); + fe_mel_spec(fe); + fe_track_snr(fe, &is_speech); + fe_mel_cep(fe, fea); + fe_lifter(fe, fea); + fe_vad_hangover(fe, fea, is_speech); +} + + /** + * Allocate a d1 x d2 array of elem_size-byte elements with + * ckd_calloc_2d(); release it with fe_free_2d(). + */ +void * +fe_create_2d(int32 d1, int32 d2, int32 elem_size) +{ + return (void *) ckd_calloc_2d(d1, d2, elem_size); +} + /** + * Free an array with ckd_free_2d(). + */ +void +fe_free_2d(void *arr) +{ + ckd_free_2d((void **) arr); +} diff --git a/media/sphinxbase/src/libsphinxbase/fe/fe_type.h b/media/sphinxbase/src/libsphinxbase/fe/fe_type.h new file mode 100644 index 000000000..160ed8ff8 --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/fe/fe_type.h @@ -0,0 +1,65 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2013 Carnegie Mellon University. All rights + * reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * ==================================================================== + * + */ +#ifndef FE_TYPE_H +#define FE_TYPE_H + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include "sphinxbase/fe.h" +#include "sphinxbase/fixpoint.h" + +/* Numeric types of the front end, chosen at compile time: 16-bit types when FIXED16 is defined, otherwise fixed32-based types when FIXED_POINT is defined, otherwise float64. Each branch defines frame_t, window_t, powspec_t and a two-member struct complex (members r and i, presumably real and imaginary parts). */ +#ifdef FIXED16 +/* Q15 format */ +typedef int16 frame_t; +typedef int16 window_t; +typedef int32 powspec_t; +typedef struct { int16 r, i; } complex; +#elif defined(FIXED_POINT) +typedef fixed32 frame_t; +typedef int32 powspec_t; +typedef fixed32 window_t; +typedef struct { fixed32 r, i; } complex; +#else /* neither FIXED16 nor FIXED_POINT */ +typedef float64 frame_t; +typedef float64 powspec_t; +typedef float64 window_t; +typedef struct { float64 r, i; } complex; +#endif /* FIXED16 / FIXED_POINT */ + +#endif /* FE_TYPE_H */ diff --git a/media/sphinxbase/src/libsphinxbase/fe/fe_warp.c b/media/sphinxbase/src/libsphinxbase/fe/fe_warp.c new file mode 100644 index 000000000..e409bea76 --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/fe/fe_warp.c @@ -0,0 +1,252 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2006 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium.
+ * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/********************************************************************* + * + * File: fe_warp.c + * + * Description: + * Allows a caller to choose a warping function. + *********************************************************************/ + +/* static char rcsid[] = "@(#)$Id: fe_warp.c,v 1.2 2006/02/17 00:31:34 egouvea Exp $";*/ + +#include "fe_warp_inverse_linear.h" +#include "fe_warp_affine.h" +#include "fe_warp_piecewise_linear.h" +#include "fe_warp.h" + +#include "sphinxbase/err.h" + +#include <stdio.h> +#include <string.h> +#include <assert.h> +#include <stdlib.h> + +/* This is for aliases for each of the entries below. Currently not + used. 
+*/ +static char *__name2id[] = { + "inverse", + "linear", + "piecewise", + NULL +}; + +static char *name2id[] = { + "inverse_linear", + "affine", + "piecewise_linear", + NULL +}; + +static fe_warp_conf_t fe_warp_conf[FE_WARP_ID_MAX + 1] = { + {fe_warp_inverse_linear_set_parameters, + fe_warp_inverse_linear_doc, + fe_warp_inverse_linear_id, + fe_warp_inverse_linear_n_param, + fe_warp_inverse_linear_warped_to_unwarped, + fe_warp_inverse_linear_unwarped_to_warped, + fe_warp_inverse_linear_print}, /* Inverse linear warping */ + {fe_warp_affine_set_parameters, + fe_warp_affine_doc, + fe_warp_affine_id, + fe_warp_affine_n_param, + fe_warp_affine_warped_to_unwarped, + fe_warp_affine_unwarped_to_warped, + fe_warp_affine_print}, /* Affine warping */ + {fe_warp_piecewise_linear_set_parameters, + fe_warp_piecewise_linear_doc, + fe_warp_piecewise_linear_id, + fe_warp_piecewise_linear_n_param, + fe_warp_piecewise_linear_warped_to_unwarped, + fe_warp_piecewise_linear_unwarped_to_warped, + fe_warp_piecewise_linear_print}, /* Piecewise_Linear warping */ +}; + +int +fe_warp_set(melfb_t *mel, const char *id_name) +{ + uint32 i; + + for (i = 0; name2id[i]; i++) { + if (strcmp(id_name, name2id[i]) == 0) { + mel->warp_id = i; + break; + } + } + + if (name2id[i] == NULL) { + for (i = 0; __name2id[i]; i++) { + if (strcmp(id_name, __name2id[i]) == 0) { + mel->warp_id = i; + break; + } + } + if (__name2id[i] == NULL) { + E_ERROR("Unimplemented warping function %s\n", id_name); + E_ERROR("Implemented functions are:\n"); + for (i = 0; name2id[i]; i++) { + fprintf(stderr, "\t%s\n", name2id[i]); + } + mel->warp_id = FE_WARP_ID_NONE; + + return FE_START_ERROR; + } + } + + return FE_SUCCESS; +} + +void +fe_warp_set_parameters(melfb_t *mel, char const *param_str, float sampling_rate) +{ + if (mel->warp_id <= FE_WARP_ID_MAX) { + fe_warp_conf[mel->warp_id].set_parameters(param_str, sampling_rate); + } + else if (mel->warp_id == FE_WARP_ID_NONE) { + E_FATAL("feat module must be configured w/ a 
valid ID\n"); + } + else { + E_FATAL + ("fe_warp module misconfigured with invalid fe_warp_id %u\n", + mel->warp_id); + } +} + +const char * +fe_warp_doc(melfb_t *mel) +{ + if (mel->warp_id <= FE_WARP_ID_MAX) { + return fe_warp_conf[mel->warp_id].doc(); + } + else if (mel->warp_id == FE_WARP_ID_NONE) { + E_FATAL("fe_warp module must be configured w/ a valid ID\n"); + } + else { + E_FATAL + ("fe_warp module misconfigured with invalid fe_warp_id %u\n", + mel->warp_id); + } + + return NULL; +} + +uint32 +fe_warp_id(melfb_t *mel) +{ + if (mel->warp_id <= FE_WARP_ID_MAX) { + assert(mel->warp_id == fe_warp_conf[mel->warp_id].id()); + return mel->warp_id; + } + else if (mel->warp_id != FE_WARP_ID_NONE) { + E_FATAL + ("fe_warp module misconfigured with invalid fe_warp_id %u\n", + mel->warp_id); + } + + return FE_WARP_ID_NONE; +} + +uint32 +fe_warp_n_param(melfb_t *mel) +{ + if (mel->warp_id <= FE_WARP_ID_MAX) { + return fe_warp_conf[mel->warp_id].n_param(); + } + else if (mel->warp_id == FE_WARP_ID_NONE) { + E_FATAL("fe_warp module must be configured w/ a valid ID\n"); + } + else { + E_FATAL + ("fe_warp module misconfigured with invalid fe_warp_id %u\n", + mel->warp_id); + } + + return 0; +} + +float +fe_warp_warped_to_unwarped(melfb_t *mel, float nonlinear) +{ + if (mel->warp_id <= FE_WARP_ID_MAX) { + return fe_warp_conf[mel->warp_id].warped_to_unwarped(nonlinear); + } + else if (mel->warp_id == FE_WARP_ID_NONE) { + E_FATAL("fe_warp module must be configured w/ a valid ID\n"); + } + else { + E_FATAL + ("fe_warp module misconfigured with invalid fe_warp_id %u\n", + mel->warp_id); + } + + return 0; +} + +float +fe_warp_unwarped_to_warped(melfb_t *mel,float linear) +{ + if (mel->warp_id <= FE_WARP_ID_MAX) { + return fe_warp_conf[mel->warp_id].unwarped_to_warped(linear); + } + else if (mel->warp_id == FE_WARP_ID_NONE) { + E_FATAL("fe_warp module must be configured w/ a valid ID\n"); + } + else { + E_FATAL + ("fe_warp module misconfigured with invalid fe_warp_id %u\n", + 
mel->warp_id); + } + + return 0; +} + +void +fe_warp_print(melfb_t *mel, const char *label) +{ + if (mel->warp_id <= FE_WARP_ID_MAX) { + fe_warp_conf[mel->warp_id].print(label); + } + else if (mel->warp_id == FE_WARP_ID_NONE) { + E_FATAL("fe_warp module must be configured w/ a valid ID\n"); + } + else { + E_FATAL + ("fe_warp module misconfigured with invalid fe_warp_id %u\n", + mel->warp_id); + } +} diff --git a/media/sphinxbase/src/libsphinxbase/fe/fe_warp.h b/media/sphinxbase/src/libsphinxbase/fe/fe_warp.h new file mode 100644 index 000000000..f2fd14550 --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/fe/fe_warp.h @@ -0,0 +1,90 @@ +/* ==================================================================== + * Copyright (c) 2006 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +#ifndef FE_WARP_H +#define FE_WARP_H + +#include "fe_internal.h" + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. */ +} +#endif + +/* Ids of the warping functions. The dispatch functions in fe_warp.c accept ids up to FE_WARP_ID_MAX and treat FE_WARP_ID_NONE as "not configured". NOTE(review): FE_WARP_ID_EIDE_GISH (3) is above FE_WARP_ID_MAX (2), so it is rejected as invalid by those checks. */ +#define FE_WARP_ID_INVERSE_LINEAR 0 +#define FE_WARP_ID_AFFINE 1 +#define FE_WARP_ID_PIECEWISE_LINEAR 2 +#define FE_WARP_ID_EIDE_GISH 3 +#define FE_WARP_ID_MAX 2 +#define FE_WARP_ID_NONE 0xffffffff + +/* Callbacks supplied by one warping implementation; fe_warp.c keeps one such entry per warp id and forwards each fe_warp_*() call to the member of the same name. */ +typedef struct { + void (*set_parameters)(char const *param_str, float sampling_rate); + const char * (*doc)(void); + uint32 (*id)(void); + uint32 (*n_param)(void); + float (*warped_to_unwarped)(float nonlinear); + float (*unwarped_to_warped)(float linear); + void (*print)(const char *label); +} fe_warp_conf_t; + +int fe_warp_set(melfb_t *mel, const char *id_name); + +uint32 fe_warp_id(melfb_t *mel); + +const char * fe_warp_doc(melfb_t *mel); + +void fe_warp_set_parameters(melfb_t *mel, char const *param_str, float sampling_rate); + +uint32 fe_warp_n_param(melfb_t *mel); + +float fe_warp_warped_to_unwarped(melfb_t *mel, float nonlinear); + +float fe_warp_unwarped_to_warped(melfb_t *mel, float linear); + +void fe_warp_print(melfb_t *mel, const char *label); + +#define FE_WARP_NO_SIZE 0xffffffff + +#ifdef __cplusplus +} +#endif + + +#endif /* FE_WARP_H */ diff --git a/media/sphinxbase/src/libsphinxbase/fe/fe_warp_affine.c b/media/sphinxbase/src/libsphinxbase/fe/fe_warp_affine.c new file mode
100644 index 000000000..398611917 --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/fe/fe_warp_affine.c @@ -0,0 +1,181 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2006 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * ==================================================================== + * + */ +/********************************************************************* + * + * File: fe_warp_affine.c + * + * Description: + * Warp the frequency axis according to an affine function, i.e.: + * + * w' = a * w + b + * + *********************************************************************/ + +/* static char rcsid[] = "@(#)$Id: fe_warp_affine.c,v 1.2 2006/02/17 00:31:34 egouvea Exp $"; */ + +#include <stdio.h> +#include <stdlib.h> +#include <math.h> +#include <string.h> + +#ifdef _MSC_VER +#pragma warning (disable: 4996) +#endif + +#include "sphinxbase/strfuncs.h" +#include "sphinxbase/err.h" + +#include "fe_warp.h" +#include "fe_warp_affine.h" + +#define N_PARAM 2 +#define YES 1 +#define NO 0 + +/* + * params[0] : a + * params[1] : b + */ +static float params[N_PARAM] = { 1.0f, 0.0f }; +static int32 is_neutral = YES; +static char p_str[256] = ""; +static float nyquist_frequency = 0.0f; + + +const char * +fe_warp_affine_doc() +{ + return "affine :== < w' = a * x + b >"; +} + +uint32 +fe_warp_affine_id() +{ + return FE_WARP_ID_AFFINE; +} + +uint32 +fe_warp_affine_n_param() +{ + return N_PARAM; +} + +void +fe_warp_affine_set_parameters(char const *param_str, float sampling_rate) +{ + char *tok; + char *seps = " \t"; + char temp_param_str[256]; + int param_index = 0; + + nyquist_frequency = sampling_rate / 2; + if (param_str == NULL) { + is_neutral = YES; + return; + } + /* The new parameters are the same as the current ones, so do nothing. */ + if (strcmp(param_str, p_str) == 0) { + return; + } + is_neutral = NO; + strcpy(temp_param_str, param_str); + memset(params, 0, N_PARAM * sizeof(float)); + strcpy(p_str, param_str); + /* FIXME: strtok() is not re-entrant... 
*/ + tok = strtok(temp_param_str, seps); + while (tok != NULL) { + params[param_index++] = (float) atof_c(tok); + tok = strtok(NULL, seps); + if (param_index >= N_PARAM) { + break; + } + } + if (tok != NULL) { + E_INFO + ("Affine warping takes up to two arguments, %s ignored.\n", + tok); + } + if (params[0] == 0) { + is_neutral = YES; + E_INFO + ("Affine warping cannot have slope zero, warping not applied.\n"); + } +} + +float +fe_warp_affine_warped_to_unwarped(float nonlinear) +{ + if (is_neutral) { + return nonlinear; + } + else { + /* linear = (nonlinear - b) / a */ + float temp = nonlinear - params[1]; + temp /= params[0]; + if (temp > nyquist_frequency) { + E_WARN + ("Warp factor %g results in frequency (%.1f) higher than Nyquist (%.1f)\n", + params[0], temp, nyquist_frequency); + } + return temp; + } +} + +float +fe_warp_affine_unwarped_to_warped(float linear) +{ + if (is_neutral) { + return linear; + } + else { + /* nonlinear = a * linear - b */ + float temp = linear * params[0]; + temp += params[1]; + return temp; + } +} + +void +fe_warp_affine_print(const char *label) +{ + uint32 i; + + for (i = 0; i < N_PARAM; i++) { + printf("%s[%04u]: %6.3f ", label, i, params[i]); + } + printf("\n"); +} diff --git a/media/sphinxbase/src/libsphinxbase/fe/fe_warp_affine.h b/media/sphinxbase/src/libsphinxbase/fe/fe_warp_affine.h new file mode 100644 index 000000000..44027d97a --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/fe/fe_warp_affine.h @@ -0,0 +1,76 @@ +/* ==================================================================== + * Copyright (c) 2006 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +#ifndef FE_WARP_AFFINE_H +#define FE_WARP_AFFINE_H + +#include "sphinxbase/fe.h" + + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. 
*/ +} +#endif + +const char * +fe_warp_affine_doc(void); + +uint32 +fe_warp_affine_id(void); + +uint32 +fe_warp_affine_n_param(void); + +void +fe_warp_affine_set_parameters(char const *param_str, float sampling_rate); + +float +fe_warp_affine_warped_to_unwarped(float nonlinear); + +float +fe_warp_affine_unwarped_to_warped(float linear); + +void +fe_warp_affine_print(const char *label); + +#ifdef __cplusplus +} +#endif + +#endif /* FE_WARP_AFFINE_H */ diff --git a/media/sphinxbase/src/libsphinxbase/fe/fe_warp_inverse_linear.c b/media/sphinxbase/src/libsphinxbase/fe/fe_warp_inverse_linear.c new file mode 100644 index 000000000..85e42986b --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/fe/fe_warp_inverse_linear.c @@ -0,0 +1,178 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2006 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/********************************************************************* + * + * File: fe_warp_inverse_linear.c + * + * Description: + * Warp the frequency axis according to an inverse_linear function, i.e.: + * + * w' = w / a + * + *********************************************************************/ + +/* static char rcsid[] = "@(#)$Id: fe_warp_inverse_linear.c,v 1.3 2006/02/23 19:40:11 eht Exp $"; */ + +#include <stdio.h> +#include <stdlib.h> +#include <math.h> +#include <string.h> + +#ifdef _MSC_VER +#pragma warning (disable: 4996) +#endif + +#include "sphinxbase/strfuncs.h" +#include "sphinxbase/err.h" + +#include "fe_warp.h" +#include "fe_warp_inverse_linear.h" + +#define N_PARAM 1 +#define YES 1 +#define NO 0 + +/* + * params[0] : a + */ +static float params[N_PARAM] = { 1.0f }; +static int32 is_neutral = YES; +static char p_str[256] = ""; +static float nyquist_frequency = 0.0f; + + +const char * +fe_warp_inverse_linear_doc() +{ + return "inverse_linear :== < w' = x / a >"; +} + +uint32 +fe_warp_inverse_linear_id() +{ + return FE_WARP_ID_INVERSE_LINEAR; +} + +uint32 +fe_warp_inverse_linear_n_param() +{ + return N_PARAM; +} + +void +fe_warp_inverse_linear_set_parameters(char const *param_str, float sampling_rate) +{ + char *tok; + char *seps = " \t"; + char temp_param_str[256]; + int param_index = 0; + + nyquist_frequency = 
sampling_rate / 2; + if (param_str == NULL) { + is_neutral = YES; + return; + } + /* The new parameters are the same as the current ones, so do nothing. */ + if (strcmp(param_str, p_str) == 0) { + return; + } + is_neutral = NO; + strcpy(temp_param_str, param_str); + memset(params, 0, N_PARAM * sizeof(float)); + strcpy(p_str, param_str); + /* FIXME: strtok() is not re-entrant... */ + tok = strtok(temp_param_str, seps); + while (tok != NULL) { + params[param_index++] = (float) atof_c(tok); + tok = strtok(NULL, seps); + if (param_index >= N_PARAM) { + break; + } + } + if (tok != NULL) { + E_INFO + ("Inverse linear warping takes only one argument, %s ignored.\n", + tok); + } + if (params[0] == 0) { + is_neutral = YES; + E_INFO + ("Inverse linear warping cannot have slope zero, warping not applied.\n"); + } +} + +float +fe_warp_inverse_linear_warped_to_unwarped(float nonlinear) +{ + if (is_neutral) { + return nonlinear; + } + else { + /* linear = nonlinear * a */ + float temp = nonlinear * params[0]; + if (temp > nyquist_frequency) { + E_WARN + ("Warp factor %g results in frequency (%.1f) higher than Nyquist (%.1f)\n", + params[0], temp, nyquist_frequency); + } + return temp; + } +} + +float +fe_warp_inverse_linear_unwarped_to_warped(float linear) +{ + if (is_neutral) { + return linear; + } + else { + /* nonlinear = a / linear */ + float temp = linear / params[0]; + return temp; + } +} + +void +fe_warp_inverse_linear_print(const char *label) +{ + uint32 i; + + for (i = 0; i < N_PARAM; i++) { + printf("%s[%04u]: %6.3f ", label, i, params[i]); + } + printf("\n"); +} diff --git a/media/sphinxbase/src/libsphinxbase/fe/fe_warp_inverse_linear.h b/media/sphinxbase/src/libsphinxbase/fe/fe_warp_inverse_linear.h new file mode 100644 index 000000000..8d4a76725 --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/fe/fe_warp_inverse_linear.h @@ -0,0 +1,77 @@ +/* ==================================================================== + * Copyright (c) 2006 Carnegie Mellon University. 
All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +#ifndef FE_WARP_inverse_linear_H +#define FE_WARP_inverse_linear_H + +#include "sphinxbase/fe.h" + + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. 
*/ +} +#endif + +const char * +fe_warp_inverse_linear_doc(void); + +uint32 +fe_warp_inverse_linear_id(void); + +uint32 +fe_warp_inverse_linear_n_param(void); + +void +fe_warp_inverse_linear_set_parameters(char const *param_str, float sampling_rate); + +float +fe_warp_inverse_linear_warped_to_unwarped(float nonlinear); + +float +fe_warp_inverse_linear_unwarped_to_warped(float linear); + +void +fe_warp_inverse_linear_print(const char *label); + +#ifdef __cplusplus +} +#endif + + +#endif /* FE_WARP_inverse_linear_H */ diff --git a/media/sphinxbase/src/libsphinxbase/fe/fe_warp_piecewise_linear.c b/media/sphinxbase/src/libsphinxbase/fe/fe_warp_piecewise_linear.c new file mode 100644 index 000000000..34570120f --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/fe/fe_warp_piecewise_linear.c @@ -0,0 +1,223 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2006 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. 
+ * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/********************************************************************* + * + * File: fe_warp_piecewise_linear.c + * + * Description: + * + * Warp the frequency axis according to an piecewise linear + * function. The function is linear up to a frequency F, where + * the slope changes so that the Nyquist frequency in the warped + * axis maps to the Nyquist frequency in the unwarped. 
+ * + * w' = a * w, w < F + * w' = a' * w + b, W > F + * w'(0) = 0 + * w'(F) = F + * w'(Nyq) = Nyq + * + *********************************************************************/ + +/* static char rcsid[] = "@(#)$Id: fe_warp_piecewise_linear.c,v 1.2 2006/02/17 00:31:34 egouvea Exp $"; */ + +#include <stdio.h> +#include <stdlib.h> +#include <math.h> +#include <string.h> + +#ifdef _MSC_VER +#pragma warning (disable: 4996) +#endif + +#include "sphinxbase/strfuncs.h" +#include "sphinxbase/err.h" + +#include "fe_warp.h" +#include "fe_warp_piecewise_linear.h" + +#define N_PARAM 2 +#define YES 1 +#define NO 0 + +/* + * params[0] : a + * params[1] : F (the non-differentiable point) + */ +static float params[N_PARAM] = { 1.0f, 6800.0f }; +static float final_piece[2]; +static int32 is_neutral = YES; +static char p_str[256] = ""; +static float nyquist_frequency = 0.0f; + + +const char * +fe_warp_piecewise_linear_doc() +{ + return "piecewise_linear :== < w' = a * w, w < F >"; +} + +uint32 +fe_warp_piecewise_linear_id() +{ + return FE_WARP_ID_PIECEWISE_LINEAR; +} + +uint32 +fe_warp_piecewise_linear_n_param() +{ + return N_PARAM; +} + +void +fe_warp_piecewise_linear_set_parameters(char const *param_str, + float sampling_rate) +{ + char *tok; + char *seps = " \t"; + char temp_param_str[256]; + int param_index = 0; + + nyquist_frequency = sampling_rate / 2; + if (param_str == NULL) { + is_neutral = YES; + return; + } + /* The new parameters are the same as the current ones, so do nothing. */ + if (strcmp(param_str, p_str) == 0) { + return; + } + is_neutral = NO; + strcpy(temp_param_str, param_str); + memset(params, 0, N_PARAM * sizeof(float)); + memset(final_piece, 0, 2 * sizeof(float)); + strcpy(p_str, param_str); + /* FIXME: strtok() is not re-entrant... 
*/ + tok = strtok(temp_param_str, seps); + while (tok != NULL) { + params[param_index++] = (float) atof_c(tok); + tok = strtok(NULL, seps); + if (param_index >= N_PARAM) { + break; + } + } + if (tok != NULL) { + E_INFO + ("Piecewise linear warping takes up to two arguments, %s ignored.\n", + tok); + } + if (params[1] < sampling_rate) { + /* Precompute these. These are the coefficients of a + * straight line that contains the points (F, aF) and (N, + * N), where a = params[0], F = params[1], N = Nyquist + * frequency. + */ + if (params[1] == 0) { + params[1] = sampling_rate * 0.85f; + } + final_piece[0] = + (nyquist_frequency - + params[0] * params[1]) / (nyquist_frequency - params[1]); + final_piece[1] = + nyquist_frequency * params[1] * (params[0] - + 1.0f) / (nyquist_frequency - + params[1]); + } + else { + memset(final_piece, 0, 2 * sizeof(float)); + } + if (params[0] == 0) { + is_neutral = YES; + E_INFO + ("Piecewise linear warping cannot have slope zero, warping not applied.\n"); + } +} + +float +fe_warp_piecewise_linear_warped_to_unwarped(float nonlinear) +{ + if (is_neutral) { + return nonlinear; + } + else { + /* linear = (nonlinear - b) / a */ + float temp; + if (nonlinear < params[0] * params[1]) { + temp = nonlinear / params[0]; + } + else { + temp = nonlinear - final_piece[1]; + temp /= final_piece[0]; + } + if (temp > nyquist_frequency) { + E_WARN + ("Warp factor %g results in frequency (%.1f) higher than Nyquist (%.1f)\n", + params[0], temp, nyquist_frequency); + } + return temp; + } +} + +float +fe_warp_piecewise_linear_unwarped_to_warped(float linear) +{ + if (is_neutral) { + return linear; + } + else { + float temp; + /* nonlinear = a * linear - b */ + if (linear < params[1]) { + temp = linear * params[0]; + } + else { + temp = final_piece[0] * linear + final_piece[1]; + } + return temp; + } +} + +void +fe_warp_piecewise_linear_print(const char *label) +{ + uint32 i; + + for (i = 0; i < N_PARAM; i++) { + printf("%s[%04u]: %6.3f ", label, i, 
params[i]); + } + printf("\n"); +} diff --git a/media/sphinxbase/src/libsphinxbase/fe/fe_warp_piecewise_linear.h b/media/sphinxbase/src/libsphinxbase/fe/fe_warp_piecewise_linear.h new file mode 100644 index 000000000..f15cb251e --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/fe/fe_warp_piecewise_linear.h @@ -0,0 +1,77 @@ +/* ==================================================================== + * Copyright (c) 2006 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +#ifndef FE_WARP_PIECEWIDE_LINEAR_H +#define FE_WARP_PIECEWIDE_LINEAR_H + +#include "sphinxbase/fe.h" + + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. */ +} +#endif + +const char * +fe_warp_piecewise_linear_doc(void); + +uint32 +fe_warp_piecewise_linear_id(void); + +uint32 +fe_warp_piecewise_linear_n_param(void); + +void +fe_warp_piecewise_linear_set_parameters(char const *param_str, float sampling_rate); + +float +fe_warp_piecewise_linear_warped_to_unwarped(float nonlinear); + +float +fe_warp_piecewise_linear_unwarped_to_warped(float linear); + +void +fe_warp_piecewise_linear_print(const char *label); + +#ifdef __cplusplus +} +#endif + + +#endif /* FE_WARP_PIECEWIDE_LINEAR_H */ diff --git a/media/sphinxbase/src/libsphinxbase/fe/fixlog.c b/media/sphinxbase/src/libsphinxbase/fe/fixlog.c new file mode 100644 index 000000000..459c9ffd6 --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/fe/fixlog.c @@ -0,0 +1,229 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2005 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + * File: fixlog.c + * + * Description: Fast approximate fixed-point logarithms + * + * Author: David Huggins-Daines <dhuggins@cs.cmu.edu> + * + */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include "sphinxbase/prim_type.h" +#include "sphinxbase/fixpoint.h" + +#include "fe_internal.h" + +/* Table of log2(x/128)*(1<<DEFAULT_RADIX) */ +/* perl -e 'for (0..128) {my $x = 1 + $_/128; $y = 1 + ($_ + 1.) 
/ 128; + print " (uint32)(", (log($x) + log($y))/2/log(2)," *(1<<DEFAULT_RADIX)),\n"}' */ +static uint32 logtable[] = { + (uint32)(0.00561362771162706*(1<<DEFAULT_RADIX)), + (uint32)(0.0167975342258543*(1<<DEFAULT_RADIX)), + (uint32)(0.0278954072829524*(1<<DEFAULT_RADIX)), + (uint32)(0.0389085604479519*(1<<DEFAULT_RADIX)), + (uint32)(0.0498382774298215*(1<<DEFAULT_RADIX)), + (uint32)(0.060685812979481*(1<<DEFAULT_RADIX)), + (uint32)(0.0714523937543017*(1<<DEFAULT_RADIX)), + (uint32)(0.0821392191505851*(1<<DEFAULT_RADIX)), + (uint32)(0.0927474621054331*(1<<DEFAULT_RADIX)), + (uint32)(0.103278269869348*(1<<DEFAULT_RADIX)), + (uint32)(0.113732764750838*(1<<DEFAULT_RADIX)), + (uint32)(0.124112044834237*(1<<DEFAULT_RADIX)), + (uint32)(0.13441718467188*(1<<DEFAULT_RADIX)), + (uint32)(0.144649235951738*(1<<DEFAULT_RADIX)), + (uint32)(0.154809228141536*(1<<DEFAULT_RADIX)), + (uint32)(0.164898169110351*(1<<DEFAULT_RADIX)), + (uint32)(0.174917045728623*(1<<DEFAULT_RADIX)), + (uint32)(0.184866824447476*(1<<DEFAULT_RADIX)), + (uint32)(0.194748451858191*(1<<DEFAULT_RADIX)), + (uint32)(0.204562855232657*(1<<DEFAULT_RADIX)), + (uint32)(0.214310943045556*(1<<DEFAULT_RADIX)), + (uint32)(0.223993605479021*(1<<DEFAULT_RADIX)), + (uint32)(0.23361171491048*(1<<DEFAULT_RADIX)), + (uint32)(0.243166126384332*(1<<DEFAULT_RADIX)), + (uint32)(0.252657678068119*(1<<DEFAULT_RADIX)), + (uint32)(0.262087191693777*(1<<DEFAULT_RADIX)), + (uint32)(0.271455472984569*(1<<DEFAULT_RADIX)), + (uint32)(0.280763312068243*(1<<DEFAULT_RADIX)), + (uint32)(0.290011483876938*(1<<DEFAULT_RADIX)), + (uint32)(0.299200748534365*(1<<DEFAULT_RADIX)), + (uint32)(0.308331851730729*(1<<DEFAULT_RADIX)), + (uint32)(0.317405525085859*(1<<DEFAULT_RADIX)), + (uint32)(0.32642248650099*(1<<DEFAULT_RADIX)), + (uint32)(0.335383440499621*(1<<DEFAULT_RADIX)), + (uint32)(0.344289078557851*(1<<DEFAULT_RADIX)), + (uint32)(0.353140079424581*(1<<DEFAULT_RADIX)), + (uint32)(0.36193710943195*(1<<DEFAULT_RADIX)), + 
(uint32)(0.37068082279637*(1<<DEFAULT_RADIX)), + (uint32)(0.379371861910488*(1<<DEFAULT_RADIX)), + (uint32)(0.388010857626406*(1<<DEFAULT_RADIX)), + (uint32)(0.396598429530472*(1<<DEFAULT_RADIX)), + (uint32)(0.405135186209943*(1<<DEFAULT_RADIX)), + (uint32)(0.4136217255118*(1<<DEFAULT_RADIX)), + (uint32)(0.422058634793998*(1<<DEFAULT_RADIX)), + (uint32)(0.430446491169411*(1<<DEFAULT_RADIX)), + (uint32)(0.438785861742727*(1<<DEFAULT_RADIX)), + (uint32)(0.447077303840529*(1<<DEFAULT_RADIX)), + (uint32)(0.455321365234813*(1<<DEFAULT_RADIX)), + (uint32)(0.463518584360147*(1<<DEFAULT_RADIX)), + (uint32)(0.471669490524698*(1<<DEFAULT_RADIX)), + (uint32)(0.479774604115327*(1<<DEFAULT_RADIX)), + (uint32)(0.487834436796966*(1<<DEFAULT_RADIX)), + (uint32)(0.49584949170644*(1<<DEFAULT_RADIX)), + (uint32)(0.503820263640951*(1<<DEFAULT_RADIX)), + (uint32)(0.511747239241369*(1<<DEFAULT_RADIX)), + (uint32)(0.519630897170528*(1<<DEFAULT_RADIX)), + (uint32)(0.527471708286662*(1<<DEFAULT_RADIX)), + (uint32)(0.535270135812172*(1<<DEFAULT_RADIX)), + (uint32)(0.543026635497834*(1<<DEFAULT_RADIX)), + (uint32)(0.550741655782637*(1<<DEFAULT_RADIX)), + (uint32)(0.558415637949355*(1<<DEFAULT_RADIX)), + (uint32)(0.56604901627601*(1<<DEFAULT_RADIX)), + (uint32)(0.573642218183348*(1<<DEFAULT_RADIX)), + (uint32)(0.581195664378452*(1<<DEFAULT_RADIX)), + (uint32)(0.588709768994618*(1<<DEFAULT_RADIX)), + (uint32)(0.596184939727604*(1<<DEFAULT_RADIX)), + (uint32)(0.603621577968369*(1<<DEFAULT_RADIX)), + (uint32)(0.61102007893241*(1<<DEFAULT_RADIX)), + (uint32)(0.618380831785792*(1<<DEFAULT_RADIX)), + (uint32)(0.625704219767993*(1<<DEFAULT_RADIX)), + (uint32)(0.632990620311629*(1<<DEFAULT_RADIX)), + (uint32)(0.640240405159187*(1<<DEFAULT_RADIX)), + (uint32)(0.647453940476827*(1<<DEFAULT_RADIX)), + (uint32)(0.654631586965362*(1<<DEFAULT_RADIX)), + (uint32)(0.661773699968486*(1<<DEFAULT_RADIX)), + (uint32)(0.668880629578336*(1<<DEFAULT_RADIX)), + (uint32)(0.675952720738471*(1<<DEFAULT_RADIX)), + 
(uint32)(0.682990313344332*(1<<DEFAULT_RADIX)), + (uint32)(0.689993742341272*(1<<DEFAULT_RADIX)), + (uint32)(0.696963337820209*(1<<DEFAULT_RADIX)), + (uint32)(0.703899425110987*(1<<DEFAULT_RADIX)), + (uint32)(0.710802324873503*(1<<DEFAULT_RADIX)), + (uint32)(0.717672353186654*(1<<DEFAULT_RADIX)), + (uint32)(0.724509821635192*(1<<DEFAULT_RADIX)), + (uint32)(0.731315037394519*(1<<DEFAULT_RADIX)), + (uint32)(0.738088303313493*(1<<DEFAULT_RADIX)), + (uint32)(0.744829917995304*(1<<DEFAULT_RADIX)), + (uint32)(0.751540175876464*(1<<DEFAULT_RADIX)), + (uint32)(0.758219367303974*(1<<DEFAULT_RADIX)), + (uint32)(0.764867778610703*(1<<DEFAULT_RADIX)), + (uint32)(0.77148569218905*(1<<DEFAULT_RADIX)), + (uint32)(0.778073386562917*(1<<DEFAULT_RADIX)), + (uint32)(0.784631136458046*(1<<DEFAULT_RADIX)), + (uint32)(0.791159212870769*(1<<DEFAULT_RADIX)), + (uint32)(0.797657883135205*(1<<DEFAULT_RADIX)), + (uint32)(0.804127410988954*(1<<DEFAULT_RADIX)), + (uint32)(0.810568056637321*(1<<DEFAULT_RADIX)), + (uint32)(0.816980076816112*(1<<DEFAULT_RADIX)), + (uint32)(0.823363724853051*(1<<DEFAULT_RADIX)), + (uint32)(0.829719250727828*(1<<DEFAULT_RADIX)), + (uint32)(0.836046901130843*(1<<DEFAULT_RADIX)), + (uint32)(0.84234691952066*(1<<DEFAULT_RADIX)), + (uint32)(0.848619546180216*(1<<DEFAULT_RADIX)), + (uint32)(0.854865018271815*(1<<DEFAULT_RADIX)), + (uint32)(0.861083569890926*(1<<DEFAULT_RADIX)), + (uint32)(0.867275432118842*(1<<DEFAULT_RADIX)), + (uint32)(0.873440833074202*(1<<DEFAULT_RADIX)), + (uint32)(0.879579997963421*(1<<DEFAULT_RADIX)), + (uint32)(0.88569314913005*(1<<DEFAULT_RADIX)), + (uint32)(0.891780506103101*(1<<DEFAULT_RADIX)), + (uint32)(0.897842285644346*(1<<DEFAULT_RADIX)), + (uint32)(0.903878701794633*(1<<DEFAULT_RADIX)), + (uint32)(0.90988996591924*(1<<DEFAULT_RADIX)), + (uint32)(0.915876286752278*(1<<DEFAULT_RADIX)), + (uint32)(0.921837870440188*(1<<DEFAULT_RADIX)), + (uint32)(0.927774920584334*(1<<DEFAULT_RADIX)), + (uint32)(0.933687638282728*(1<<DEFAULT_RADIX)), + 
    (uint32)(0.939576222170905*(1<<DEFAULT_RADIX)),
    (uint32)(0.945440868461959*(1<<DEFAULT_RADIX)),
    (uint32)(0.951281770985776*(1<<DEFAULT_RADIX)),
    (uint32)(0.957099121227478*(1<<DEFAULT_RADIX)),
    (uint32)(0.962893108365084*(1<<DEFAULT_RADIX)),
    (uint32)(0.968663919306429*(1<<DEFAULT_RADIX)),
    (uint32)(0.974411738725344*(1<<DEFAULT_RADIX)),
    (uint32)(0.980136749097113*(1<<DEFAULT_RADIX)),
    (uint32)(0.985839130733238*(1<<DEFAULT_RADIX)),
    (uint32)(0.991519061815512*(1<<DEFAULT_RADIX)),
    (uint32)(0.997176718429429*(1<<DEFAULT_RADIX)),
    (uint32)(1.00281227459694*(1<<DEFAULT_RADIX)),
};

/**
 * Approximate base-2 logarithm of an unsigned integer, in fixed point.
 *
 * The integer part is the position of the highest set bit of x; the
 * fractional part is looked up in logtable using the seven bits that
 * follow that leading bit.
 *
 * @param x Value to take the logarithm of.
 * @return MIN_FIXLOG2 when x is 0; otherwise the bit position scaled
 *         by (1 << DEFAULT_RADIX) plus the table entry.
 */
int32
fixlog2(uint32 x)
{
    uint32 y;

    if (x == 0)
        return MIN_FIXLOG2;

    /* Get the exponent. */
    /* Every branch leaves x shifted so that its leading one is in bit
     * 31, and y holding the original position of that bit. */
#if ((defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) || \
      defined(__ARM_ARCH_5TE__) || defined(__ARM_ARCH_7A__)) && !defined(__thumb__))
    /* ARM (non-Thumb): count leading zeros with the clz instruction. */
    __asm__("clz %0, %1\n": "=r"(y):"r"(x));
    x <<= y;
    y = 31 - y;
#elif defined(__ppc__)
    /* PowerPC: count leading zeros with cntlzw. */
    __asm__("cntlzw %0, %1\n": "=r"(y):"r"(x));
    x <<= y;
    y = 31 - y;
#elif __GNUC__ >= 4
    /* x is non-zero here, which __builtin_clz requires. */
    y = __builtin_clz(x);
    x <<= y;
    y = (31 - y);
#else
    /* Portable fallback: shift left one bit at a time until bit 31 is
     * set, counting the position down from 31. */
    for (y = 31; y > 0; --y) {
        if (x & 0x80000000)
            break;
        x <<= 1;
    }
#endif
    y <<= DEFAULT_RADIX;
    /* Do a table lookup for the MSB of the mantissa. */
    /* Bits 30..24 of the normalized value, i.e. a 7-bit table index
     * (the leading one in bit 31 is masked off). */
    x = (x >> 24) & 0x7f;
    return y + logtable[x];
}

/**
 * Fixed-point logarithm derived from fixlog2().
 *
 * Scales fixlog2(x) by FIXLN_2 using FIXMUL (presumably ln(2) in fixed
 * point, giving a natural logarithm -- confirm against fixpoint.h).
 */
int
fixlog(uint32 x)
{
    int32 y;
    y = fixlog2(x);
    return FIXMUL(y, FIXLN_2);
}
diff --git a/media/sphinxbase/src/libsphinxbase/fe/yin.c b/media/sphinxbase/src/libsphinxbase/fe/yin.c
new file mode 100644
index 000000000..a63fb30a9
--- /dev/null
+++ b/media/sphinxbase/src/libsphinxbase/fe/yin.c
@@ -0,0 +1,412 @@
/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
/*
 * Copyright (c) 2008 Beyond Access, Inc.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1.
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY BEYOND ACCESS, INC. ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL BEYOND ACCESS, INC. NOR + * ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file yin.c Implementation of pitch extraction. + * @author David Huggins-Daines <dhuggins@cs.cmu.edu> + */ + +/* This implements part of the YIN algorithm: + * + * "YIN, a fundamental frequency estimator for speech and music". + * Alain de Cheveigné and Hideki Kawahara. Journal of the Acoustical + * Society of America, 111 (4), April 2002. + */ + +#include "sphinxbase/prim_type.h" +#include "sphinxbase/ckd_alloc.h" +#include "sphinxbase/fixpoint.h" + +#include "sphinxbase/yin.h" + +#include <stdio.h> +#include <string.h> + +struct yin_s { + uint16 frame_size; /** Size of analysis frame. 
*/ +#ifndef FIXED_POINT + float search_threshold; /**< Threshold for finding period */ + float search_range; /**< Range around best local estimate to search */ +#else + uint16 search_threshold; /**< Threshold for finding period, in Q15 */ + uint16 search_range; /**< Range around best local estimate to search, in Q15 */ +#endif + uint16 nfr; /**< Number of frames read so far. */ + + unsigned char wsize; /**< Size of smoothing window. */ + unsigned char wstart; /**< First frame in window. */ + unsigned char wcur; /**< Current frame of analysis. */ + unsigned char endut; /**< Hoch Hech! Are we at the utterance end? */ + +#ifndef FIXED_POINT + float **diff_window; /**< Window of difference function outputs. */ +#else + fixed32 **diff_window; /**< Window of difference function outputs. */ +#endif + uint16 *period_window; /**< Window of best period estimates. */ + int16 *frame; /**< Storage for frame */ +}; + +/** + * The core of YIN: cumulative mean normalized difference function. + */ +#ifndef FIXED_POINT +static void +cmn_diff(int16 const *signal, float *out_diff, int ndiff) +{ + double cum; + int t, j; + + cum = 0.0f; + out_diff[0] = 1.0f; + + for (t = 1; t < ndiff; ++t) { + float dd; + dd = 0.0f; + for (j = 0; j < ndiff; ++j) { + int diff = signal[j] - signal[t + j]; + dd += (diff * diff); + } + cum += dd; + out_diff[t] = (float)(dd * t / cum); + } +} +#else +static void +cmn_diff(int16 const *signal, int32 *out_diff, int ndiff) +{ + uint32 cum, cshift; + int32 t, tscale; + + out_diff[0] = 32768; + cum = 0; + cshift = 0; + + /* Determine how many bits we can scale t up by below. */ + for (tscale = 0; tscale < 32; ++tscale) + if (ndiff & (1<<(31-tscale))) + break; + --tscale; /* Avoid teh overflowz. */ + /* printf("tscale is %d (ndiff - 1) << tscale is %d\n", + tscale, (ndiff-1) << tscale); */ + + /* Somewhat elaborate block floating point implementation. + * The fp implementation of this is really a lot simpler. 
*/ + for (t = 1; t < ndiff; ++t) { + uint32 dd, dshift, norm; + int j; + + dd = 0; + dshift = 0; + for (j = 0; j < ndiff; ++j) { + int diff = signal[j] - signal[t + j]; + /* Guard against overflows. */ + if (dd > (1UL<<tscale)) { + dd >>= 1; + ++dshift; + } + dd += (diff * diff) >> dshift; + } + /* Make sure the diffs and cum are shifted to the same + * scaling factor (usually dshift will be zero) */ + if (dshift > cshift) { + cum += dd << (dshift-cshift); + } + else { + cum += dd >> (cshift-dshift); + } + + /* Guard against overflows and also ensure that (t<<tscale) > cum. */ + while (cum > (1UL<<tscale)) { + cum >>= 1; + ++cshift; + } + /* Avoid divide-by-zero! */ + if (cum == 0) cum = 1; + /* Calculate the normalizer in high precision. */ + norm = (t << tscale) / cum; + /* Do a long multiply and shift down to Q15. */ + out_diff[t] = (int32)(((long long)dd * norm) + >> (tscale - 15 + cshift - dshift)); + /* printf("dd %d cshift %d dshift %d scaledt %d cum %d norm %d cmn %d\n", + dd, cshift, dshift, (t<<tscale), cum, norm, out_diff[t]); */ + } +} +#endif + +yin_t * +yin_init(int frame_size, float search_threshold, + float search_range, int smooth_window) +{ + yin_t *pe; + + pe = ckd_calloc(1, sizeof(*pe)); + pe->frame_size = frame_size; +#ifndef FIXED_POINT + pe->search_threshold = search_threshold; + pe->search_range = search_range; +#else + pe->search_threshold = (uint16)(search_threshold * 32768); + pe->search_range = (uint16)(search_range * 32768); +#endif + pe->wsize = smooth_window * 2 + 1; + pe->diff_window = ckd_calloc_2d(pe->wsize, + pe->frame_size / 2, + sizeof(**pe->diff_window)); + pe->period_window = ckd_calloc(pe->wsize, + sizeof(*pe->period_window)); + pe->frame = ckd_calloc(pe->frame_size, sizeof(*pe->frame)); + return pe; +} + +void +yin_free(yin_t *pe) +{ + ckd_free_2d(pe->diff_window); + ckd_free(pe->period_window); + ckd_free(pe); +} + +void +yin_start(yin_t *pe) +{ + /* Reset the circular window pointers. 
*/ + pe->wstart = pe->endut = 0; + pe->nfr = 0; +} + +void +yin_end(yin_t *pe) +{ + pe->endut = 1; +} + +int +#ifndef FIXED_POINT +thresholded_search(float *diff_window, float threshold, int start, int end) +#else +thresholded_search(int32 *diff_window, fixed32 threshold, int start, int end) +#endif +{ + int i, argmin; +#ifndef FIXED_POINT + float min; +#else + int min; +#endif + + min = diff_window[start]; + argmin = start; + for (i = start + 1; i < end; ++i) { +#ifndef FIXED_POINT + float diff = diff_window[i]; +#else + int diff = diff_window[i]; +#endif + + if (diff < threshold) { + min = diff; + argmin = i; + break; + } + if (diff < min) { + min = diff; + argmin = i; + } + } + return argmin; +} + +void +yin_store(yin_t *pe, int16 const *frame) +{ + memcpy(pe->frame, frame, pe->frame_size * sizeof(*pe->frame)); +} + +void +yin_write(yin_t *pe, int16 const *frame) +{ + int outptr, difflen; + + /* Rotate the window one frame forward. */ + ++pe->wstart; + /* Fill in the frame before wstart. */ + outptr = pe->wstart - 1; + /* Wrap around the window pointer. */ + if (pe->wstart == pe->wsize) + pe->wstart = 0; + + /* Now calculate normalized difference function. */ + difflen = pe->frame_size / 2; + cmn_diff(frame, pe->diff_window[outptr], difflen); + + /* Find the first point under threshold. If not found, then + * use the absolute minimum. */ + pe->period_window[outptr] + = thresholded_search(pe->diff_window[outptr], + pe->search_threshold, 0, difflen); + + /* Increment total number of frames. */ + ++pe->nfr; +} + +void +yin_write_stored(yin_t *pe) +{ + yin_write(pe, pe->frame); +} + +int +yin_read(yin_t *pe, uint16 *out_period, float *out_bestdiff) +{ + int wstart, wlen, half_wsize, i; + int best, search_width, low_period, high_period; +#ifndef FIXED_POINT + float best_diff; +#else + int best_diff; +#endif + + half_wsize = (pe->wsize-1)/2; + /* Without any smoothing, just return the current value (don't + * need to do anything to the current poitner either). 
*/ + if (half_wsize == 0) { + if (pe->endut) + return 0; + *out_period = pe->period_window[0]; +#ifndef FIXED_POINT + *out_bestdiff = pe->diff_window[0][pe->period_window[0]]; +#else + *out_bestdiff = pe->diff_window[0][pe->period_window[0]] / 32768.0f; +#endif + return 1; + } + + /* We can't do anything unless we have at least (wsize-1)/2 + 1 + * frames, unless we're at the end of the utterance. */ + if (pe->endut == 0 && pe->nfr < half_wsize + 1) { + /* Don't increment the current pointer either. */ + return 0; + } + + /* Establish the smoothing window. */ + /* End of utterance. */ + if (pe->endut) { + /* We are done (no more data) when pe->wcur = pe->wstart. */ + if (pe->wcur == pe->wstart) + return 0; + /* I.e. pe->wcur (circular minus) half_wsize. */ + wstart = (pe->wcur + pe->wsize - half_wsize) % pe->wsize; + /* Number of frames from wstart up to pe->wstart. */ + wlen = pe->wstart - wstart; + if (wlen < 0) wlen += pe->wsize; + /*printf("ENDUT! ");*/ + } + /* Beginning of utterance. */ + else if (pe->nfr < pe->wsize) { + wstart = 0; + wlen = pe->nfr; + } + /* Normal case, it is what it is. */ + else { + wstart = pe->wstart; + wlen = pe->wsize; + } + + /* Now (finally) look for the best local estimate. */ + /* printf("Searching for local estimate in %d frames around %d\n", + wlen, pe->nfr + 1 - wlen); */ + best = pe->period_window[pe->wcur]; + best_diff = pe->diff_window[pe->wcur][best]; + for (i = 0; i < wlen; ++i) { + int j = wstart + i; +#ifndef FIXED_POINT + float diff; +#else + int diff; +#endif + + j %= pe->wsize; + diff = pe->diff_window[j][pe->period_window[j]]; + /* printf("%.2f,%.2f ", 1.0 - (double)diff/32768, + pe->period_window[j] ? 8000.0/pe->period_window[j] : 8000.0); */ + if (diff < best_diff) { + best_diff = diff; + best = pe->period_window[j]; + } + } + /* printf("best: %.2f, %.2f\n", 1.0 - (double)best_diff/32768, + best ? 8000.0/best : 8000.0); */ + /* If it's the same as the current one then return it. 
*/ + if (best == pe->period_window[pe->wcur]) { + /* Increment the current pointer. */ + if (++pe->wcur == pe->wsize) + pe->wcur = 0; + *out_period = best; +#ifndef FIXED_POINT + *out_bestdiff = best_diff; +#else + *out_bestdiff = best_diff / 32768.0f; +#endif + return 1; + } + /* Otherwise, redo the search inside a narrower range. */ +#ifndef FIXED_POINT + search_width = (int)(best * pe->search_range); +#else + search_width = best * pe->search_range / 32768; +#endif + /* printf("Search width = %d * %.2f = %d\n", + best, (double)pe->search_range/32768, search_width); */ + if (search_width == 0) search_width = 1; + low_period = best - search_width; + high_period = best + search_width; + if (low_period < 0) low_period = 0; + if (high_period > pe->frame_size / 2) high_period = pe->frame_size / 2; + /* printf("Searching from %d to %d\n", low_period, high_period); */ + best = thresholded_search(pe->diff_window[pe->wcur], + pe->search_threshold, + low_period, high_period); + best_diff = pe->diff_window[pe->wcur][best]; + + if (out_period) + *out_period = (best > 65535) ? 65535 : best; + if (out_bestdiff) { +#ifndef FIXED_POINT + *out_bestdiff = (best_diff > 1.0f) ? 1.0f : best_diff; +#else + *out_bestdiff = (best_diff > 32768) ? 1.0f : best_diff / 32768.0f; +#endif + } + + /* Increment the current pointer. */ + if (++pe->wcur == pe->wsize) + pe->wcur = 0; + return 1; +} |