/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ /* * Copyright (c) 2008 Beyond Access, Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * THIS SOFTWARE IS PROVIDED BY BEYOND ACCESS, INC. ``AS IS'' AND ANY * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL BEYOND ACCESS, INC. NOR * ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /** * @file yin.c Implementation of pitch extraction. * @author David Huggins-Daines */ /* This implements part of the YIN algorithm: * * "YIN, a fundamental frequency estimator for speech and music". * Alain de Cheveigné and Hideki Kawahara. Journal of the Acoustical * Society of America, 111 (4), April 2002. */ #include "sphinxbase/prim_type.h" #include "sphinxbase/ckd_alloc.h" #include "sphinxbase/fixpoint.h" #include "sphinxbase/yin.h" #include #include struct yin_s { uint16 frame_size; /** Size of analysis frame. */ #ifndef FIXED_POINT float search_threshold; /**< Threshold for finding period */ float search_range; /**< Range around best local estimate to search */ #else uint16 search_threshold; /**< Threshold for finding period, in Q15 */ uint16 search_range; /**< Range around best local estimate to search, in Q15 */ #endif uint16 nfr; /**< Number of frames read so far. */ unsigned char wsize; /**< Size of smoothing window. */ unsigned char wstart; /**< First frame in window. */ unsigned char wcur; /**< Current frame of analysis. */ unsigned char endut; /**< Hoch Hech! Are we at the utterance end? */ #ifndef FIXED_POINT float **diff_window; /**< Window of difference function outputs. */ #else fixed32 **diff_window; /**< Window of difference function outputs. */ #endif uint16 *period_window; /**< Window of best period estimates. */ int16 *frame; /**< Storage for frame */ }; /** * The core of YIN: cumulative mean normalized difference function. */ #ifndef FIXED_POINT static void cmn_diff(int16 const *signal, float *out_diff, int ndiff) { double cum; int t, j; cum = 0.0f; out_diff[0] = 1.0f; for (t = 1; t < ndiff; ++t) { float dd; dd = 0.0f; for (j = 0; j < ndiff; ++j) { int diff = signal[j] - signal[t + j]; dd += (diff * diff); } cum += dd; out_diff[t] = (float)(dd * t / cum); } } #else static void cmn_diff(int16 const *signal, int32 *out_diff, int ndiff) { uint32 cum, cshift; int32 t, tscale; out_diff[0] = 32768; cum = 0; cshift = 0; /* Determine how many bits we can scale t up by below. */ for (tscale = 0; tscale < 32; ++tscale) if (ndiff & (1<<(31-tscale))) break; --tscale; /* Avoid teh overflowz. */ /* printf("tscale is %d (ndiff - 1) << tscale is %d\n", tscale, (ndiff-1) << tscale); */ /* Somewhat elaborate block floating point implementation. * The fp implementation of this is really a lot simpler. */ for (t = 1; t < ndiff; ++t) { uint32 dd, dshift, norm; int j; dd = 0; dshift = 0; for (j = 0; j < ndiff; ++j) { int diff = signal[j] - signal[t + j]; /* Guard against overflows. */ if (dd > (1UL<>= 1; ++dshift; } dd += (diff * diff) >> dshift; } /* Make sure the diffs and cum are shifted to the same * scaling factor (usually dshift will be zero) */ if (dshift > cshift) { cum += dd << (dshift-cshift); } else { cum += dd >> (cshift-dshift); } /* Guard against overflows and also ensure that (t< cum. */ while (cum > (1UL<>= 1; ++cshift; } /* Avoid divide-by-zero! */ if (cum == 0) cum = 1; /* Calculate the normalizer in high precision. */ norm = (t << tscale) / cum; /* Do a long multiply and shift down to Q15. */ out_diff[t] = (int32)(((long long)dd * norm) >> (tscale - 15 + cshift - dshift)); /* printf("dd %d cshift %d dshift %d scaledt %d cum %d norm %d cmn %d\n", dd, cshift, dshift, (t<frame_size = frame_size; #ifndef FIXED_POINT pe->search_threshold = search_threshold; pe->search_range = search_range; #else pe->search_threshold = (uint16)(search_threshold * 32768); pe->search_range = (uint16)(search_range * 32768); #endif pe->wsize = smooth_window * 2 + 1; pe->diff_window = ckd_calloc_2d(pe->wsize, pe->frame_size / 2, sizeof(**pe->diff_window)); pe->period_window = ckd_calloc(pe->wsize, sizeof(*pe->period_window)); pe->frame = ckd_calloc(pe->frame_size, sizeof(*pe->frame)); return pe; } void yin_free(yin_t *pe) { ckd_free_2d(pe->diff_window); ckd_free(pe->period_window); ckd_free(pe); } void yin_start(yin_t *pe) { /* Reset the circular window pointers. */ pe->wstart = pe->endut = 0; pe->nfr = 0; } void yin_end(yin_t *pe) { pe->endut = 1; } int #ifndef FIXED_POINT thresholded_search(float *diff_window, float threshold, int start, int end) #else thresholded_search(int32 *diff_window, fixed32 threshold, int start, int end) #endif { int i, argmin; #ifndef FIXED_POINT float min; #else int min; #endif min = diff_window[start]; argmin = start; for (i = start + 1; i < end; ++i) { #ifndef FIXED_POINT float diff = diff_window[i]; #else int diff = diff_window[i]; #endif if (diff < threshold) { min = diff; argmin = i; break; } if (diff < min) { min = diff; argmin = i; } } return argmin; } void yin_store(yin_t *pe, int16 const *frame) { memcpy(pe->frame, frame, pe->frame_size * sizeof(*pe->frame)); } void yin_write(yin_t *pe, int16 const *frame) { int outptr, difflen; /* Rotate the window one frame forward. */ ++pe->wstart; /* Fill in the frame before wstart. */ outptr = pe->wstart - 1; /* Wrap around the window pointer. */ if (pe->wstart == pe->wsize) pe->wstart = 0; /* Now calculate normalized difference function. */ difflen = pe->frame_size / 2; cmn_diff(frame, pe->diff_window[outptr], difflen); /* Find the first point under threshold. If not found, then * use the absolute minimum. */ pe->period_window[outptr] = thresholded_search(pe->diff_window[outptr], pe->search_threshold, 0, difflen); /* Increment total number of frames. */ ++pe->nfr; } void yin_write_stored(yin_t *pe) { yin_write(pe, pe->frame); } int yin_read(yin_t *pe, uint16 *out_period, float *out_bestdiff) { int wstart, wlen, half_wsize, i; int best, search_width, low_period, high_period; #ifndef FIXED_POINT float best_diff; #else int best_diff; #endif half_wsize = (pe->wsize-1)/2; /* Without any smoothing, just return the current value (don't * need to do anything to the current poitner either). */ if (half_wsize == 0) { if (pe->endut) return 0; *out_period = pe->period_window[0]; #ifndef FIXED_POINT *out_bestdiff = pe->diff_window[0][pe->period_window[0]]; #else *out_bestdiff = pe->diff_window[0][pe->period_window[0]] / 32768.0f; #endif return 1; } /* We can't do anything unless we have at least (wsize-1)/2 + 1 * frames, unless we're at the end of the utterance. */ if (pe->endut == 0 && pe->nfr < half_wsize + 1) { /* Don't increment the current pointer either. */ return 0; } /* Establish the smoothing window. */ /* End of utterance. */ if (pe->endut) { /* We are done (no more data) when pe->wcur = pe->wstart. */ if (pe->wcur == pe->wstart) return 0; /* I.e. pe->wcur (circular minus) half_wsize. */ wstart = (pe->wcur + pe->wsize - half_wsize) % pe->wsize; /* Number of frames from wstart up to pe->wstart. */ wlen = pe->wstart - wstart; if (wlen < 0) wlen += pe->wsize; /*printf("ENDUT! ");*/ } /* Beginning of utterance. */ else if (pe->nfr < pe->wsize) { wstart = 0; wlen = pe->nfr; } /* Normal case, it is what it is. */ else { wstart = pe->wstart; wlen = pe->wsize; } /* Now (finally) look for the best local estimate. */ /* printf("Searching for local estimate in %d frames around %d\n", wlen, pe->nfr + 1 - wlen); */ best = pe->period_window[pe->wcur]; best_diff = pe->diff_window[pe->wcur][best]; for (i = 0; i < wlen; ++i) { int j = wstart + i; #ifndef FIXED_POINT float diff; #else int diff; #endif j %= pe->wsize; diff = pe->diff_window[j][pe->period_window[j]]; /* printf("%.2f,%.2f ", 1.0 - (double)diff/32768, pe->period_window[j] ? 8000.0/pe->period_window[j] : 8000.0); */ if (diff < best_diff) { best_diff = diff; best = pe->period_window[j]; } } /* printf("best: %.2f, %.2f\n", 1.0 - (double)best_diff/32768, best ? 8000.0/best : 8000.0); */ /* If it's the same as the current one then return it. */ if (best == pe->period_window[pe->wcur]) { /* Increment the current pointer. */ if (++pe->wcur == pe->wsize) pe->wcur = 0; *out_period = best; #ifndef FIXED_POINT *out_bestdiff = best_diff; #else *out_bestdiff = best_diff / 32768.0f; #endif return 1; } /* Otherwise, redo the search inside a narrower range. */ #ifndef FIXED_POINT search_width = (int)(best * pe->search_range); #else search_width = best * pe->search_range / 32768; #endif /* printf("Search width = %d * %.2f = %d\n", best, (double)pe->search_range/32768, search_width); */ if (search_width == 0) search_width = 1; low_period = best - search_width; high_period = best + search_width; if (low_period < 0) low_period = 0; if (high_period > pe->frame_size / 2) high_period = pe->frame_size / 2; /* printf("Searching from %d to %d\n", low_period, high_period); */ best = thresholded_search(pe->diff_window[pe->wcur], pe->search_threshold, low_period, high_period); best_diff = pe->diff_window[pe->wcur][best]; if (out_period) *out_period = (best > 65535) ? 65535 : best; if (out_bestdiff) { #ifndef FIXED_POINT *out_bestdiff = (best_diff > 1.0f) ? 1.0f : best_diff; #else *out_bestdiff = (best_diff > 32768) ? 1.0f : best_diff / 32768.0f; #endif } /* Increment the current pointer. */ if (++pe->wcur == pe->wsize) pe->wcur = 0; return 1; }