summaryrefslogtreecommitdiffstats
path: root/media/sphinxbase/src/libsphinxbase/fe/yin.c
diff options
context:
space:
mode:
Diffstat (limited to 'media/sphinxbase/src/libsphinxbase/fe/yin.c')
-rw-r--r--media/sphinxbase/src/libsphinxbase/fe/yin.c412
1 files changed, 412 insertions, 0 deletions
diff --git a/media/sphinxbase/src/libsphinxbase/fe/yin.c b/media/sphinxbase/src/libsphinxbase/fe/yin.c
new file mode 100644
index 000000000..a63fb30a9
--- /dev/null
+++ b/media/sphinxbase/src/libsphinxbase/fe/yin.c
@@ -0,0 +1,412 @@
+/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
+/*
+ * Copyright (c) 2008 Beyond Access, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY BEYOND ACCESS, INC. ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL BEYOND ACCESS, INC. NOR
+ * ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @file yin.c Implementation of pitch extraction.
+ * @author David Huggins-Daines <dhuggins@cs.cmu.edu>
+ */
+
+/* This implements part of the YIN algorithm:
+ *
+ * "YIN, a fundamental frequency estimator for speech and music".
+ * Alain de Cheveigné and Hideki Kawahara. Journal of the Acoustical
+ * Society of America, 111 (4), April 2002.
+ */
+
+#include "sphinxbase/prim_type.h"
+#include "sphinxbase/ckd_alloc.h"
+#include "sphinxbase/fixpoint.h"
+
+#include "sphinxbase/yin.h"
+
+#include <stdio.h>
+#include <string.h>
+
+struct yin_s {
+ uint16 frame_size; /** Size of analysis frame. */
+#ifndef FIXED_POINT
+ float search_threshold; /**< Threshold for finding period */
+ float search_range; /**< Range around best local estimate to search */
+#else
+ uint16 search_threshold; /**< Threshold for finding period, in Q15 */
+ uint16 search_range; /**< Range around best local estimate to search, in Q15 */
+#endif
+ uint16 nfr; /**< Number of frames read so far. */
+
+ unsigned char wsize; /**< Size of smoothing window. */
+ unsigned char wstart; /**< First frame in window. */
+ unsigned char wcur; /**< Current frame of analysis. */
+ unsigned char endut; /**< Hoch Hech! Are we at the utterance end? */
+
+#ifndef FIXED_POINT
+ float **diff_window; /**< Window of difference function outputs. */
+#else
+ fixed32 **diff_window; /**< Window of difference function outputs. */
+#endif
+ uint16 *period_window; /**< Window of best period estimates. */
+ int16 *frame; /**< Storage for frame */
+};
+
+/**
+ * The core of YIN: cumulative mean normalized difference function.
+ */
+#ifndef FIXED_POINT
+static void
+cmn_diff(int16 const *signal, float *out_diff, int ndiff)
+{
+ double cum;
+ int t, j;
+
+ cum = 0.0f;
+ out_diff[0] = 1.0f;
+
+ for (t = 1; t < ndiff; ++t) {
+ float dd;
+ dd = 0.0f;
+ for (j = 0; j < ndiff; ++j) {
+ int diff = signal[j] - signal[t + j];
+ dd += (diff * diff);
+ }
+ cum += dd;
+ out_diff[t] = (float)(dd * t / cum);
+ }
+}
+#else
+static void
+cmn_diff(int16 const *signal, int32 *out_diff, int ndiff)
+{
+ uint32 cum, cshift;
+ int32 t, tscale;
+
+ out_diff[0] = 32768;
+ cum = 0;
+ cshift = 0;
+
+ /* Determine how many bits we can scale t up by below. */
+ for (tscale = 0; tscale < 32; ++tscale)
+ if (ndiff & (1<<(31-tscale)))
+ break;
+ --tscale; /* Avoid teh overflowz. */
+ /* printf("tscale is %d (ndiff - 1) << tscale is %d\n",
+ tscale, (ndiff-1) << tscale); */
+
+ /* Somewhat elaborate block floating point implementation.
+ * The fp implementation of this is really a lot simpler. */
+ for (t = 1; t < ndiff; ++t) {
+ uint32 dd, dshift, norm;
+ int j;
+
+ dd = 0;
+ dshift = 0;
+ for (j = 0; j < ndiff; ++j) {
+ int diff = signal[j] - signal[t + j];
+ /* Guard against overflows. */
+ if (dd > (1UL<<tscale)) {
+ dd >>= 1;
+ ++dshift;
+ }
+ dd += (diff * diff) >> dshift;
+ }
+ /* Make sure the diffs and cum are shifted to the same
+ * scaling factor (usually dshift will be zero) */
+ if (dshift > cshift) {
+ cum += dd << (dshift-cshift);
+ }
+ else {
+ cum += dd >> (cshift-dshift);
+ }
+
+ /* Guard against overflows and also ensure that (t<<tscale) > cum. */
+ while (cum > (1UL<<tscale)) {
+ cum >>= 1;
+ ++cshift;
+ }
+ /* Avoid divide-by-zero! */
+ if (cum == 0) cum = 1;
+ /* Calculate the normalizer in high precision. */
+ norm = (t << tscale) / cum;
+ /* Do a long multiply and shift down to Q15. */
+ out_diff[t] = (int32)(((long long)dd * norm)
+ >> (tscale - 15 + cshift - dshift));
+ /* printf("dd %d cshift %d dshift %d scaledt %d cum %d norm %d cmn %d\n",
+ dd, cshift, dshift, (t<<tscale), cum, norm, out_diff[t]); */
+ }
+}
+#endif
+
+yin_t *
+yin_init(int frame_size, float search_threshold,
+ float search_range, int smooth_window)
+{
+ yin_t *pe;
+
+ pe = ckd_calloc(1, sizeof(*pe));
+ pe->frame_size = frame_size;
+#ifndef FIXED_POINT
+ pe->search_threshold = search_threshold;
+ pe->search_range = search_range;
+#else
+ pe->search_threshold = (uint16)(search_threshold * 32768);
+ pe->search_range = (uint16)(search_range * 32768);
+#endif
+ pe->wsize = smooth_window * 2 + 1;
+ pe->diff_window = ckd_calloc_2d(pe->wsize,
+ pe->frame_size / 2,
+ sizeof(**pe->diff_window));
+ pe->period_window = ckd_calloc(pe->wsize,
+ sizeof(*pe->period_window));
+ pe->frame = ckd_calloc(pe->frame_size, sizeof(*pe->frame));
+ return pe;
+}
+
+void
+yin_free(yin_t *pe)
+{
+ ckd_free_2d(pe->diff_window);
+ ckd_free(pe->period_window);
+ ckd_free(pe);
+}
+
+void
+yin_start(yin_t *pe)
+{
+ /* Reset the circular window pointers. */
+ pe->wstart = pe->endut = 0;
+ pe->nfr = 0;
+}
+
+void
+yin_end(yin_t *pe)
+{
+ pe->endut = 1;
+}
+
+int
+#ifndef FIXED_POINT
+thresholded_search(float *diff_window, float threshold, int start, int end)
+#else
+thresholded_search(int32 *diff_window, fixed32 threshold, int start, int end)
+#endif
+{
+ int i, argmin;
+#ifndef FIXED_POINT
+ float min;
+#else
+ int min;
+#endif
+
+ min = diff_window[start];
+ argmin = start;
+ for (i = start + 1; i < end; ++i) {
+#ifndef FIXED_POINT
+ float diff = diff_window[i];
+#else
+ int diff = diff_window[i];
+#endif
+
+ if (diff < threshold) {
+ min = diff;
+ argmin = i;
+ break;
+ }
+ if (diff < min) {
+ min = diff;
+ argmin = i;
+ }
+ }
+ return argmin;
+}
+
+void
+yin_store(yin_t *pe, int16 const *frame)
+{
+ memcpy(pe->frame, frame, pe->frame_size * sizeof(*pe->frame));
+}
+
+void
+yin_write(yin_t *pe, int16 const *frame)
+{
+ int outptr, difflen;
+
+ /* Rotate the window one frame forward. */
+ ++pe->wstart;
+ /* Fill in the frame before wstart. */
+ outptr = pe->wstart - 1;
+ /* Wrap around the window pointer. */
+ if (pe->wstart == pe->wsize)
+ pe->wstart = 0;
+
+ /* Now calculate normalized difference function. */
+ difflen = pe->frame_size / 2;
+ cmn_diff(frame, pe->diff_window[outptr], difflen);
+
+ /* Find the first point under threshold. If not found, then
+ * use the absolute minimum. */
+ pe->period_window[outptr]
+ = thresholded_search(pe->diff_window[outptr],
+ pe->search_threshold, 0, difflen);
+
+ /* Increment total number of frames. */
+ ++pe->nfr;
+}
+
+void
+yin_write_stored(yin_t *pe)
+{
+ yin_write(pe, pe->frame);
+}
+
+int
+yin_read(yin_t *pe, uint16 *out_period, float *out_bestdiff)
+{
+ int wstart, wlen, half_wsize, i;
+ int best, search_width, low_period, high_period;
+#ifndef FIXED_POINT
+ float best_diff;
+#else
+ int best_diff;
+#endif
+
+ half_wsize = (pe->wsize-1)/2;
+ /* Without any smoothing, just return the current value (don't
+ * need to do anything to the current poitner either). */
+ if (half_wsize == 0) {
+ if (pe->endut)
+ return 0;
+ *out_period = pe->period_window[0];
+#ifndef FIXED_POINT
+ *out_bestdiff = pe->diff_window[0][pe->period_window[0]];
+#else
+ *out_bestdiff = pe->diff_window[0][pe->period_window[0]] / 32768.0f;
+#endif
+ return 1;
+ }
+
+ /* We can't do anything unless we have at least (wsize-1)/2 + 1
+ * frames, unless we're at the end of the utterance. */
+ if (pe->endut == 0 && pe->nfr < half_wsize + 1) {
+ /* Don't increment the current pointer either. */
+ return 0;
+ }
+
+ /* Establish the smoothing window. */
+ /* End of utterance. */
+ if (pe->endut) {
+ /* We are done (no more data) when pe->wcur = pe->wstart. */
+ if (pe->wcur == pe->wstart)
+ return 0;
+ /* I.e. pe->wcur (circular minus) half_wsize. */
+ wstart = (pe->wcur + pe->wsize - half_wsize) % pe->wsize;
+ /* Number of frames from wstart up to pe->wstart. */
+ wlen = pe->wstart - wstart;
+ if (wlen < 0) wlen += pe->wsize;
+ /*printf("ENDUT! ");*/
+ }
+ /* Beginning of utterance. */
+ else if (pe->nfr < pe->wsize) {
+ wstart = 0;
+ wlen = pe->nfr;
+ }
+ /* Normal case, it is what it is. */
+ else {
+ wstart = pe->wstart;
+ wlen = pe->wsize;
+ }
+
+ /* Now (finally) look for the best local estimate. */
+ /* printf("Searching for local estimate in %d frames around %d\n",
+ wlen, pe->nfr + 1 - wlen); */
+ best = pe->period_window[pe->wcur];
+ best_diff = pe->diff_window[pe->wcur][best];
+ for (i = 0; i < wlen; ++i) {
+ int j = wstart + i;
+#ifndef FIXED_POINT
+ float diff;
+#else
+ int diff;
+#endif
+
+ j %= pe->wsize;
+ diff = pe->diff_window[j][pe->period_window[j]];
+ /* printf("%.2f,%.2f ", 1.0 - (double)diff/32768,
+ pe->period_window[j] ? 8000.0/pe->period_window[j] : 8000.0); */
+ if (diff < best_diff) {
+ best_diff = diff;
+ best = pe->period_window[j];
+ }
+ }
+ /* printf("best: %.2f, %.2f\n", 1.0 - (double)best_diff/32768,
+ best ? 8000.0/best : 8000.0); */
+ /* If it's the same as the current one then return it. */
+ if (best == pe->period_window[pe->wcur]) {
+ /* Increment the current pointer. */
+ if (++pe->wcur == pe->wsize)
+ pe->wcur = 0;
+ *out_period = best;
+#ifndef FIXED_POINT
+ *out_bestdiff = best_diff;
+#else
+ *out_bestdiff = best_diff / 32768.0f;
+#endif
+ return 1;
+ }
+ /* Otherwise, redo the search inside a narrower range. */
+#ifndef FIXED_POINT
+ search_width = (int)(best * pe->search_range);
+#else
+ search_width = best * pe->search_range / 32768;
+#endif
+ /* printf("Search width = %d * %.2f = %d\n",
+ best, (double)pe->search_range/32768, search_width); */
+ if (search_width == 0) search_width = 1;
+ low_period = best - search_width;
+ high_period = best + search_width;
+ if (low_period < 0) low_period = 0;
+ if (high_period > pe->frame_size / 2) high_period = pe->frame_size / 2;
+ /* printf("Searching from %d to %d\n", low_period, high_period); */
+ best = thresholded_search(pe->diff_window[pe->wcur],
+ pe->search_threshold,
+ low_period, high_period);
+ best_diff = pe->diff_window[pe->wcur][best];
+
+ if (out_period)
+ *out_period = (best > 65535) ? 65535 : best;
+ if (out_bestdiff) {
+#ifndef FIXED_POINT
+ *out_bestdiff = (best_diff > 1.0f) ? 1.0f : best_diff;
+#else
+ *out_bestdiff = (best_diff > 32768) ? 1.0f : best_diff / 32768.0f;
+#endif
+ }
+
+ /* Increment the current pointer. */
+ if (++pe->wcur == pe->wsize)
+ pe->wcur = 0;
+ return 1;
+}