Merge remote-tracking branch 'origin/redwood' into release

author: Moonchild <moonchild@palemoon.org> 2020-06-01 21:58:35 +0000
committer: Moonchild <moonchild@palemoon.org> 2020-06-01 21:58:35 +0000
commit: c6ca4380e9e5e95df9de02daf8bfb9a6ebc22810 (patch)
tree: c7672903a2030d37f861b12900165a015f49d10a /media/pocketsphinx/src/acmod.h
parent: 451509e2c0188a4164d4b3d1d9f5839ed1e95246 (diff)
parent: 744b044935f7d1d67fbe0df42d898efcbdd00536 (diff)
download: UXP-c6ca4380e9e5e95df9de02daf8bfb9a6ebc22810.tar
UXP-c6ca4380e9e5e95df9de02daf8bfb9a6ebc22810.tar.gz
UXP-c6ca4380e9e5e95df9de02daf8bfb9a6ebc22810.tar.lz
UXP-c6ca4380e9e5e95df9de02daf8bfb9a6ebc22810.tar.xz
UXP-c6ca4380e9e5e95df9de02daf8bfb9a6ebc22810.zip
1 files changed, 0 insertions, 466 deletions
diff --git a/media/pocketsphinx/src/acmod.h b/media/pocketsphinx/src/acmod.h
deleted file mode 100644
index f4d5761c2..000000000
--- a/media/pocketsphinx/src/acmod.h
+++ /dev/null
@@ -1,466 +0,0 @@
-/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
-/* ====================================================================
- * Copyright (c) 2008 Carnegie Mellon University.  All rights
- * reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer. 
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *
- * This work was supported in part by funding from the Defense Advanced 
- * Research Projects Agency and the National Science Foundation of the 
- * United States of America, and the CMU Sphinx Speech Consortium.
- *
- * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
- * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
- * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
- * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * ====================================================================
- *
- */
-
-/**
- * @file acmod.h Acoustic model structures for PocketSphinx.
- * @author David Huggins-Daines <dhuggins@cs.cmu.edu>
- */
-
-#ifndef __ACMOD_H__
-#define __ACMOD_H__
-
-/* System headers. */
-#include <stdio.h>
-
-/* SphinxBase headers. */
-#include <sphinxbase/cmd_ln.h>
-#include <sphinxbase/logmath.h>
-#include <sphinxbase/fe.h>
-#include <sphinxbase/feat.h>
-#include <sphinxbase/bitvec.h>
-#include <sphinxbase/err.h>
-#include <sphinxbase/prim_type.h>
-
-/* Local headers. */
-#include "ps_mllr.h"
-#include "bin_mdef.h"
-#include "tmat.h"
-#include "hmm.h"
-
-/**
- * States in utterance processing.
- */
-typedef enum acmod_state_e {
-    ACMOD_IDLE,		/**< Not in an utterance. */
-    ACMOD_STARTED,      /**< Utterance started, no data yet. */
-    ACMOD_PROCESSING,   /**< Utterance in progress. */
-    ACMOD_ENDED         /**< Utterance ended, still buffering. */
-} acmod_state_t;
-
-/**
- * Dummy senone score value for unintentionally active states.
- */
-#define SENSCR_DUMMY 0x7fff
-
-/**
- * Feature space linear transform structure.
- */
-struct ps_mllr_s {
-    int refcnt;     /**< Reference count. */
-    int n_class;    /**< Number of MLLR classes. */
-    int n_feat;     /**< Number of feature streams. */
-    int *veclen;    /**< Length of input vectors for each stream. */
-    float32 ****A;  /**< Rotation part of mean transformations. */
-    float32 ***b;   /**< Bias part of mean transformations. */
-    float32 ***h;   /**< Diagonal transformation of variances. */
-    int32 *cb2mllr; /**< Mapping from codebooks to transformations. */
-};
-
-/**
- * Acoustic model parameter structure. 
- */
-typedef struct ps_mgau_s ps_mgau_t;
-
-typedef struct ps_mgaufuncs_s {
-    char const *name;
-
-    int (*frame_eval)(ps_mgau_t *mgau,
-                      int16 *senscr,
-                      uint8 *senone_active,
-                      int32 n_senone_active,
-                      mfcc_t ** feat,
-                      int32 frame,
-                      int32 compallsen);
-    int (*transform)(ps_mgau_t *mgau,
-                     ps_mllr_t *mllr);
-    void (*free)(ps_mgau_t *mgau);
-} ps_mgaufuncs_t;    
-
-struct ps_mgau_s {
-    ps_mgaufuncs_t *vt;  /**< vtable of mgau functions. */
-    int frame_idx;       /**< frame counter. */
-};
-
-#define ps_mgau_base(mg) ((ps_mgau_t *)(mg))
-#define ps_mgau_frame_eval(mg,senscr,senone_active,n_senone_active,feat,frame,compallsen) \
-    (*ps_mgau_base(mg)->vt->frame_eval)                                 \
-    (mg, senscr, senone_active, n_senone_active, feat, frame, compallsen)
-#define ps_mgau_transform(mg, mllr)                                  \
-    (*ps_mgau_base(mg)->vt->transform)(mg, mllr)
-#define ps_mgau_free(mg)                                  \
-    (*ps_mgau_base(mg)->vt->free)(mg)
-
-/**
- * Acoustic model structure.
- *
- * This object encapsulates all stages of acoustic processing, from
- * raw audio input to acoustic score output.  The reason for grouping
- * all of these modules together is that they all have to "agree" in
- * their parameterizations, and the configuration of the acoustic and
- * dynamic feature computation is completely dependent on the
- * parameters used to build the original acoustic model (which should
- * by now always be specified in a feat.params file).
- *
- * Because there is not a one-to-one correspondence from blocks of
- * input audio or frames of input features to frames of acoustic
- * scores (due to dynamic feature calculation), results may not be
- * immediately available after input, and the output results will not
- * correspond to the last piece of data input.
- *
- * TODO: In addition, this structure serves the purpose of queueing
- * frames of features (and potentially also scores in the future) for
- * asynchronous passes of recognition operating in parallel.
- */
-struct acmod_s {
-    /* Global objects, not retained. */
-    cmd_ln_t *config;          /**< Configuration. */
-    logmath_t *lmath;          /**< Log-math computation. */
-    glist_t strings;           /**< Temporary acoustic model filenames. */
-
-    /* Feature computation: */
-    fe_t *fe;                  /**< Acoustic feature computation. */
-    feat_t *fcb;               /**< Dynamic feature computation. */
-
-    /* Model parameters: */
-    bin_mdef_t *mdef;          /**< Model definition. */
-    tmat_t *tmat;              /**< Transition matrices. */
-    ps_mgau_t *mgau;           /**< Model parameters. */
-    ps_mllr_t *mllr;           /**< Speaker transformation. */
-
-    /* Senone scoring: */
-    int16 *senone_scores;      /**< GMM scores for current frame. */
-    bitvec_t *senone_active_vec; /**< Active GMMs in current frame. */
-    uint8 *senone_active;      /**< Array of deltas to active GMMs. */
-    int senscr_frame;          /**< Frame index for senone_scores. */
-    int n_senone_active;       /**< Number of active GMMs. */
-    int log_zero;              /**< Zero log-probability value. */
-
-    /* Utterance processing: */
-    mfcc_t **mfc_buf;   /**< Temporary buffer of acoustic features. */
-    mfcc_t ***feat_buf; /**< Temporary buffer of dynamic features. */
-    FILE *rawfh;        /**< File for writing raw audio data. */
-    FILE *mfcfh;        /**< File for writing acoustic feature data. */
-    FILE *senfh;        /**< File for writing senone score data. */
-    FILE *insenfh;	/**< Input senone score file. */
-    long *framepos;     /**< File positions of recent frames in senone file. */
-
-    /* Rawdata collected during decoding */
-    int16 *rawdata;
-    int32 rawdata_size;
-    int32 rawdata_pos;
-
-    /* A whole bunch of flags and counters: */
-    uint8 state;        /**< State of utterance processing. */
-    uint8 compallsen;   /**< Compute all senones? */
-    uint8 grow_feat;    /**< Whether to grow feat_buf. */
-    uint8 insen_swap;   /**< Whether to swap input senone score. */
-
-    frame_idx_t utt_start_frame; /**< Index of the utterance start in the stream, all timings are relative to that. */
-
-    frame_idx_t output_frame; /**< Index of next frame of dynamic features. */
-    frame_idx_t n_mfc_alloc;  /**< Number of frames allocated in mfc_buf */
-    frame_idx_t n_mfc_frame;  /**< Number of frames active in mfc_buf */
-    frame_idx_t mfc_outidx;   /**< Start of active frames in mfc_buf */
-    frame_idx_t n_feat_alloc; /**< Number of frames allocated in feat_buf */
-    frame_idx_t n_feat_frame; /**< Number of frames active in feat_buf */
-    frame_idx_t feat_outidx;  /**< Start of active frames in feat_buf */
-};
-typedef struct acmod_s acmod_t;
-
-/**
- * Initialize an acoustic model.
- *
- * @param config a command-line object containing parameters.  This
- *               pointer is not retained by this object.
- * @param lmath global log-math parameters.
- * @param fe a previously-initialized acoustic feature module to use,
- *           or NULL to create one automatically.  If this is supplied
- *           and its parameters do not match those in the acoustic
- *           model, this function will fail.  This pointer is not retained.
- * @param fcb a previously-initialized dynamic feature module to use,
- *            or NULL to create one automatically.  If this is supplied
- *            and its parameters do not match those in the acoustic
- *            model, this function will fail.  This pointer is not retained.
- * @return a newly initialized acmod_t, or NULL on failure.
- */
-acmod_t *acmod_init(cmd_ln_t *config, logmath_t *lmath, fe_t *fe, feat_t *fcb);
-
-/**
- * Adapt acoustic model using a linear transform.
- *
- * @param mllr The new transform to use, or NULL to update the existing
- *              transform.  The decoder retains ownership of this pointer,
- *              so you should not attempt to free it manually.  Use
- *              ps_mllr_retain() if you wish to reuse it
- *              elsewhere.
- * @return The updated transform object for this decoder, or
- *         NULL on failure.
- */
-ps_mllr_t *acmod_update_mllr(acmod_t *acmod, ps_mllr_t *mllr);
-
-/**
- * Start logging senone scores to a filehandle.
- *
- * @param acmod Acoustic model object.
- * @param senfh Filehandle to log to.
- * @return 0 for success, <0 on error.
- */
-int acmod_set_senfh(acmod_t *acmod, FILE *senfh);
-
-/**
- * Start logging MFCCs to a filehandle.
- *
- * @param acmod Acoustic model object.
- * @param logfh Filehandle to log to.
- * @return 0 for success, <0 on error.
- */
-int acmod_set_mfcfh(acmod_t *acmod, FILE *logfh);
-
-/**
- * Start logging raw audio to a filehandle.
- *
- * @param acmod Acoustic model object.
- * @param logfh Filehandle to log to.
- * @return 0 for success, <0 on error.
- */
-int acmod_set_rawfh(acmod_t *acmod, FILE *logfh);
-
-/**
- * Finalize an acoustic model.
- */
-void acmod_free(acmod_t *acmod);
-
-/**
- * Mark the start of an utterance.
- */
-int acmod_start_utt(acmod_t *acmod);
-
-/**
- * Mark the end of an utterance.
- */
-int acmod_end_utt(acmod_t *acmod);
-
-/**
- * Rewind the current utterance, allowing it to be rescored.
- *
- * After calling this function, the internal frame index is reset, and
- * acmod_score() will return scores starting at the first frame of the
- * current utterance.  Currently, acmod_set_grow() must have been
- * called to enable growing the feature buffer in order for this to
- * work.  In the future, senone scores may be cached instead.
- *
- * @return 0 for success, <0 for failure (if the utterance can't be
- *         rewound due to no feature or score data available)
- */
-int acmod_rewind(acmod_t *acmod);
-
-/**
- * Advance the frame index.
- *
- * This function moves to the next frame of input data.  Subsequent
- * calls to acmod_score() will return scores for that frame, until the
- * next call to acmod_advance().
- *
- * @return New frame index.
- */
-int acmod_advance(acmod_t *acmod);
-
-/**
- * Set memory allocation policy for utterance processing.
- *
- * @param grow_feat If non-zero, the internal dynamic feature buffer
- * will expand as necessary to encompass any amount of data fed to the
- * model.
- * @return previous allocation policy.
- */
-int acmod_set_grow(acmod_t *acmod, int grow_feat);
-
-/**
- * TODO: Set queue length for utterance processing.
- *
- * This function allows multiple concurrent passes of search to
- * operate on different parts of the utterance.
- */
-
-/**
- * Feed raw audio data to the acoustic model for scoring.
- *
- * @param inout_raw In: Pointer to buffer of raw samples
- *                  Out: Pointer to next sample to be read
- * @param inout_n_samps In: Number of samples available
- *                      Out: Number of samples remaining
- * @param full_utt If non-zero, this block represents a full
- *                 utterance and should be processed as such.
- * @return Number of frames of data processed.
- */
-int acmod_process_raw(acmod_t *acmod,
-                      int16 const **inout_raw,
-                      size_t *inout_n_samps,
-                      int full_utt);
-
-/**
- * Feed acoustic feature data into the acoustic model for scoring.
- *
- * @param inout_cep In: Pointer to buffer of features
- *                  Out: Pointer to next frame to be read
- * @param inout_n_frames In: Number of frames available
- *                      Out: Number of frames remaining
- * @param full_utt If non-zero, this block represents a full
- *                 utterance and should be processed as such.
- * @return Number of frames of data processed.
- */
-int acmod_process_cep(acmod_t *acmod,
-                      mfcc_t ***inout_cep,
-                      int *inout_n_frames,
-                      int full_utt);
-
-/**
- * Feed dynamic feature data into the acoustic model for scoring.
- *
- * Unlike acmod_process_raw() and acmod_process_cep(), this function
- * accepts a single frame at a time.  This is because there is no need
- * to do buffering when using dynamic features as input.  However, if
- * the dynamic feature buffer is full, this function will fail, so you
- * should either always check the return value, or always pair a call
- * to it with a call to acmod_score().
- *
- * @param feat Pointer to one frame of dynamic features.
- * @return Number of frames processed (either 0 or 1).
- */
-int acmod_process_feat(acmod_t *acmod,
-                       mfcc_t **feat);
-
-/**
- * Set up a senone score dump file for input.
- *
- * @param insenfh File handle of dump file
- * @return 0 for success, <0 for failure
- */
-int acmod_set_insenfh(acmod_t *acmod, FILE *insenfh);
-
-/**
- * Read one frame of scores from senone score dump file.
- *
- * @return Number of frames read or <0 on error.
- */
-int acmod_read_scores(acmod_t *acmod);
-
-/**
- * Get a frame of dynamic feature data.
- *
- * @param inout_frame_idx Input: frame index to get, or NULL
- *                        to obtain features for the most recent frame.
- *                        Output: frame index corresponding to this
- *                        set of features.
- * @return Feature array, or NULL if requested frame is not available.
- */
-mfcc_t **acmod_get_frame(acmod_t *acmod, int *inout_frame_idx);
-
-/**
- * Score one frame of data.
- *
- * @param inout_frame_idx Input: frame index to score, or NULL
- *                        to obtain scores for the most recent frame.
- *                        Output: frame index corresponding to this
- *                        set of scores.
- * @return Array of senone scores for this frame, or NULL if no frame
- *         is available for scoring (such as if a frame index is
- *         requested that is not yet or no longer available).  The
- *         data pointed to persists only until the next call to
- *         acmod_score() or acmod_advance().
- */
-int16 const *acmod_score(acmod_t *acmod,
-                         int *inout_frame_idx);
-
-/**
- * Write senone dump file header.
- */
-int acmod_write_senfh_header(acmod_t *acmod, FILE *logfh);
-
-/**
- * Write a frame of senone scores to a dump file.
- */
-int acmod_write_scores(acmod_t *acmod, int n_active, uint8 const *active,
-                       int16 const *senscr, FILE *senfh);
-
-
-/**
- * Get best score and senone index for current frame.
- */
-int acmod_best_score(acmod_t *acmod, int *out_best_senid);
-
-/**
- * Clear set of active senones.
- */
-void acmod_clear_active(acmod_t *acmod);
-
-/**
- * Activate senones associated with an HMM.
- */
-void acmod_activate_hmm(acmod_t *acmod, hmm_t *hmm);
-
-/**
- * Activate a single senone.
- */
-#define acmod_activate_sen(acmod, sen) bitvec_set((acmod)->senone_active_vec, sen)
-
-/**
- * Build active senone list from the active senone flags (senone_active_vec).
- */
-int32 acmod_flags2list(acmod_t *acmod);
-
-/**
- * Get the offset of the utterance start of the current stream, helpful for stream-wide timing.
- */
-int32 acmod_stream_offset(acmod_t *acmod);
-
-/**
- * Reset the current stream
- */
-void acmod_start_stream(acmod_t *acmod);
-
-/**
- * Sets the limit of the raw audio data to store
- */
-void acmod_set_rawdata_size(acmod_t *acmod, int32 size);
-
-/**
- * Retrieves the raw data collected during utterance decoding
- */
-void acmod_get_rawdata(acmod_t *acmod, int16 **buffer, int32 *size);
-
-#endif /* __ACMOD_H__ */
author	Moonchild <moonchild@palemoon.org>	2020-06-01 21:58:35 +0000
committer	Moonchild <moonchild@palemoon.org>	2020-06-01 21:58:35 +0000
commit	c6ca4380e9e5e95df9de02daf8bfb9a6ebc22810 (patch)
tree	c7672903a2030d37f861b12900165a015f49d10a /media/pocketsphinx/src/acmod.h
parent	451509e2c0188a4164d4b3d1d9f5839ed1e95246 (diff)
parent	744b044935f7d1d67fbe0df42d898efcbdd00536 (diff)
download	UXP-c6ca4380e9e5e95df9de02daf8bfb9a6ebc22810.tar UXP-c6ca4380e9e5e95df9de02daf8bfb9a6ebc22810.tar.gz UXP-c6ca4380e9e5e95df9de02daf8bfb9a6ebc22810.tar.lz UXP-c6ca4380e9e5e95df9de02daf8bfb9a6ebc22810.tar.xz UXP-c6ca4380e9e5e95df9de02daf8bfb9a6ebc22810.zip