diff options
Diffstat (limited to 'media/pocketsphinx/pocketsphinx.h')
-rw-r--r-- | media/pocketsphinx/pocketsphinx.h | 658 |
1 files changed, 0 insertions, 658 deletions
diff --git a/media/pocketsphinx/pocketsphinx.h b/media/pocketsphinx/pocketsphinx.h deleted file mode 100644 index 7d49f9e88..000000000 --- a/media/pocketsphinx/pocketsphinx.h +++ /dev/null @@ -1,658 +0,0 @@ -/* -*- c-basic-offset:4; indent-tabs-mode: nil -*- */ -/* ==================================================================== - * Copyright (c) 1999-2008 Carnegie Mellon University. All rights - * reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * - * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND - * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, - * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY - * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * ==================================================================== - * - */ -/** - * @file pocketsphinx.h Main header file for the PocketSphinx decoder. - */ - -#ifndef __POCKETSPHINX_H__ -#define __POCKETSPHINX_H__ - -#ifdef __cplusplus -extern "C" { -#endif -#if 0 -} -#endif - -/* System headers we need. */ -#include <stdio.h> - -/* SphinxBase headers we need. */ -#include <sphinxbase/cmd_ln.h> -#include <sphinxbase/logmath.h> -#include <sphinxbase/fe.h> -#include <sphinxbase/feat.h> - -/* PocketSphinx headers (not many of them!) */ -#include <pocketsphinx_export.h> -#include <cmdln_macro.h> -#include <ps_lattice.h> -#include <ps_mllr.h> - -/** - * PocketSphinx speech recognizer object. - */ -typedef struct ps_decoder_s ps_decoder_t; - -#include <ps_search.h> - -/** - * PocketSphinx N-best hypothesis iterator object. - */ -typedef struct ps_astar_s ps_nbest_t; - -/** - * PocketSphinx segmentation iterator object. - */ -typedef struct ps_seg_s ps_seg_t; - -/** - * Sets default grammar and language model if they are not set explicitly and - * are present in the default search path. - */ -POCKETSPHINX_EXPORT void -ps_default_search_args(cmd_ln_t *); - -/** - * Initialize the decoder from a configuration object. - * - * @note The decoder retains ownership of the pointer - * <code>config</code>, so you must not attempt to free it manually. - * If you wish to reuse it elsewhere, call cmd_ln_retain() on it. - * - * @param config a command-line structure, as created by - * cmd_ln_parse_r() or cmd_ln_parse_file_r(). - */ -POCKETSPHINX_EXPORT -ps_decoder_t *ps_init(cmd_ln_t *config); - -/** - * Reinitialize the decoder with updated configuration. - * - * This function allows you to switch the acoustic model, dictionary, - * or other configuration without creating an entirely new decoding - * object. - * - * @note The decoder retains ownership of the pointer - * <code>config</code>, so you must not attempt to free it manually. - * If you wish to reuse it elsewhere, call cmd_ln_retain() on it. - * - * @param ps Decoder. - * @param config An optional new configuration to use. If this is - * NULL, the previous configuration will be reloaded, - * with any changes applied. - * @return 0 for success, <0 for failure. - */ -POCKETSPHINX_EXPORT -int ps_reinit(ps_decoder_t *ps, cmd_ln_t *config); - -/** - * Returns the argument definitions used in ps_init(). - * - * This is here to avoid exporting global data, which is problematic - * on Win32 and Symbian (and possibly other platforms). - */ -POCKETSPHINX_EXPORT -arg_t const *ps_args(void); - -/** - * Retain a pointer to the decoder. - * - * This increments the reference count on the decoder, allowing it to - * be shared between multiple parent objects. In general you will not - * need to use this function, ever. It is mainly here for the - * convenience of scripting language bindings. - * - * @return pointer to retained decoder. - */ -POCKETSPHINX_EXPORT -ps_decoder_t *ps_retain(ps_decoder_t *ps); - -/** - * Finalize the decoder. - * - * This releases all resources associated with the decoder, including - * any language models or grammars which have been added to it, and - * the initial configuration object passed to ps_init(). - * - * @param ps Decoder to be freed. - * @return New reference count (0 if freed). - */ -POCKETSPHINX_EXPORT -int ps_free(ps_decoder_t *ps); - -/** - * Get the configuration object for this decoder. - * - * @return The configuration object for this decoder. The decoder - * retains ownership of this pointer, so you should not - * attempt to free it manually. Use cmd_ln_retain() if you - * wish to reuse it elsewhere. - */ -POCKETSPHINX_EXPORT -cmd_ln_t *ps_get_config(ps_decoder_t *ps); - -/** - * Get the log-math computation object for this decoder. - * - * @return The log-math object for this decoder. The decoder retains - * ownership of this pointer, so you should not attempt to - * free it manually. Use logmath_retain() if you wish to - * reuse it elsewhere. - */ -POCKETSPHINX_EXPORT -logmath_t *ps_get_logmath(ps_decoder_t *ps); - -/** - * Get the feature extraction object for this decoder. - * - * @return The feature extraction object for this decoder. The - * decoder retains ownership of this pointer, so you should - * not attempt to free it manually. Use fe_retain() if you - * wish to reuse it elsewhere. - */ -POCKETSPHINX_EXPORT -fe_t *ps_get_fe(ps_decoder_t *ps); - -/** - * Get the dynamic feature computation object for this decoder. - * - * @return The dynamic feature computation object for this decoder. The - * decoder retains ownership of this pointer, so you should - * not attempt to free it manually. Use feat_retain() if you - * wish to reuse it elsewhere. - */ -POCKETSPHINX_EXPORT -feat_t *ps_get_feat(ps_decoder_t *ps); - -/** - * Adapt current acoustic model using a linear transform. - * - * @param mllr The new transform to use, or NULL to update the existing - * transform. The decoder retains ownership of this pointer, - * so you should not attempt to free it manually. Use - * ps_mllr_retain() if you wish to reuse it - * elsewhere. - * @return The updated transform object for this decoder, or - * NULL on failure. - */ -POCKETSPHINX_EXPORT -ps_mllr_t *ps_update_mllr(ps_decoder_t *ps, ps_mllr_t *mllr); - -/** - * Reload the pronunciation dictionary from a file. - * - * This function replaces the current pronunciation dictionary with - * the one stored in dictfile. This also causes the active search - * module(s) to be reinitialized, in the same manner as calling - * ps_add_word() with update=TRUE. - * - * @param dictfile Path to dictionary file to load. - * @param fdictfile Path to filler dictionary to load, or NULL to keep - * the existing filler dictionary. - * @param format Format of the dictionary file, or NULL to determine - * automatically (currently unused,should be NULL) - */ -POCKETSPHINX_EXPORT -int ps_load_dict(ps_decoder_t *ps, char const *dictfile, - char const *fdictfile, char const *format); - -/** - * Dump the current pronunciation dictionary to a file. - * - * This function dumps the current pronunciation dictionary to a tex - * - * @param dictfile Path to file where dictionary will be written. - * @param format Format of the dictionary file, or NULL for the - * default (text) format (currently unused, should be NULL) - */ -POCKETSPHINX_EXPORT -int ps_save_dict(ps_decoder_t *ps, char const *dictfile, char const *format); - -/** - * Add a word to the pronunciation dictionary. - * - * This function adds a word to the pronunciation dictionary and the - * current language model (but, obviously, not to the current FSG if - * FSG mode is enabled). If the word is already present in one or the - * other, it does whatever is necessary to ensure that the word can be - * recognized. - * - * @param word Word string to add. - * @param phones Whitespace-separated list of phoneme strings - * describing pronunciation of <code>word</code>. - * @param update If TRUE, update the search module (whichever one is - * currently active) to recognize the newly added word. - * If adding multiple words, it is more efficient to - * pass FALSE here in all but the last word. - * @return The internal ID (>= 0) of the newly added word, or <0 on - * failure. - */ -POCKETSPHINX_EXPORT -int ps_add_word(ps_decoder_t *ps, - char const *word, - char const *phones, - int update); - -/** - * Lookup for the word in the dictionary and return phone transcription - * for it. - * - * @param ps Pocketsphinx decoder - * @param word Word to look for - * - * @return Whitespace-spearated phone string describing the pronunciation of the <code>word</code> - * or NULL if word is not present in the dictionary. The string is - * allocated and must be freed by the user. - */ -POCKETSPHINX_EXPORT -char *ps_lookup_word(ps_decoder_t *ps, - const char *word); - -/** - * Decode a raw audio stream. - * - * No headers are recognized in this files. The configuration - * parameters <tt>-samprate</tt> and <tt>-input_endian</tt> are used - * to determine the sampling rate and endianness of the stream, - * respectively. Audio is always assumed to be 16-bit signed PCM. - * - * @param ps Decoder. - * @param rawfh Previously opened file stream. - * @param maxsamps Maximum number of samples to read from rawfh, or -1 - * to read until end-of-file. - * @return Number of samples of audio. - */ -POCKETSPHINX_EXPORT -long ps_decode_raw(ps_decoder_t *ps, FILE *rawfh, - long maxsamps); - -/** - * Decode a senone score dump file. - * - * @param ps Decoder - * @param fh Previously opened file handle positioned at start of file. - * @return Number of frames read. - */ -POCKETSPHINX_EXPORT -int ps_decode_senscr(ps_decoder_t *ps, FILE *senfh); - -/** - * Start processing of the stream of speech. Channel parameters like - * noise-level are maintained for the stream and reused among utterances. - * Times returned in segment iterators are also stream-wide. - * - * @return 0 for success, <0 on error. - */ -POCKETSPHINX_EXPORT -int ps_start_stream(ps_decoder_t *ps); - -/** - * Start utterance processing. - * - * This function should be called before any utterance data is passed - * to the decoder. It marks the start of a new utterance and - * reinitializes internal data structures. - * - * @param ps Decoder to be started. - * @return 0 for success, <0 on error. - */ -POCKETSPHINX_EXPORT -int ps_start_utt(ps_decoder_t *ps); - -/** - * Decode raw audio data. - * - * @param ps Decoder. - * @param no_search If non-zero, perform feature extraction but don't - * do any recognition yet. This may be necessary if - * your processor has trouble doing recognition in - * real-time. - * @param full_utt If non-zero, this block of data is a full utterance - * worth of data. This may allow the recognizer to - * produce more accurate results. - * @return Number of frames of data searched, or <0 for error. - */ -POCKETSPHINX_EXPORT -int ps_process_raw(ps_decoder_t *ps, - int16 const *data, - size_t n_samples, - int no_search, - int full_utt); - -/** - * Decode acoustic feature data. - * - * @param ps Decoder. - * @param no_search If non-zero, perform feature extraction but don't - * do any recognition yet. This may be necessary if - * your processor has trouble doing recognition in - * real-time. - * @param full_utt If non-zero, this block of data is a full utterance - * worth of data. This may allow the recognizer to - * produce more accurate results. - * @return Number of frames of data searched, or <0 for error. - */ -POCKETSPHINX_EXPORT -int ps_process_cep(ps_decoder_t *ps, - mfcc_t **data, - int n_frames, - int no_search, - int full_utt); - -/** - * Get the number of frames of data searched. - * - * Note that there is a delay between this and the number of frames of - * audio which have been input to the system. This is due to the fact - * that acoustic features are computed using a sliding window of - * audio, and dynamic features are computed over a sliding window of - * acoustic features. - * - * @param ps Decoder. - * @return Number of frames of speech data which have been recognized - * so far. - */ -POCKETSPHINX_EXPORT -int ps_get_n_frames(ps_decoder_t *ps); - -/** - * End utterance processing. - * - * @param ps Decoder. - * @return 0 for success, <0 on error - */ -POCKETSPHINX_EXPORT -int ps_end_utt(ps_decoder_t *ps); - -/** - * Get hypothesis string and path score. - * - * @param ps Decoder. - * @param out_best_score Output: path score corresponding to returned string. - * @return String containing best hypothesis at this point in - * decoding. NULL if no hypothesis is available. - */ -POCKETSPHINX_EXPORT -char const *ps_get_hyp(ps_decoder_t *ps, int32 *out_best_score); - -/** - * Get hypothesis string and final flag. - * - * @param ps Decoder. - * @param out_is_best_score Output: if hypothesis is reached final state in the grammar. - * @return String containing best hypothesis at this point in - * decoding. NULL if no hypothesis is available. - */ -POCKETSPHINX_EXPORT -char const *ps_get_hyp_final(ps_decoder_t *ps, int32 *out_is_final); - -/** - * Get posterior probability. - * - * @note Unless the -bestpath option is enabled, this function will - * always return zero (corresponding to a posterior probability of - * 1.0). Even if -bestpath is enabled, it will also return zero when - * called on a partial result. Ongoing research into effective - * confidence annotation for partial hypotheses may result in these - * restrictions being lifted in future versions. - * - * @param ps Decoder. - * @return Posterior probability of the best hypothesis. - */ -POCKETSPHINX_EXPORT -int32 ps_get_prob(ps_decoder_t *ps); - -/** - * Get word lattice. - * - * There isn't much you can do with this so far, a public API will - * appear in the future. - * - * @param ps Decoder. - * @return Word lattice object containing all hypotheses so far. NULL - * if no hypotheses are available. This pointer is owned by - * the decoder and you should not attempt to free it manually. - * It is only valid until the next utterance, unless you use - * ps_lattice_retain() to retain it. - */ -POCKETSPHINX_EXPORT -ps_lattice_t *ps_get_lattice(ps_decoder_t *ps); - -/** - * Get an iterator over the word segmentation for the best hypothesis. - * - * @param ps Decoder. - * @param out_best_score Output: path score corresponding to hypothesis. - * @return Iterator over the best hypothesis at this point in - * decoding. NULL if no hypothesis is available. - */ -POCKETSPHINX_EXPORT -ps_seg_t *ps_seg_iter(ps_decoder_t *ps, int32 *out_best_score); - -/** - * Get the next segment in a word segmentation. - * - * @param seg Segment iterator. - * @return Updated iterator with the next segment. NULL at end of - * utterance (the iterator will be freed in this case). - */ -POCKETSPHINX_EXPORT -ps_seg_t *ps_seg_next(ps_seg_t *seg); - -/** - * Get word string from a segmentation iterator. - * - * @param seg Segment iterator. - * @return Read-only string giving string name of this segment. This - * is only valid until the next call to ps_seg_next(). - */ -POCKETSPHINX_EXPORT -char const *ps_seg_word(ps_seg_t *seg); - -/** - * Get inclusive start and end frames from a segmentation iterator. - * - * @note These frame numbers are inclusive, i.e. the end frame refers - * to the last frame in which the given word or other segment was - * active. Therefore, the actual duration is *out_ef - *out_sf + 1. - * - * @param seg Segment iterator. - * @param out_sf Output: First frame index in segment. - * @param out_sf Output: Last frame index in segment. - */ -POCKETSPHINX_EXPORT -void ps_seg_frames(ps_seg_t *seg, int *out_sf, int *out_ef); - -/** - * Get language, acoustic, and posterior probabilities from a - * segmentation iterator. - * - * @note Unless the -bestpath option is enabled, this function will - * always return zero (corresponding to a posterior probability of - * 1.0). Even if -bestpath is enabled, it will also return zero when - * called on a partial result. Ongoing research into effective - * confidence annotation for partial hypotheses may result in these - * restrictions being lifted in future versions. - * - * @param out_ascr Output: acoustic model score for this segment. - * @param out_lscr Output: language model score for this segment. - * @param out_lback Output: language model backoff mode for this - * segment (i.e. the number of words used in - * calculating lscr). This field is, of course, only - * meaningful for N-Gram models. - * @return Log posterior probability of current segment. Log is - * expressed in the log-base used in the decoder. To convert - * to linear floating-point, use logmath_exp(ps_get_logmath(), - * pprob). - */ -POCKETSPHINX_EXPORT -int32 ps_seg_prob(ps_seg_t *seg, int32 *out_ascr, int32 *out_lscr, int32 *out_lback); - -/** - * Finish iterating over a word segmentation early, freeing resources. - */ -POCKETSPHINX_EXPORT -void ps_seg_free(ps_seg_t *seg); - -/** - * Get an iterator over the best hypotheses, optionally within a - * selected region of the utterance. Iterator is empty now, it must - * be advanced with ps_nbest_next first. The function may also - * return a NULL which means that there is no hypothesis available for this - * utterance. - * - * @param ps Decoder. - * @param sf Start frame for N-best search (0 for whole utterance) - * @param ef End frame for N-best search (-1 for whole utterance) - * @param ctx1 First word of trigram context (NULL for whole utterance) - * @param ctx2 First word of trigram context (NULL for whole utterance) - * @return Iterator over N-best hypotheses or NULL if no hypothesis is available - */ -POCKETSPHINX_EXPORT -ps_nbest_t *ps_nbest(ps_decoder_t *ps, int sf, int ef, - char const *ctx1, char const *ctx2); - -/** - * Move an N-best list iterator forward. - * - * @param nbest N-best iterator. - * @return Updated N-best iterator, or NULL if no more hypotheses are - * available (iterator is freed ni this case). - */ -POCKETSPHINX_EXPORT -ps_nbest_t *ps_nbest_next(ps_nbest_t *nbest); - -/** - * Get the hypothesis string from an N-best list iterator. - * - * @param nbest N-best iterator. - * @param out_score Output: Path score for this hypothesis. - * @return String containing next best hypothesis. - */ -POCKETSPHINX_EXPORT -char const *ps_nbest_hyp(ps_nbest_t *nbest, int32 *out_score); - -/** - * Get the word segmentation from an N-best list iterator. - * - * @param nbest N-best iterator. - * @param out_score Output: Path score for this hypothesis. - * @return Iterator over the next best hypothesis. - */ -POCKETSPHINX_EXPORT -ps_seg_t *ps_nbest_seg(ps_nbest_t *nbest, int32 *out_score); - -/** - * Finish N-best search early, releasing resources. - * - * @param nbest N-best iterator. - */ -POCKETSPHINX_EXPORT -void ps_nbest_free(ps_nbest_t *nbest); - -/** - * Get performance information for the current utterance. - * - * @param ps Decoder. - * @param out_nspeech Output: Number of seconds of speech. - * @param out_ncpu Output: Number of seconds of CPU time used. - * @param out_nwall Output: Number of seconds of wall time used. - */ -POCKETSPHINX_EXPORT -void ps_get_utt_time(ps_decoder_t *ps, double *out_nspeech, - double *out_ncpu, double *out_nwall); - -/** - * Get overall performance information. - * - * @param ps Decoder. - * @param out_nspeech Output: Number of seconds of speech. - * @param out_ncpu Output: Number of seconds of CPU time used. - * @param out_nwall Output: Number of seconds of wall time used. - */ -POCKETSPHINX_EXPORT -void ps_get_all_time(ps_decoder_t *ps, double *out_nspeech, - double *out_ncpu, double *out_nwall); - -/** - * Checks if the last feed audio buffer contained speech - * - * @param ps Decoder. - * @return 1 if last buffer contained speech, 0 - otherwise - */ -POCKETSPHINX_EXPORT -uint8 ps_get_in_speech(ps_decoder_t *ps); - - -/** - * Sets the limit of the raw audio data to store in decoder - * to retrieve it later on ps_get_rawdata. - * - * @param ps Decoder - * @param size bytes of the utterance to store - */ -POCKETSPHINX_EXPORT -void ps_set_rawdata_size(ps_decoder_t *ps, int32 size); - - -/** - * Retrieves the raw data collected during utterance decoding. - * - * @param ps Decoder - * @param buffer preallocated buffer to store the data, must be within the limit - * set before - * @param size size of the data collected in samples (not bytes). - */ -POCKETSPHINX_EXPORT -void ps_get_rawdata(ps_decoder_t *ps, int16 **buffer, int32 *size); - -/** - * @mainpage PocketSphinx API Documentation - * @author David Huggins-Daines <dhuggins@cs.cmu.edu> - * @version 0.6 - * @date March, 2010 - * - * @section intro_sec Introduction - * - * This is the API documentation for the PocketSphinx speech - * recognition engine. The main API calls are documented in - * <pocketsphinx.h>. - */ - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* __POCKETSPHINX_H__ */ |