From 5f8de423f190bbb79a62f804151bc24824fa32d8 Mon Sep 17 00:00:00 2001 From: "Matt A. Tobin" Date: Fri, 2 Feb 2018 04:16:08 -0500 Subject: Add m-esr52 at 52.6.0 --- media/sphinxbase/config.h | 160 ++ media/sphinxbase/moz.build | 82 + media/sphinxbase/pocketsphinx-COPYING.txt | 36 + media/sphinxbase/sbthread.patch | 14 + media/sphinxbase/sphinx_config.h | 42 + media/sphinxbase/sphinxbase/ad.h | 126 + media/sphinxbase/sphinxbase/agc.h | 202 ++ media/sphinxbase/sphinxbase/bio.h | 304 ++ media/sphinxbase/sphinxbase/bitvec.h | 155 + media/sphinxbase/sphinxbase/byteorder.h | 98 + media/sphinxbase/sphinxbase/case.h | 135 + media/sphinxbase/sphinxbase/ckd_alloc.h | 310 ++ media/sphinxbase/sphinxbase/clapack_lite.h | 36 + media/sphinxbase/sphinxbase/cmd_ln.h | 587 ++++ media/sphinxbase/sphinxbase/cmn.h | 191 ++ media/sphinxbase/sphinxbase/err.h | 229 ++ media/sphinxbase/sphinxbase/f2c.h | 218 ++ media/sphinxbase/sphinxbase/fe.h | 617 ++++ media/sphinxbase/sphinxbase/feat.h | 469 +++ media/sphinxbase/sphinxbase/filename.h | 112 + media/sphinxbase/sphinxbase/fixpoint.h | 145 + media/sphinxbase/sphinxbase/fsg_model.h | 364 +++ media/sphinxbase/sphinxbase/genrand.h | 180 ++ media/sphinxbase/sphinxbase/glist.h | 242 ++ media/sphinxbase/sphinxbase/hash_table.h | 443 +++ media/sphinxbase/sphinxbase/heap.h | 153 + media/sphinxbase/sphinxbase/huff_code.h | 143 + media/sphinxbase/sphinxbase/jsgf.h | 209 ++ media/sphinxbase/sphinxbase/listelem_alloc.h | 125 + media/sphinxbase/sphinxbase/logmath.h | 237 ++ media/sphinxbase/sphinxbase/matrix.h | 210 ++ media/sphinxbase/sphinxbase/mmio.h | 85 + media/sphinxbase/sphinxbase/mulaw.h | 99 + media/sphinxbase/sphinxbase/ngram_model.h | 711 +++++ media/sphinxbase/sphinxbase/pio.h | 302 ++ media/sphinxbase/sphinxbase/prim_type.h | 183 ++ media/sphinxbase/sphinxbase/profile.h | 231 ++ media/sphinxbase/sphinxbase/sbthread.h | 221 ++ media/sphinxbase/sphinxbase/sphinx_config.h | 42 + media/sphinxbase/sphinxbase/sphinxbase_export.h | 15 + 
media/sphinxbase/sphinxbase/strfuncs.h | 158 ++ media/sphinxbase/sphinxbase/yin.h | 136 + .../sphinxbase/src/libsphinxbase/fe/fe_interface.c | 776 +++++ .../sphinxbase/src/libsphinxbase/fe/fe_internal.h | 216 ++ media/sphinxbase/src/libsphinxbase/fe/fe_noise.c | 425 +++ media/sphinxbase/src/libsphinxbase/fe/fe_noise.h | 66 + .../src/libsphinxbase/fe/fe_prespch_buf.c | 182 ++ .../src/libsphinxbase/fe/fe_prespch_buf.h | 79 + media/sphinxbase/src/libsphinxbase/fe/fe_sigproc.c | 1377 +++++++++ media/sphinxbase/src/libsphinxbase/fe/fe_type.h | 65 + media/sphinxbase/src/libsphinxbase/fe/fe_warp.c | 252 ++ media/sphinxbase/src/libsphinxbase/fe/fe_warp.h | 90 + .../src/libsphinxbase/fe/fe_warp_affine.c | 181 ++ .../src/libsphinxbase/fe/fe_warp_affine.h | 76 + .../src/libsphinxbase/fe/fe_warp_inverse_linear.c | 178 ++ .../src/libsphinxbase/fe/fe_warp_inverse_linear.h | 77 + .../libsphinxbase/fe/fe_warp_piecewise_linear.c | 223 ++ .../libsphinxbase/fe/fe_warp_piecewise_linear.h | 77 + media/sphinxbase/src/libsphinxbase/fe/fixlog.c | 229 ++ media/sphinxbase/src/libsphinxbase/fe/yin.c | 412 +++ media/sphinxbase/src/libsphinxbase/feat/agc.c | 227 ++ media/sphinxbase/src/libsphinxbase/feat/cmn.c | 238 ++ .../sphinxbase/src/libsphinxbase/feat/cmn_prior.c | 184 ++ media/sphinxbase/src/libsphinxbase/feat/feat.c | 1497 ++++++++++ media/sphinxbase/src/libsphinxbase/feat/lda.c | 158 ++ media/sphinxbase/src/libsphinxbase/lm/fsg_model.c | 944 +++++++ media/sphinxbase/src/libsphinxbase/lm/jsgf.c | 943 +++++++ .../src/libsphinxbase/lm/jsgf_internal.h | 140 + .../sphinxbase/src/libsphinxbase/lm/jsgf_parser.c | 1799 ++++++++++++ .../sphinxbase/src/libsphinxbase/lm/jsgf_parser.h | 90 + .../sphinxbase/src/libsphinxbase/lm/jsgf_scanner.c | 2199 +++++++++++++++ .../sphinxbase/src/libsphinxbase/lm/jsgf_scanner.h | 352 +++ media/sphinxbase/src/libsphinxbase/lm/lm3g_model.c | 258 ++ media/sphinxbase/src/libsphinxbase/lm/lm3g_model.h | 177 ++ .../src/libsphinxbase/lm/lm3g_templates.c | 560 ++++ 
.../sphinxbase/src/libsphinxbase/lm/ngram_model.c | 1129 ++++++++ .../src/libsphinxbase/lm/ngram_model_arpa.c | 660 +++++ .../src/libsphinxbase/lm/ngram_model_arpa.h | 86 + .../src/libsphinxbase/lm/ngram_model_dmp.c | 969 +++++++ .../src/libsphinxbase/lm/ngram_model_dmp.h | 92 + .../src/libsphinxbase/lm/ngram_model_internal.h | 282 ++ .../src/libsphinxbase/lm/ngram_model_set.c | 870 ++++++ .../src/libsphinxbase/lm/ngram_model_set.h | 71 + media/sphinxbase/src/libsphinxbase/util/bio.c | 644 +++++ media/sphinxbase/src/libsphinxbase/util/bitvec.c | 101 + .../sphinxbase/src/libsphinxbase/util/blas_lite.c | 2147 ++++++++++++++ media/sphinxbase/src/libsphinxbase/util/case.c | 141 + .../sphinxbase/src/libsphinxbase/util/ckd_alloc.c | 427 +++ media/sphinxbase/src/libsphinxbase/util/cmd_ln.c | 1082 +++++++ media/sphinxbase/src/libsphinxbase/util/dtoa.c | 2979 ++++++++++++++++++++ media/sphinxbase/src/libsphinxbase/util/err.c | 297 ++ media/sphinxbase/src/libsphinxbase/util/errno.c | 51 + media/sphinxbase/src/libsphinxbase/util/f2c_lite.c | 551 ++++ media/sphinxbase/src/libsphinxbase/util/filename.c | 120 + media/sphinxbase/src/libsphinxbase/util/genrand.c | 234 ++ media/sphinxbase/src/libsphinxbase/util/glist.c | 271 ++ .../sphinxbase/src/libsphinxbase/util/hash_table.c | 713 +++++ media/sphinxbase/src/libsphinxbase/util/heap.c | 292 ++ .../sphinxbase/src/libsphinxbase/util/huff_code.c | 651 +++++ .../src/libsphinxbase/util/listelem_alloc.c | 294 ++ media/sphinxbase/src/libsphinxbase/util/logmath.c | 483 ++++ media/sphinxbase/src/libsphinxbase/util/matrix.c | 313 ++ media/sphinxbase/src/libsphinxbase/util/mmio.c | 257 ++ media/sphinxbase/src/libsphinxbase/util/pio.c | 655 +++++ media/sphinxbase/src/libsphinxbase/util/profile.c | 345 +++ media/sphinxbase/src/libsphinxbase/util/sbthread.c | 741 +++++ media/sphinxbase/src/libsphinxbase/util/slamch.c | 1029 +++++++ .../src/libsphinxbase/util/slapack_lite.c | 1461 ++++++++++ media/sphinxbase/src/libsphinxbase/util/strfuncs.c | 
194 ++ media/sphinxbase/src/libsphinxbase/util/utf8.c | 75 + media/sphinxbase/update.sh | 41 + 111 files changed, 44252 insertions(+) create mode 100644 media/sphinxbase/config.h create mode 100644 media/sphinxbase/moz.build create mode 100644 media/sphinxbase/pocketsphinx-COPYING.txt create mode 100644 media/sphinxbase/sbthread.patch create mode 100644 media/sphinxbase/sphinx_config.h create mode 100644 media/sphinxbase/sphinxbase/ad.h create mode 100644 media/sphinxbase/sphinxbase/agc.h create mode 100644 media/sphinxbase/sphinxbase/bio.h create mode 100644 media/sphinxbase/sphinxbase/bitvec.h create mode 100644 media/sphinxbase/sphinxbase/byteorder.h create mode 100644 media/sphinxbase/sphinxbase/case.h create mode 100644 media/sphinxbase/sphinxbase/ckd_alloc.h create mode 100644 media/sphinxbase/sphinxbase/clapack_lite.h create mode 100644 media/sphinxbase/sphinxbase/cmd_ln.h create mode 100644 media/sphinxbase/sphinxbase/cmn.h create mode 100644 media/sphinxbase/sphinxbase/err.h create mode 100644 media/sphinxbase/sphinxbase/f2c.h create mode 100644 media/sphinxbase/sphinxbase/fe.h create mode 100644 media/sphinxbase/sphinxbase/feat.h create mode 100644 media/sphinxbase/sphinxbase/filename.h create mode 100644 media/sphinxbase/sphinxbase/fixpoint.h create mode 100644 media/sphinxbase/sphinxbase/fsg_model.h create mode 100644 media/sphinxbase/sphinxbase/genrand.h create mode 100644 media/sphinxbase/sphinxbase/glist.h create mode 100644 media/sphinxbase/sphinxbase/hash_table.h create mode 100644 media/sphinxbase/sphinxbase/heap.h create mode 100644 media/sphinxbase/sphinxbase/huff_code.h create mode 100644 media/sphinxbase/sphinxbase/jsgf.h create mode 100644 media/sphinxbase/sphinxbase/listelem_alloc.h create mode 100644 media/sphinxbase/sphinxbase/logmath.h create mode 100644 media/sphinxbase/sphinxbase/matrix.h create mode 100644 media/sphinxbase/sphinxbase/mmio.h create mode 100644 media/sphinxbase/sphinxbase/mulaw.h create mode 100644 
media/sphinxbase/sphinxbase/ngram_model.h create mode 100644 media/sphinxbase/sphinxbase/pio.h create mode 100644 media/sphinxbase/sphinxbase/prim_type.h create mode 100644 media/sphinxbase/sphinxbase/profile.h create mode 100644 media/sphinxbase/sphinxbase/sbthread.h create mode 100644 media/sphinxbase/sphinxbase/sphinx_config.h create mode 100644 media/sphinxbase/sphinxbase/sphinxbase_export.h create mode 100644 media/sphinxbase/sphinxbase/strfuncs.h create mode 100644 media/sphinxbase/sphinxbase/yin.h create mode 100644 media/sphinxbase/src/libsphinxbase/fe/fe_interface.c create mode 100644 media/sphinxbase/src/libsphinxbase/fe/fe_internal.h create mode 100644 media/sphinxbase/src/libsphinxbase/fe/fe_noise.c create mode 100644 media/sphinxbase/src/libsphinxbase/fe/fe_noise.h create mode 100644 media/sphinxbase/src/libsphinxbase/fe/fe_prespch_buf.c create mode 100644 media/sphinxbase/src/libsphinxbase/fe/fe_prespch_buf.h create mode 100644 media/sphinxbase/src/libsphinxbase/fe/fe_sigproc.c create mode 100644 media/sphinxbase/src/libsphinxbase/fe/fe_type.h create mode 100644 media/sphinxbase/src/libsphinxbase/fe/fe_warp.c create mode 100644 media/sphinxbase/src/libsphinxbase/fe/fe_warp.h create mode 100644 media/sphinxbase/src/libsphinxbase/fe/fe_warp_affine.c create mode 100644 media/sphinxbase/src/libsphinxbase/fe/fe_warp_affine.h create mode 100644 media/sphinxbase/src/libsphinxbase/fe/fe_warp_inverse_linear.c create mode 100644 media/sphinxbase/src/libsphinxbase/fe/fe_warp_inverse_linear.h create mode 100644 media/sphinxbase/src/libsphinxbase/fe/fe_warp_piecewise_linear.c create mode 100644 media/sphinxbase/src/libsphinxbase/fe/fe_warp_piecewise_linear.h create mode 100644 media/sphinxbase/src/libsphinxbase/fe/fixlog.c create mode 100644 media/sphinxbase/src/libsphinxbase/fe/yin.c create mode 100644 media/sphinxbase/src/libsphinxbase/feat/agc.c create mode 100644 media/sphinxbase/src/libsphinxbase/feat/cmn.c create mode 100644 
media/sphinxbase/src/libsphinxbase/feat/cmn_prior.c create mode 100644 media/sphinxbase/src/libsphinxbase/feat/feat.c create mode 100644 media/sphinxbase/src/libsphinxbase/feat/lda.c create mode 100644 media/sphinxbase/src/libsphinxbase/lm/fsg_model.c create mode 100644 media/sphinxbase/src/libsphinxbase/lm/jsgf.c create mode 100644 media/sphinxbase/src/libsphinxbase/lm/jsgf_internal.h create mode 100644 media/sphinxbase/src/libsphinxbase/lm/jsgf_parser.c create mode 100644 media/sphinxbase/src/libsphinxbase/lm/jsgf_parser.h create mode 100644 media/sphinxbase/src/libsphinxbase/lm/jsgf_scanner.c create mode 100644 media/sphinxbase/src/libsphinxbase/lm/jsgf_scanner.h create mode 100644 media/sphinxbase/src/libsphinxbase/lm/lm3g_model.c create mode 100644 media/sphinxbase/src/libsphinxbase/lm/lm3g_model.h create mode 100644 media/sphinxbase/src/libsphinxbase/lm/lm3g_templates.c create mode 100644 media/sphinxbase/src/libsphinxbase/lm/ngram_model.c create mode 100644 media/sphinxbase/src/libsphinxbase/lm/ngram_model_arpa.c create mode 100644 media/sphinxbase/src/libsphinxbase/lm/ngram_model_arpa.h create mode 100644 media/sphinxbase/src/libsphinxbase/lm/ngram_model_dmp.c create mode 100644 media/sphinxbase/src/libsphinxbase/lm/ngram_model_dmp.h create mode 100644 media/sphinxbase/src/libsphinxbase/lm/ngram_model_internal.h create mode 100644 media/sphinxbase/src/libsphinxbase/lm/ngram_model_set.c create mode 100644 media/sphinxbase/src/libsphinxbase/lm/ngram_model_set.h create mode 100644 media/sphinxbase/src/libsphinxbase/util/bio.c create mode 100644 media/sphinxbase/src/libsphinxbase/util/bitvec.c create mode 100644 media/sphinxbase/src/libsphinxbase/util/blas_lite.c create mode 100644 media/sphinxbase/src/libsphinxbase/util/case.c create mode 100644 media/sphinxbase/src/libsphinxbase/util/ckd_alloc.c create mode 100644 media/sphinxbase/src/libsphinxbase/util/cmd_ln.c create mode 100644 media/sphinxbase/src/libsphinxbase/util/dtoa.c create mode 100644 
media/sphinxbase/src/libsphinxbase/util/err.c create mode 100644 media/sphinxbase/src/libsphinxbase/util/errno.c create mode 100644 media/sphinxbase/src/libsphinxbase/util/f2c_lite.c create mode 100644 media/sphinxbase/src/libsphinxbase/util/filename.c create mode 100644 media/sphinxbase/src/libsphinxbase/util/genrand.c create mode 100644 media/sphinxbase/src/libsphinxbase/util/glist.c create mode 100644 media/sphinxbase/src/libsphinxbase/util/hash_table.c create mode 100644 media/sphinxbase/src/libsphinxbase/util/heap.c create mode 100644 media/sphinxbase/src/libsphinxbase/util/huff_code.c create mode 100644 media/sphinxbase/src/libsphinxbase/util/listelem_alloc.c create mode 100644 media/sphinxbase/src/libsphinxbase/util/logmath.c create mode 100644 media/sphinxbase/src/libsphinxbase/util/matrix.c create mode 100644 media/sphinxbase/src/libsphinxbase/util/mmio.c create mode 100644 media/sphinxbase/src/libsphinxbase/util/pio.c create mode 100644 media/sphinxbase/src/libsphinxbase/util/profile.c create mode 100644 media/sphinxbase/src/libsphinxbase/util/sbthread.c create mode 100644 media/sphinxbase/src/libsphinxbase/util/slamch.c create mode 100644 media/sphinxbase/src/libsphinxbase/util/slapack_lite.c create mode 100644 media/sphinxbase/src/libsphinxbase/util/strfuncs.c create mode 100644 media/sphinxbase/src/libsphinxbase/util/utf8.c create mode 100755 media/sphinxbase/update.sh (limited to 'media/sphinxbase') diff --git a/media/sphinxbase/config.h b/media/sphinxbase/config.h new file mode 100644 index 000000000..ccc4ecb35 --- /dev/null +++ b/media/sphinxbase/config.h @@ -0,0 +1,160 @@ +#if ( defined(_WIN32) || defined(__CYGWIN__) ) +/* include/sphinx_config.h, defaults for Win32 */ +/* sphinx_config.h: Externally visible configuration parameters for + * SphinxBase. 
+ */ + +/* Default radix point for fixed-point */ +/* #undef DEFAULT_RADIX */ + +/* Enable thread safety */ +#define ENABLE_THREADS + +/* The Thread Local Storage class */ +#define SPHINXBASE_TLS __declspec(thread) + +/* Use Q15 fixed-point computation */ +/* #undef FIXED16 */ + +/* Use fixed-point computation */ +/* #undef FIXED_POINT */ + +/* Enable matrix algebra with LAPACK */ +#define WITH_LAPACK + +/* The size of `long', as computed by sizeof. */ +#define SIZEOF_LONG 4 + +/* We don't have popen, but we do have _popen */ +/* #define HAVE_POPEN 1 */ + +/* We do have perror */ +#define HAVE_PERROR 1 + +/* We have sys/stat.h */ +#define HAVE_SYS_STAT_H 1 + +/* We do not have unistd.h. */ +#define YY_NO_UNISTD_H 1 + +/* Extension for executables */ +#define EXEEXT ".exe" +#else +/* include/config.h. Generated from config.h.in by configure. */ +/* include/config.h.in. Generated from configure.in by autoheader. */ + +/* Default radix point for fixed-point */ +/* #undef DEFAULT_RADIX */ + +/* Enable thread safety */ +#define ENABLE_THREADS /**/ + +/* Use Q15 fixed-point computation */ +/* #undef FIXED16 */ + +/* Use fixed-point computation */ +/* #undef FIXED_POINT */ + +/* Define to 1 if you have the <dlfcn.h> header file. */ +#define HAVE_DLFCN_H 1 + +/* Define if you have the iconv() function. */ +/* #define HAVE_ICONV 1 */ + +/* Define to 1 if you have the <inttypes.h> header file. */ +#define HAVE_INTTYPES_H 1 + +/* Define to 1 if you have the `asound' library (-lasound). */ +/* #define HAVE_LIBASOUND 1 */ + +/* Define to 1 if you have the `blas' library (-lblas). */ +/* #define HAVE_LIBBLAS 1 */ + +/* Define to 1 if you have the `lapack' library (-llapack). */ +/* #define HAVE_LIBLAPACK 1 */ + +/* Define to 1 if you have the `m' library (-lm). */ +#define HAVE_LIBM 1 + +/* Define to 1 if you have the `pthread' library (-lpthread). */ +#define HAVE_LIBPTHREAD 1 + +/* Define to 1 if the system has the type `long long'. 
*/ +#define HAVE_LONG_LONG 1 + +/* Define to 1 if you have the <memory.h> header file. */ +#define HAVE_MEMORY_H 1 + +/* Define to 1 if you have the `perror' function. */ +#define HAVE_PERROR 1 + +/* Define to 1 if you have the `popen' function. */ +#define HAVE_POPEN 1 + +/* Define to 1 if you have the <pthread.h> header file. */ +#define HAVE_PTHREAD_H 1 + +/* Define to 1 if you have the <sndfile.h> header file. */ +#define HAVE_SNDFILE_H 1 + +/* Define to 1 if you have the `snprintf' function. */ +#define HAVE_SNPRINTF 1 + +/* Define to 1 if you have the <stdint.h> header file. */ +#define HAVE_STDINT_H 1 + +/* Define to 1 if you have the <stdlib.h> header file. */ +#define HAVE_STDLIB_H 1 + +/* Define to 1 if you have the <strings.h> header file. */ +#define HAVE_STRINGS_H 1 + +/* Define to 1 if you have the <string.h> header file. */ +#define HAVE_STRING_H 1 + +/* Define to 1 if you have the <sys/stat.h> header file. */ +#define HAVE_SYS_STAT_H 1 + +/* Define to 1 if you have the <sys/types.h> header file. */ +#define HAVE_SYS_TYPES_H 1 + +/* Define to 1 if you have the <unistd.h> header file. */ +#define HAVE_UNISTD_H 1 + +/* Define as const if the declaration of iconv() needs const. */ +#define ICONV_CONST + +/* Define to the sub-directory in which libtool stores uninstalled libraries. + */ +#define LT_OBJDIR ".libs/" + +/* Define as the return type of signal handlers (`int' or `void'). */ +#define RETSIGTYPE void + +/* The size of `long', as computed by sizeof. */ +#define SIZEOF_LONG 8 + +/* The size of `long long', as computed by sizeof. */ +#define SIZEOF_LONG_LONG 8 + +/* Enable debugging output */ +/* #undef SPHINX_DEBUG */ + +/* Define to 1 if you have the ANSI C header files. */ +#define STDC_HEADERS 1 + +/* Enable matrix algebra with LAPACK */ +/* #define WITH_LAPACK */ + +/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most + significant byte first (like Motorola and SPARC, unlike Intel). 
*/ +#if defined AC_APPLE_UNIVERSAL_BUILD +# if defined __BIG_ENDIAN__ +# define WORDS_BIGENDIAN 1 +# endif +#else +# ifndef WORDS_BIGENDIAN +/* # undef WORDS_BIGENDIAN */ +# endif +#endif +#endif diff --git a/media/sphinxbase/moz.build b/media/sphinxbase/moz.build new file mode 100644 index 000000000..d4aefad5c --- /dev/null +++ b/media/sphinxbase/moz.build @@ -0,0 +1,82 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +EXPORTS.sphinxbase += [ + 'sphinxbase/cmd_ln.h', + 'sphinxbase/fe.h', + 'sphinxbase/feat.h', + 'sphinxbase/logmath.h', +] + +SOURCES += [ + 'src/libsphinxbase/fe/fe_interface.c', + 'src/libsphinxbase/fe/fe_noise.c', + 'src/libsphinxbase/fe/fe_prespch_buf.c', + 'src/libsphinxbase/fe/fe_sigproc.c', + 'src/libsphinxbase/fe/fe_warp.c', + 'src/libsphinxbase/fe/fe_warp_affine.c', + 'src/libsphinxbase/fe/fe_warp_inverse_linear.c', + 'src/libsphinxbase/fe/fe_warp_piecewise_linear.c', + 'src/libsphinxbase/fe/fixlog.c', + 'src/libsphinxbase/fe/yin.c', + 'src/libsphinxbase/feat/agc.c', + 'src/libsphinxbase/feat/cmn.c', + 'src/libsphinxbase/feat/cmn_prior.c', + 'src/libsphinxbase/feat/feat.c', + 'src/libsphinxbase/feat/lda.c', + 'src/libsphinxbase/lm/fsg_model.c', + 'src/libsphinxbase/lm/jsgf.c', + 'src/libsphinxbase/lm/jsgf_parser.c', + 'src/libsphinxbase/lm/jsgf_scanner.c', + 'src/libsphinxbase/lm/lm3g_model.c', + 'src/libsphinxbase/lm/ngram_model.c', + 'src/libsphinxbase/lm/ngram_model_arpa.c', + 'src/libsphinxbase/lm/ngram_model_dmp.c', + 'src/libsphinxbase/lm/ngram_model_set.c', + 'src/libsphinxbase/util/bio.c', + 'src/libsphinxbase/util/bitvec.c', + 'src/libsphinxbase/util/blas_lite.c', + 'src/libsphinxbase/util/case.c', + 'src/libsphinxbase/util/ckd_alloc.c', + 'src/libsphinxbase/util/cmd_ln.c', + 
'src/libsphinxbase/util/dtoa.c', + 'src/libsphinxbase/util/err.c', + 'src/libsphinxbase/util/errno.c', + 'src/libsphinxbase/util/f2c_lite.c', + 'src/libsphinxbase/util/filename.c', + 'src/libsphinxbase/util/genrand.c', + 'src/libsphinxbase/util/glist.c', + 'src/libsphinxbase/util/hash_table.c', + 'src/libsphinxbase/util/heap.c', + 'src/libsphinxbase/util/huff_code.c', + 'src/libsphinxbase/util/listelem_alloc.c', + 'src/libsphinxbase/util/logmath.c', + 'src/libsphinxbase/util/matrix.c', + 'src/libsphinxbase/util/mmio.c', + 'src/libsphinxbase/util/pio.c', + 'src/libsphinxbase/util/profile.c', + 'src/libsphinxbase/util/sbthread.c', + 'src/libsphinxbase/util/slamch.c', + 'src/libsphinxbase/util/slapack_lite.c', + 'src/libsphinxbase/util/strfuncs.c', + 'src/libsphinxbase/util/utf8.c', +] + +# Suppress warnings in third-party code. +if CONFIG['GNU_CC']: + CFLAGS += [ + '-Wno-parentheses', + '-Wno-sign-compare', + ] + +# Add define required of third party code. +if CONFIG['GNU_CC']: + DEFINES['HAVE_CONFIG_H'] = True + +# We allow warnings for third-party code that can be updated from upstream. +ALLOW_COMPILER_WARNINGS = True + +FINAL_LIBRARY = 'gkmedias' diff --git a/media/sphinxbase/pocketsphinx-COPYING.txt b/media/sphinxbase/pocketsphinx-COPYING.txt new file mode 100644 index 000000000..2d44d6bf3 --- /dev/null +++ b/media/sphinxbase/pocketsphinx-COPYING.txt @@ -0,0 +1,36 @@ +/* ==================================================================== + * Copyright (c) 1999-2015 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + diff --git a/media/sphinxbase/sbthread.patch b/media/sphinxbase/sbthread.patch new file mode 100644 index 000000000..b2ce9457b --- /dev/null +++ b/media/sphinxbase/sbthread.patch @@ -0,0 +1,14 @@ +diff --git b/src/libsphinxbase/util/sbthread.c a/src/libsphinxbase/util/sbthread.c +index 14207cb..2f82c76 100644 +--- b/src/libsphinxbase/util/sbthread.c ++++ a/src/libsphinxbase/util/sbthread.c +@@ -51,7 +51,9 @@ + * Platform-specific parts: threads, mutexes, and signals. 
+ */ + #if (defined(_WIN32) || defined(__CYGWIN__)) && !defined(__SYMBIAN32__) ++#ifndef _WIN32_WINNT + #define _WIN32_WINNT 0x0400 ++#endif /* not _WIN32_WINNT */ + #include + + struct sbthread_s { diff --git a/media/sphinxbase/sphinx_config.h b/media/sphinxbase/sphinx_config.h new file mode 100644 index 000000000..6e409bb37 --- /dev/null +++ b/media/sphinxbase/sphinx_config.h @@ -0,0 +1,42 @@ +#if ( defined(_WIN32) || defined(__CYGWIN__) ) +/* include/sphinx_config.h, defaults for Win32 */ +/* sphinx_config.h: Externally visible configuration parameters for + * SphinxBase. + */ + +/* Default radix point for fixed-point */ +/* #undef DEFAULT_RADIX */ + +/* Use Q15 fixed-point computation */ +/* #undef FIXED16 */ + +/* Use fixed-point computation */ +/* #undef FIXED_POINT */ + +/* The size of `long', as computed by sizeof. */ +#define SIZEOF_LONG 4 +#else +/* include/sphinx_config.h. Generated from sphinx_config.h.in by configure. */ +/* sphinx_config.h: Externally visible configuration parameters */ + +/* Default radix point for fixed-point */ +/* #undef DEFAULT_RADIX */ + +/* Use Q15 fixed-point computation */ +/* #undef FIXED16 */ + +/* Use fixed-point computation */ +/* #undef FIXED_POINT */ + +/* The size of `long', as computed by sizeof. */ +#define SIZEOF_LONG 8 + +/* Define to 1 if the system has the type `long long'. */ +/*#define HAVE_LONG_LONG 1*/ + +/* The size of `long long', as computed by sizeof. */ +#define SIZEOF_LONG_LONG 8 + +/* Enable debugging output */ +/* #undef SPHINX_DEBUG */ +#endif diff --git a/media/sphinxbase/sphinxbase/ad.h b/media/sphinxbase/sphinxbase/ad.h new file mode 100644 index 000000000..5b9c25fe7 --- /dev/null +++ b/media/sphinxbase/sphinxbase/ad.h @@ -0,0 +1,126 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2014 Carnegie Mellon University. All rights + * reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/** \file ad.h + * \brief generic live audio interface for recording and playback + */ + +#ifndef _AD_H_ +#define _AD_H_ + +#include + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. 
*/ +} +#endif + +#define DEFAULT_SAMPLES_PER_SEC 16000 + +/* Return codes */ +#define AD_OK 0 +#define AD_EOF -1 +#define AD_ERR_GEN -1 +#define AD_ERR_NOT_OPEN -2 +#define AD_ERR_WAVE -3 + +typedef struct ad_rec_s ad_rec_t; + +/** + * Open a specific audio device for recording. + * + * The device is opened in non-blocking mode and placed in idle state. + * + * @return pointer to read-only ad_rec_t structure if successful, NULL + * otherwise. The return value to be used as the first argument to + * other recording functions. + */ +SPHINXBASE_EXPORT +ad_rec_t *ad_open_dev ( + const char *dev, /**< Device name (platform-specific) */ + int32 samples_per_sec /**< Samples per second */ + ); + +/** + * Open the default audio device with a given sampling rate. + */ +SPHINXBASE_EXPORT +ad_rec_t *ad_open_sps ( + int32 samples_per_sec /**< Samples per second */ + ); + + +/** + * Open the default audio device. + */ +SPHINXBASE_EXPORT +ad_rec_t *ad_open ( void ); + + +/* Start audio recording. Return value: 0 if successful, <0 otherwise */ +SPHINXBASE_EXPORT +int32 ad_start_rec (ad_rec_t *); + + +/* Stop audio recording. Return value: 0 if successful, <0 otherwise */ +SPHINXBASE_EXPORT +int32 ad_stop_rec (ad_rec_t *); + + +/* Close the recording device. Return value: 0 if successful, <0 otherwise */ +SPHINXBASE_EXPORT +int32 ad_close (ad_rec_t *); + +/* + * Read next block of audio samples while recording; read upto max samples into buf. + * Return value: # samples actually read (could be 0 since non-blocking); -1 if not + * recording and no more samples remaining to be read from most recent recording. 
+ */ +SPHINXBASE_EXPORT +int32 ad_read (ad_rec_t *, int16 *buf, int32 max); + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/media/sphinxbase/sphinxbase/agc.h b/media/sphinxbase/sphinxbase/agc.h new file mode 100644 index 000000000..67d74f693 --- /dev/null +++ b/media/sphinxbase/sphinxbase/agc.h @@ -0,0 +1,202 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * agc.h -- Various forms of automatic gain control (AGC) + * + * ********************************************** + * CMU ARPA Speech Project + * + * Copyright (c) 1999 Carnegie Mellon University. + * ALL RIGHTS RESERVED. + * ********************************************** + * + * HISTORY + * $Log$ + * Revision 1.1 2006/04/05 20:27:30 dhdfu + * A Great Reorganzation of header files and executables + * + * Revision 1.8 2005/06/21 19:25:41 arthchan2003 + * 1, Fixed doxygen documentation. 2, Added $ keyword. + * + * Revision 1.4 2005/06/13 04:02:56 archan + * Fixed most doxygen-style documentation under libs3decoder. + * + * Revision 1.3 2005/03/30 01:22:46 archan + * Fixed mistakes in last updates. Add + * + * + * 28-Apr-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Copied from previous version. + */ + + +#ifndef _S3_AGC_H_ +#define _S3_AGC_H_ + +/* Win32/WinCE DLL gunk */ +#include + +#include +#include + +/** \file agc.h + * \brief routine that implements automatic gain control + * + * \warning This function may not be fully compatible with + * SphinxTrain's family of AGC. + * + * This implements AGC. + */ +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. */ +} +#endif + +/** + * Types of acoustic gain control to apply to the features. 
+ */
+typedef enum agc_type_e {
+    AGC_NONE = 0,   /**< No gain control */
+    AGC_MAX,        /**< NOTE(review): presumably selects agc_max() -- confirm in agc.c */
+    AGC_EMAX,       /**< Uses the estimated max kept in agc_t.max */
+    AGC_NOISE       /**< Uses the threshold kept in agc_t.noise_thresh */
+} agc_type_t;
+
+/** Convert string representation (from command-line) to agc_type_t */
+SPHINXBASE_EXPORT
+agc_type_t agc_type_from_str(const char *str);
+
+/** String representations of agc_type_t values. */
+SPHINXBASE_EXPORT
+extern const char *agc_type_str[];
+
+/**
+ * Structure holding data for doing AGC.
+ **/
+typedef struct agc_s {
+    mfcc_t max;          /**< Estimated max for current utterance (for AGC_EMAX) */
+    mfcc_t obs_max;      /**< Observed max in current utterance */
+    int32 obs_frame;     /**< Whether any data was observed after prev update */
+    int32 obs_utt;       /**< Whether any utterances have been observed */
+    mfcc_t obs_max_sum;  /**< NOTE(review): undocumented; presumably a running sum of obs_max values -- confirm in agc.c */
+    mfcc_t noise_thresh; /**< Noise threshold (for AGC_NOISE only) */
+} agc_t;
+
+/**
+ * Initialize AGC structure with default values.
+ */
+SPHINXBASE_EXPORT
+agc_t *agc_init(void);
+
+/**
+ * Free AGC structure.
+ */
+SPHINXBASE_EXPORT
+void agc_free(agc_t *agc);
+
+/**
+ * Apply AGC to the given mfc vectors (normalize all C0 mfc coefficients in the given
+ * input such that the max C0 value is 0, by subtracting the input max C0 from all).
+ * This function operates on an entire utterance at a time. Hence, the entire utterance
+ * must be available beforehand (batchmode).
+ */
+SPHINXBASE_EXPORT
+void agc_max(agc_t *agc,    /**< In: AGC structure (not used) */
+             mfcc_t **mfc,  /**< In/Out: mfc[f] = cepstrum vector in frame f */
+             int32 n_frame  /**< In: number of frames of cepstrum vectors supplied */
+    );
+
+/**
+ * Apply AGC to the given block of MFC vectors.
+ * Unlike agc_max() this does not require the entire utterance to be
+ * available. Call agc_emax_update() at the end of each utterance to
+ * update the AGC parameters.
*/ +SPHINXBASE_EXPORT +void agc_emax(agc_t *agc, /**< In: AGC structure */ + mfcc_t **mfc, /**< In/Out: mfc[f] = cepstrum vector in frame f */ + int32 n_frame /**< In: number of frames of cepstrum vectors supplied */ + ); + +/** + * Update AGC parameters for next utterance. + **/ +SPHINXBASE_EXPORT +void agc_emax_update(agc_t *agc /**< In: AGC structure */ + ); + +/** + * Get the current AGC maximum estimate. + **/ +SPHINXBASE_EXPORT +float32 agc_emax_get(agc_t *agc); + +/** + * Set the current AGC maximum estimate. + **/ +SPHINXBASE_EXPORT +void agc_emax_set(agc_t *agc, float32 m); + +/** + * Apply AGC using noise threshold to the given block of MFC vectors. + **/ +SPHINXBASE_EXPORT +void agc_noise(agc_t *agc, /**< In: AGC structure */ + mfcc_t **mfc, /**< In/Out: mfc[f] = cepstrum vector in frame f */ + int32 n_frame /**< In: number of frames of cepstrum vectors supplied */ + ); + +/** + * Get the current AGC noise threshold. + **/ +SPHINXBASE_EXPORT +float32 agc_get_threshold(agc_t *agc); + +/** + * Set the current AGC noise threshold. + **/ +SPHINXBASE_EXPORT +void agc_set_threshold(agc_t *agc, float32 threshold); + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/media/sphinxbase/sphinxbase/bio.h b/media/sphinxbase/sphinxbase/bio.h new file mode 100644 index 000000000..0382df16a --- /dev/null +++ b/media/sphinxbase/sphinxbase/bio.h @@ -0,0 +1,304 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * bio.h -- Sphinx-3 binary file I/O functions. + * + * ********************************************** + * CMU ARPA Speech Project + * + * Copyright (c) 1996 Carnegie Mellon University. + * ALL RIGHTS RESERVED. + * ********************************************** + * + * HISTORY + * $Log: bio.h,v $ + * Revision 1.8 2005/06/21 20:40:46 arthchan2003 + * 1, Fixed doxygen documentation, 2, Add the $ keyword. + * + * Revision 1.5 2005/06/13 04:02:57 archan + * Fixed most doxygen-style documentation under libs3decoder. + * + * Revision 1.4 2005/05/10 21:21:52 archan + * Three functionalities added but not tested. Code on 1) addition/deletion of LM in mode 4. 
2) reading text-based LM 3) Converting txt-based LM to dmp-based LM. + * + * Revision 1.3 2005/03/30 01:22:46 archan + * Fixed mistakes in last updates. Add + * + * + * 28-Apr-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Created. + */ + + +#ifndef _S3_BIO_H_ +#define _S3_BIO_H_ + +#include +#include + +/* Win32/WinCE DLL gunk */ +#include +#include +#include + +/** \file bio.h + * \brief Cross platform binary IO to process files in sphinx3 format. + * + * + */ + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. */ +} +#endif + +#define BYTE_ORDER_MAGIC (0x11223344) + +/** "reversed senses" SWAP, ARCHAN: This is still incorporated in + Sphinx 3 because lm3g2dmp used it. Don't think that I am very + happy with it. */ + +#if (__BIG_ENDIAN__) +#define REVERSE_SENSE_SWAP_INT16(x) x = ( (((x)<<8)&0x0000ff00) | (((x)>>8)&0x00ff) ) +#define REVERSE_SENSE_SWAP_INT32(x) x = ( (((x)<<24)&0xff000000) | (((x)<<8)&0x00ff0000) | \ + (((x)>>8)&0x0000ff00) | (((x)>>24)&0x000000ff) ) +#else +#define REVERSE_SENSE_SWAP_INT16(x) +#define REVERSE_SENSE_SWAP_INT32(x) + +#endif + + + +/** + * Read binary file format header: has the following format + *
+ *     s3
+ *      
+ *      
+ *     ...
+ *     endhdr
+ *     4-byte byte-order word used to find file byte ordering relative to host machine.
+ * 
+ * Lines beginning with # are ignored. + * Memory for name and val allocated by this function; use bio_hdrarg_free to free them. + * @return 0 if successful, -1 otherwise. + */ +SPHINXBASE_EXPORT +int32 bio_readhdr (FILE *fp, /**< In: File to read */ + char ***name, /**< Out: array of argument name strings read */ + char ***val, /**< Out: corresponding value strings read */ + int32 *swap /**< Out: file needs byteswapping iff (*swap) */ + ); +/** + * Write a simple binary file header, containing only the version string. Also write + * the byte order magic word. + * @return 0 if successful, -1 otherwise. + */ +SPHINXBASE_EXPORT +int32 bio_writehdr_version (FILE *fp, /**< Output: File to write */ + char *version /**< Input: A string of version */ + ); + + +/** + * Write a simple binary file header with only byte order magic word. + * @return 0 if successful, -1 otherwise. + */ +SPHINXBASE_EXPORT +int32 bio_writehdr(FILE *fp, ...); + +/** + * Free name and value strings previously allocated and returned by bio_readhdr. + */ +SPHINXBASE_EXPORT +void bio_hdrarg_free (char **name, /**< In: Array previously returned by bio_readhdr */ + char **val /**< In: Array previously returned by bio_readhdr */ + ); + +/** + * Like fread but perform byteswapping and accumulate checksum (the 2 extra arguments). + * + * @return unlike fread, returns -1 if required number of elements (n_el) not read; also, + * no byteswapping or checksum accumulation is performed in that case. + */ +SPHINXBASE_EXPORT +int32 bio_fread (void *buf, /**< In: buffer to write */ + int32 el_sz, /**< In: element size */ + int32 n_el, /**< In: number of elements */ + FILE *fp, /**< In: An input file pointer */ + int32 swap, /**< In: Byteswap iff (swap != 0) */ + uint32 *chksum /**< In/Out: Accumulated checksum */ + ); + +/** + * Like fwrite but perform byteswapping and accumulate checksum (the 2 extra arguments). + * + * @return the number of elemens written (like fwrite). 
+ */
+SPHINXBASE_EXPORT
+int32 bio_fwrite(const void *buf, /**< In: buffer to write */
+                 int32 el_sz,     /**< In: element size */
+                 int32 n_el,      /**< In: number of elements */
+                 FILE *fp,        /**< In: The output file pointer to write to */
+                 int32 swap,      /**< In: Byteswap iff (swap != 0) */
+                 uint32 *chksum   /**< In/Out: Accumulated checksum */
+    );
+
+/**
+ * Read a 1-d array (fashioned after fread):
+ *
+ * - 4-byte array size (returned in n_el)
+ * - memory allocated for the array and read (returned in buf)
+ *
+ * Byteswapping and checksum accumulation performed as necessary.
+ * Fails fatally if expected data not read.
+ * @return number of array elements allocated and read; -1 if error.
+ */
+SPHINXBASE_EXPORT
+int32 bio_fread_1d (void **buf,   /**< Out: contains array data; allocated by this
+                                     function; can be freed using ckd_free */
+                    size_t el_sz, /**< In: Array element size */
+                    uint32 *n_el, /**< Out: Number of array elements allocated/read */
+                    FILE *fp,     /**< In: File to read */
+                    int32 sw,     /**< In: Byteswap iff (sw != 0) */
+                    uint32 *ck    /**< In/Out: Accumulated checksum */
+    );
+
+/**
+ * Read a 2-d matrix:
+ *
+ * - 4-byte # rows, # columns (returned in d1, d2)
+ * - memory allocated for the array and read (returned in arr)
+ *
+ * Byteswapping and checksum accumulation performed as necessary.
+ * Fails fatally if expected data not read.
+ * @return number of array elements allocated and read; -1 if error.
+ */
+SPHINXBASE_EXPORT
+int32 bio_fread_2d(void ***arr,
+                   size_t e_sz,
+                   uint32 *d1,
+                   uint32 *d2,
+                   FILE *fp,
+                   uint32 swap,
+                   uint32 *chksum);
+
+/**
+ * Read a 3-d array (set of matrices)
+ *
+ * - 4-byte # matrices, # rows, # columns (returned in d1, d2, d3)
+ * - memory allocated for the array and read (returned in arr)
+ *
+ * Byteswapping and checksum accumulation performed as necessary.
+ * Fails fatally if expected data not read.
+ * @return number of array elements allocated and read; -1 if error.
+ */ +SPHINXBASE_EXPORT +int32 bio_fread_3d(void ****arr, + size_t e_sz, + uint32 *d1, + uint32 *d2, + uint32 *d3, + FILE *fp, + uint32 swap, + uint32 *chksum); + +/** + * Read and verify checksum at the end of binary file. Fails fatally if there is + * a mismatch. + */ +SPHINXBASE_EXPORT +void bio_verify_chksum (FILE *fp, /**< In: File to read */ + int32 byteswap, /**< In: Byteswap iff (swap != 0) */ + uint32 chksum /**< In: Value to compare with checksum in file */ + ); + + + +/** + * Write a 1-d array. + * Checksum accumulation performed as necessary. + * + * @return number of array elements successfully written or -1 if error. + */ +SPHINXBASE_EXPORT +int bio_fwrite_1d(void *arr, /**< In: Data to write */ + size_t e_sz, /**< In: Size of the elements in bytes */ + uint32 d1, /**< In: First dimension */ + FILE *fp, /**< In: File to write to */ + uint32 *chksum /**< In/Out: Checksum accumulator */ + ); + +/** + * Write a 3-d array (set of matrices). + * Checksum accumulation performed as necessary. + * + * @return number of array elements successfully written or -1 if error. + */ +SPHINXBASE_EXPORT +int bio_fwrite_3d(void ***arr, /**< In: Data to write */ + size_t e_sz, /**< In: Size of the elements in bytes */ + uint32 d1, /**< In: First dimension */ + uint32 d2, /**< In: Second dimension */ + uint32 d3, /**< In: Third dimension */ + FILE *fp, /**< In: File to write to */ + uint32 *chksum /**< In/Out: Checksum accumulator */ + ); + +/** + * Read raw data from the wav file. + * + * @return pointer to the data. 
+ */ +SPHINXBASE_EXPORT +int16* bio_read_wavfile(char const *directory, /**< In: the folder where the file is located */ + char const *filename, /**< In: the name of the file */ + char const *extension, /**< In: file extension */ + int32 header, /**< In: the size of the header to skip usually 44 bytes */ + int32 endian, /**< In: endian of the data */ + size_t *nsamps /**< Out: number of samples read */ + ); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/media/sphinxbase/sphinxbase/bitvec.h b/media/sphinxbase/sphinxbase/bitvec.h new file mode 100644 index 000000000..d5644df89 --- /dev/null +++ b/media/sphinxbase/sphinxbase/bitvec.h @@ -0,0 +1,155 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +#ifndef _LIBUTIL_BITVEC_H_ +#define _LIBUTIL_BITVEC_H_ + +#include + +/* Win32/WinCE DLL gunk */ +#include + +#include +#include + +/** + * @file bitvec.h + * @brief An implementation of bit vectors. + * + * Implementation of basic operations of bit vectors. + */ + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. */ +} +#endif + +#define BITVEC_BITS 32 +typedef uint32 bitvec_t; + +/** + * Number of bitvec_t in a bit vector + */ +#define bitvec_size(n) (((n)+BITVEC_BITS-1)/BITVEC_BITS) + +/** + * Allocate a bit vector, all bits are clear + */ +#define bitvec_alloc(n) ckd_calloc(bitvec_size(n), sizeof(bitvec_t)) + +/** + * Resize a bit vector, clear the remaining bits + */ +SPHINXBASE_EXPORT +bitvec_t *bitvec_realloc(bitvec_t *vec, /* In: Bit vector to search */ + size_t old_len, /* In: Old length */ + size_t new_len); /* In: New lenght of above bit vector */ +/** + * Free a bit vector. 
+ */ +#define bitvec_free(v) ckd_free(v) + +/** + * Set the b-th bit of bit vector v + * @param v is a vector + * @param b is the bit which will be set + */ + +#define bitvec_set(v,b) (v[(b)/BITVEC_BITS] |= (1UL << ((b) & (BITVEC_BITS-1)))) + +/** + * Set all n bits in bit vector v + * @param v is a vector + * @param n is the number of bits + */ + +#define bitvec_set_all(v,n) memset(v, (bitvec_t)-1, \ + (((n)+BITVEC_BITS-1)/BITVEC_BITS) * \ + sizeof(bitvec_t)) +/** + * Clear the b-th bit of bit vector v + * @param v is a vector + * @param b is the bit which will be set + */ + +#define bitvec_clear(v,b) (v[(b)/BITVEC_BITS] &= ~(1UL << ((b) & (BITVEC_BITS-1)))) + +/** + * Clear all n bits in bit vector v + * @param v is a vector + * @param n is the number of bits + */ + +#define bitvec_clear_all(v,n) memset(v, 0, (((n)+BITVEC_BITS-1)/BITVEC_BITS) * \ + sizeof(bitvec_t)) + +/** + * Check whether the b-th bit is set in vector v + * @param v is a vector + * @param b is the bit which will be checked + */ + +#define bitvec_is_set(v,b) (v[(b)/BITVEC_BITS] & (1UL << ((b) & (BITVEC_BITS-1)))) + +/** + * Check whether the b-th bit is cleared in vector v + * @param v is a vector + * @param b is the bit which will be checked + */ + +#define bitvec_is_clear(v,b) (! (bitvec_is_set(v,b))) + + +/** + * Return the number of bits set in the given bitvector. 
+ * + * @param vec is the bit vector + * @param len is the length of bit vector vec + * @return the number of bits being set in vector vec + */ +SPHINXBASE_EXPORT +size_t bitvec_count_set(bitvec_t *vec, /* In: Bit vector to search */ + size_t len); /* In: Lenght of above bit vector */ + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/media/sphinxbase/sphinxbase/byteorder.h b/media/sphinxbase/sphinxbase/byteorder.h new file mode 100644 index 000000000..692ce60a0 --- /dev/null +++ b/media/sphinxbase/sphinxbase/byteorder.h @@ -0,0 +1,98 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2001 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +/* + * byteorder.h -- Byte swapping ordering macros. + * + * ********************************************** + * CMU ARPA Speech Project + * + * Copyright (c) 1996 Carnegie Mellon University. + * ALL RIGHTS RESERVED. + * ********************************************** + * + * HISTORY + * + * $Log: byteorder.h,v $ + * Revision 1.8 2005/09/01 21:09:54 dhdfu + * Really, actually, truly consolidate byteswapping operations into + * byteorder.h. Where unconditional byteswapping is needed, SWAP_INT32() + * and SWAP_INT16() are to be used. The WORDS_BIGENDIAN macro from + * autoconf controls the functioning of the conditional swap macros + * (SWAP_?[LW]) whose names and semantics have been regularized. + * Private, adhoc macros have been removed. + * + */ + +#ifndef __S2_BYTEORDER_H__ +#define __S2_BYTEORDER_H__ 1 + +/* Macro to byteswap an int16 variable. x = ptr to variable */ +#define SWAP_INT16(x) *(x) = ((0x00ff & (*(x))>>8) | (0xff00 & (*(x))<<8)) + +/* Macro to byteswap an int32 variable. x = ptr to variable */ +#define SWAP_INT32(x) *(x) = ((0x000000ff & (*(x))>>24) | \ + (0x0000ff00 & (*(x))>>8) | \ + (0x00ff0000 & (*(x))<<8) | \ + (0xff000000 & (*(x))<<24)) + +/* Macro to byteswap a float32 variable. x = ptr to variable */ +#define SWAP_FLOAT32(x) SWAP_INT32((int32 *) x) + +/* Macro to byteswap a float64 variable. 
x = ptr to variable; expands to a bare { } block (not do/while) that byteswaps two consecutive ints at x and then exchanges them */
+#define SWAP_FLOAT64(x) { int *low = (int *) (x), *high = (int *) (x) + 1,\
+                              temp;\
+                          SWAP_INT32(low); SWAP_INT32(high);\
+                          temp = *low; *low = *high; *high = temp;}
+
+#ifdef WORDS_BIGENDIAN /* SWAP_BE_* expand to nothing; SWAP_LE_* perform the swap */
+#define SWAP_BE_64(x)
+#define SWAP_BE_32(x)
+#define SWAP_BE_16(x)
+#define SWAP_LE_64(x) SWAP_FLOAT64(x)
+#define SWAP_LE_32(x) SWAP_INT32(x)
+#define SWAP_LE_16(x) SWAP_INT16(x)
+#else /* WORDS_BIGENDIAN not defined: SWAP_LE_* expand to nothing; SWAP_BE_* perform the swap */
+#define SWAP_LE_64(x)
+#define SWAP_LE_32(x)
+#define SWAP_LE_16(x)
+#define SWAP_BE_64(x) SWAP_FLOAT64(x)
+#define SWAP_BE_32(x) SWAP_INT32(x)
+#define SWAP_BE_16(x) SWAP_INT16(x)
+#endif
+
+#endif
diff --git a/media/sphinxbase/sphinxbase/case.h b/media/sphinxbase/sphinxbase/case.h
new file mode 100644
index 000000000..bd1f62e02
--- /dev/null
+++ b/media/sphinxbase/sphinxbase/case.h
@@ -0,0 +1,135 @@
+/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
+/* ====================================================================
+ * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * This work was supported in part by funding from the Defense Advanced
+ * Research Projects Agency and the National Science Foundation of the
+ * United States of America, and the CMU Sphinx Speech Consortium.
+ * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * case.h -- Upper/lower case conversion routines + * + * ********************************************** + * CMU ARPA Speech Project + * + * Copyright (c) 1999 Carnegie Mellon University. + * ALL RIGHTS RESERVED. + * ********************************************** + * + * HISTORY + * $Log: case.h,v $ + * Revision 1.7 2005/06/22 02:58:54 arthchan2003 + * Added keyword + * + * Revision 1.3 2005/03/30 01:22:48 archan + * Fixed mistakes in last updates. Add + * + * + * 18-Jun-97 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon + * Added strcmp_nocase, UPPER_CASE and LOWER_CASE definitions. + * + * 16-Feb-97 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon + * Created. + */ + + +/** + * @file case.h + * @brief Locale-independent implementation of case swapping operation. + * + * This function implements ASCII-only case switching and comparison + * related operations, which do not depend on the locale and are + * guaranteed to exist on all versions of Windows. 
+ */ + +#ifndef _LIBUTIL_CASE_H_ +#define _LIBUTIL_CASE_H_ + +#include + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. */ +} +#endif + + /** + * Return upper case form for c + */ +#define UPPER_CASE(c) ((((c) >= 'a') && ((c) <= 'z')) ? (c-32) : c) + + /** + * Return lower case form for c + */ +#define LOWER_CASE(c) ((((c) >= 'A') && ((c) <= 'Z')) ? (c+32) : c) + + + /** + * Convert str to all upper case. + * @param str is a string. + */ +SPHINXBASE_EXPORT +void ucase(char *str); + + /** + * Convert str to all lower case + * @param str is a string. + */ +SPHINXBASE_EXPORT +void lcase(char *str); + + /** + * (FIXME! The implementation is incorrect!) + * Case insensitive string compare. Return the usual -1, 0, +1, depending on + * str1 <, =, > str2 (case insensitive, of course). + * @param str1 is the first string. + * @param str2 is the second string. + */ +SPHINXBASE_EXPORT +int32 strcmp_nocase(const char *str1, const char *str2); + +/** + * Like strcmp_nocase() but with a maximum length. + */ +SPHINXBASE_EXPORT +int32 strncmp_nocase(const char *str1, const char *str2, size_t len); + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/media/sphinxbase/sphinxbase/ckd_alloc.h b/media/sphinxbase/sphinxbase/ckd_alloc.h new file mode 100644 index 000000000..221ed6cae --- /dev/null +++ b/media/sphinxbase/sphinxbase/ckd_alloc.h @@ -0,0 +1,310 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * ckd_alloc.h -- Memory allocation package. + * + * ********************************************** + * CMU ARPA Speech Project + * + * Copyright (c) 1999 Carnegie Mellon University. + * ALL RIGHTS RESERVED. + * ********************************************** + * + * HISTORY + * $Log: ckd_alloc.h,v $ + * Revision 1.10 2005/06/22 02:59:25 arthchan2003 + * Added keyword + * + * Revision 1.3 2005/03/30 01:22:48 archan + * Fixed mistakes in last updates. Add + * + * + * 19-Jun-97 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Removed file,line arguments from free functions. 
+ * + * 01-Jan-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Created. + */ + + +/********************************************************************* + * + * $Header: /cvsroot/cmusphinx/sphinx3/src/libutil/ckd_alloc.h,v 1.10 2005/06/22 02:59:25 arthchan2003 Exp $ + * + * Carnegie Mellon ARPA Speech Group + * + * Copyright (c) 1994 Carnegie Mellon University. + * All rights reserved. + * + ********************************************************************* + * + * file: ckd_alloc.h + * + * traceability: + * + * description: + * + * author: + * + *********************************************************************/ + + +#ifndef _LIBUTIL_CKD_ALLOC_H_ +#define _LIBUTIL_CKD_ALLOC_H_ + +#include +#include + +/* Win32/WinCE DLL gunk */ +#include +#include + +/** \file ckd_alloc.h + *\brief Sphinx's memory allocation/deallocation routines. + * + *Implementation of efficient memory allocation deallocation for + *multiple dimensional arrays. + * + */ + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. */ +} +#endif + +/** + * Control behaviour of the program when allocation fails. + * + * Although your program is probably toast when memory allocation + * fails, it is also probably a good idea to be able to catch these + * errors and alert the user in some way. Either that, or you might + * want the program to call abort() so that you can debug the failed + * code. This function allows you to control that behaviour. + * + * @param env Pointer to a jmp_buf initialized with + * setjmp(), or NULL to remove a previously set jump target. + * @param abort If non-zero, the program will call abort() when + * allocation fails rather than exiting or calling longjmp(). + * @return Pointer to a previously set jmp_buf, if any. + */ +jmp_buf *ckd_set_jump(jmp_buf *env, int abort); + +/** + * Fail (with a message) according to behaviour specified by ckd_set_jump(). 
+ */
+void ckd_fail(char *format, ...);
+
+/*
+ * The following functions are similar to the malloc family, except
+ * that they have two additional parameters, caller_file and
+ * caller_line, for error reporting. All functions print a diagnostic
+ * message if any error occurs, with any other behaviour determined by
+ * ckd_set_jump(), above.
+ */
+
+SPHINXBASE_EXPORT
+void *__ckd_calloc__(size_t n_elem, size_t elem_size,
+                     const char *caller_file, int caller_line);
+
+SPHINXBASE_EXPORT
+void *__ckd_malloc__(size_t size,
+                     const char *caller_file, int caller_line);
+
+SPHINXBASE_EXPORT
+void *__ckd_realloc__(void *ptr, size_t new_size,
+                      const char *caller_file, int caller_line);
+
+/**
+ * Like strdup, except that if an error occurs it prints a diagnostic message and
+ * exits. If origstr is NULL the function also returns NULL.
+ */
+SPHINXBASE_EXPORT
+char *__ckd_salloc__(const char *origstr,
+                     const char *caller_file, int caller_line);
+
+/**
+ * Allocate a 2-D array and return ptr to it (ie, ptr to vector of ptrs).
+ * The data area is allocated in one block so it can also be treated as a 1-D array.
+ */
+SPHINXBASE_EXPORT
+void *__ckd_calloc_2d__(size_t d1, size_t d2, /* In: #elements in the 2 dimensions */
+                        size_t elemsize, /* In: Size (#bytes) of each element */
+                        const char *caller_file, int caller_line); /* In */
+
+/**
+ * Allocate a 3-D array and return ptr to it.
+ * The data area is allocated in one block so it can also be treated as a 1-D array.
+ */
+SPHINXBASE_EXPORT
+void *__ckd_calloc_3d__(size_t d1, size_t d2, size_t d3, /* In: #elems in the dims */
+                        size_t elemsize, /* In: Size (#bytes) per element */
+                        const char *caller_file, int caller_line); /* In */
+
+/**
+ * Allocate a 4-D array and return ptr to it.
+ * The data area is allocated in one block so it can also be treated as a 1-D array.
+ */ +SPHINXBASE_EXPORT +void ****__ckd_calloc_4d__(size_t d1, + size_t d2, + size_t d3, + size_t d4, + size_t elem_size, + char *caller_file, + int caller_line); + +/** + * Overlay a 3-D array over a previously allocated storage area. + **/ +SPHINXBASE_EXPORT +void * __ckd_alloc_3d_ptr(size_t d1, + size_t d2, + size_t d3, + void *store, + size_t elem_size, + char *caller_file, + int caller_line); + +/** + * Overlay a s-D array over a previously allocated storage area. + **/ +SPHINXBASE_EXPORT +void *__ckd_alloc_2d_ptr(size_t d1, + size_t d2, + void *store, + size_t elem_size, + char *caller_file, + int caller_line); + +/** + * Test and free a 1-D array + */ +SPHINXBASE_EXPORT +void ckd_free(void *ptr); + +/** + * Free a 2-D array (ptr) previously allocated by ckd_calloc_2d + */ +SPHINXBASE_EXPORT +void ckd_free_2d(void *ptr); + +/** + * Free a 3-D array (ptr) previously allocated by ckd_calloc_3d + */ +SPHINXBASE_EXPORT +void ckd_free_3d(void *ptr); + +/** + * Free a 4-D array (ptr) previously allocated by ckd_calloc_4d + */ +SPHINXBASE_EXPORT +void ckd_free_4d(void *ptr); + +/** + * Macros to simplify the use of above functions. + * One should use these, rather than target functions directly. 
+ */ + +/** + * Macro for __ckd_calloc__ + */ +#define ckd_calloc(n,sz) __ckd_calloc__((n),(sz),__FILE__,__LINE__) + +/** + * Macro for __ckd_malloc__ + */ +#define ckd_malloc(sz) __ckd_malloc__((sz),__FILE__,__LINE__) + +/** + * Macro for __ckd_realloc__ + */ +#define ckd_realloc(ptr,sz) __ckd_realloc__(ptr,(sz),__FILE__,__LINE__) + +/** + * Macro for __ckd_salloc__ + */ + +#define ckd_salloc(ptr) __ckd_salloc__(ptr,__FILE__,__LINE__) + +/** + * Macro for __ckd_calloc_2d__ + */ + +#define ckd_calloc_2d(d1,d2,sz) __ckd_calloc_2d__((d1),(d2),(sz),__FILE__,__LINE__) + +/** + * Macro for __ckd_calloc_3d__ + */ + +#define ckd_calloc_3d(d1,d2,d3,sz) __ckd_calloc_3d__((d1),(d2),(d3),(sz),__FILE__,__LINE__) + +/** + * Macro for __ckd_calloc_4d__ + */ +#define ckd_calloc_4d(d1, d2, d3, d4, s) __ckd_calloc_4d__((d1), (d2), (d3), (d4), (s), __FILE__, __LINE__) + +/** + * Macro for __ckd_alloc_2d_ptr__ + */ + +#define ckd_alloc_2d_ptr(d1, d2, bf, sz) __ckd_alloc_2d_ptr((d1), (d2), (bf), (sz), __FILE__, __LINE__) + +/** + * Free only the pointer arrays allocated with ckd_alloc_2d_ptr(). + */ +#define ckd_free_2d_ptr(bf) ckd_free(bf) + +/** + * Macro for __ckd_alloc_3d_ptr__ + */ + +#define ckd_alloc_3d_ptr(d1, d2, d3, bf, sz) __ckd_alloc_3d_ptr((d1), (d2), (d3), (bf), (sz), __FILE__, __LINE__) + +/** + * Free only the pointer arrays allocated with ckd_alloc_3d_ptr(). + */ +#define ckd_free_3d_ptr(bf) ckd_free_2d(bf) + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/media/sphinxbase/sphinxbase/clapack_lite.h b/media/sphinxbase/sphinxbase/clapack_lite.h new file mode 100644 index 000000000..0f5a1f4bd --- /dev/null +++ b/media/sphinxbase/sphinxbase/clapack_lite.h @@ -0,0 +1,36 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +#ifndef __CLAPACK_LITE_H +#define __CLAPACK_LITE_H + +#include "f2c.h" + + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. 
*/ +} +#endif + +/* Subroutine */ int sgemm_(char *transa, char *transb, integer *m, integer * + n, integer *k, real *alpha, real *a, integer *lda, real *b, integer * + ldb, real *beta, real *c__, integer *ldc); +/* Subroutine */ int sgemv_(char *trans, integer *m, integer *n, real *alpha, + real *a, integer *lda, real *x, integer *incx, real *beta, real *y, + integer *incy); +/* Subroutine */ int ssymm_(char *side, char *uplo, integer *m, integer *n, + real *alpha, real *a, integer *lda, real *b, integer *ldb, real *beta, + real *c__, integer *ldc); + +/* Subroutine */ int sposv_(char *uplo, integer *n, integer *nrhs, real *a, + integer *lda, real *b, integer *ldb, integer *info); +/* Subroutine */ int spotrf_(char *uplo, integer *n, real *a, integer *lda, + integer *info); + +#ifdef __cplusplus +} +#endif + + +#endif /* __CLAPACK_LITE_H */ diff --git a/media/sphinxbase/sphinxbase/cmd_ln.h b/media/sphinxbase/sphinxbase/cmd_ln.h new file mode 100644 index 000000000..364677e1b --- /dev/null +++ b/media/sphinxbase/sphinxbase/cmd_ln.h @@ -0,0 +1,587 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * cmd_ln.h -- Command line argument parsing. + * + * ********************************************** + * CMU ARPA Speech Project + * + * Copyright (c) 1999 Carnegie Mellon University. + * ALL RIGHTS RESERVED. + * ********************************************** + * + * HISTORY + * + * 15-Jul-1997 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Added required arguments types. + * + * 07-Dec-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Created, based on Eric's implementation. Basically, combined several + * functions into one, eliminated validation, and simplified the interface. + */ + + +#ifndef _LIBUTIL_CMD_LN_H_ +#define _LIBUTIL_CMD_LN_H_ + +#include +#include + +/* Win32/WinCE DLL gunk */ +#include +#include + +/** + * @file cmd_ln.h + * @brief Command-line and other configurationparsing and handling. 
+ * + * Configuration parameters, optionally parsed from the command line. + */ + + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. */ +} +#endif + +/** + * @struct arg_t + * Argument definition structure. + */ +typedef struct arg_s { + char const *name; /**< Name of the command line switch */ + int type; /**< Type of the argument in question */ + char const *deflt; /**< Default value (as a character string), or NULL if none */ + char const *doc; /**< Documentation/description string */ +} arg_t; + +/** + * @name Values for arg_t::type + */ +/* @{ */ +/** + * Bit indicating a required argument. + */ +#define ARG_REQUIRED (1<<0) +/** + * Integer argument (optional). + */ +#define ARG_INTEGER (1<<1) +/** + * Floating point argument (optional). + */ +#define ARG_FLOATING (1<<2) +/** + * String argument (optional). + */ +#define ARG_STRING (1<<3) +/** + * Boolean (true/false) argument (optional). + */ +#define ARG_BOOLEAN (1<<4) +/** + * List-of-strings argument (optional). + */ +#define ARG_STRING_LIST (1<<5) + +/** + * Required integer argument. + */ +#define REQARG_INTEGER (ARG_INTEGER | ARG_REQUIRED) +/** + * Required floating point argument. + */ +#define REQARG_FLOATING (ARG_FLOATING | ARG_REQUIRED) +/** + * Required string argument. + */ +#define REQARG_STRING (ARG_STRING | ARG_REQUIRED) +/** + * Required boolean argument. + */ +#define REQARG_BOOLEAN (ARG_BOOLEAN | ARG_REQUIRED) + +/** + * @deprecated Use ARG_INTEGER instead. + */ +#define ARG_INT32 ARG_INTEGER +/** + * @deprecated Use ARG_FLOATING instead. + */ +#define ARG_FLOAT32 ARG_FLOATING +/** + * @deprecated Use ARG_FLOATING instead. + */ +#define ARG_FLOAT64 ARG_FLOATING +/** + * @deprecated Use REQARG_INTEGER instead. + */ +#define REQARG_INT32 (ARG_INT32 | ARG_REQUIRED) +/** + * @deprecated Use REQARG_FLOATING instead. + */ +#define REQARG_FLOAT32 (ARG_FLOAT32 | ARG_REQUIRED) +/** + * @deprecated Use REQARG_FLOATING instead.
+ */ +#define REQARG_FLOAT64 (ARG_FLOAT64 | ARG_REQUIRED) +/* @} */ + + +/** + * Helper macro to stringify enums and other non-string values for + * default arguments. + **/ +#define ARG_STRINGIFY(s) ARG_STRINGIFY1(s) +#define ARG_STRINGIFY1(s) #s + +/** + * @struct cmd_ln_t + * Opaque structure used to hold the results of command-line parsing. + */ +typedef struct cmd_ln_s cmd_ln_t; + +/** + * Create a cmd_ln_t from NULL-terminated list of arguments. + * + * This function creates a cmd_ln_t from a NULL-terminated list of + * argument strings. For example, to create the equivalent of passing + * "-hmm foodir -dsratio 2 -lm bar.lm" on the command-line: + * + * config = cmd_ln_init(NULL, defs, TRUE, "-hmm", "foodir", "-dsratio", "2", + * "-lm", "bar.lm", NULL); + * + * Note that for simplicity, all arguments are passed + * as strings, regardless of the actual underlying type. + * + * @param inout_cmdln Previous command-line to update, or NULL to create a new one. + * @param defn Array of argument name definitions, or NULL to allow any arguments. + * @param strict Whether to fail on duplicate or unknown arguments. + * @return A cmd_ln_t* containing the results of command line parsing, or NULL on failure. + */ +SPHINXBASE_EXPORT +cmd_ln_t *cmd_ln_init(cmd_ln_t *inout_cmdln, arg_t const *defn, int32 strict, ...); + +/** + * Retain ownership of a command-line argument set. + * + * @return pointer to retained command-line argument set. + */ +SPHINXBASE_EXPORT +cmd_ln_t *cmd_ln_retain(cmd_ln_t *cmdln); + +/** + * Release a command-line argument set and all associated strings. + * + * @return new reference count (0 if freed completely) + */ +SPHINXBASE_EXPORT +int cmd_ln_free_r(cmd_ln_t *cmdln); + +/** + * Parse a list of strings into arguments. + * + * Parse the given list of arguments (name-value pairs) according to + * the given definitions. Argument values can be retrieved in future + * using cmd_ln_access(). argv[0] is assumed to be the program name + * and skipped.
Any unknown argument name causes a fatal error. The + * routine also prints the prevailing argument values (to stderr) + * after parsing. + * + * @note It is currently assumed that the strings in argv are + * allocated statically, or at least that they will be valid as + * long as the cmd_ln_t returned from this function. + * Unpredictable behaviour will result if they are freed or + * otherwise become invalidated. + * + * @return A cmd_ln_t containing the results of command line parsing, + * or NULL on failure. + **/ +SPHINXBASE_EXPORT +cmd_ln_t *cmd_ln_parse_r(cmd_ln_t *inout_cmdln, /**< In/Out: Previous command-line to update, + or NULL to create a new one. */ + arg_t const *defn, /**< In: Array of argument name definitions */ + int32 argc, /**< In: Number of actual arguments */ + char *argv[], /**< In: Actual arguments */ + int32 strict /**< In: Fail on duplicate or unknown + arguments, or no arguments? */ + ); + +/** + * Parse an arguments file by delimiting on " \r\t\n" and putting each token + * into an argv[] for cmd_ln_parse(). + * + * @return A cmd_ln_t containing the results of command line parsing, or NULL on failure. + */ +SPHINXBASE_EXPORT +cmd_ln_t *cmd_ln_parse_file_r(cmd_ln_t *inout_cmdln, /**< In/Out: Previous command-line to update, + or NULL to create a new one. */ + arg_t const *defn, /**< In: Array of argument name definitions*/ + char const *filename,/**< In: A file that contains all + the arguments */ + int32 strict /**< In: Fail on duplicate or unknown + arguments, or no arguments? */ + ); + +/** + * Access the generic type union for a command line argument. + */ +SPHINXBASE_EXPORT +anytype_t *cmd_ln_access_r(cmd_ln_t *cmdln, char const *name); + +/** + * Retrieve a string from a command-line object. + * + * The command-line object retains ownership of this string, so you + * should not attempt to free it manually. + * + * @param cmdln Command-line object. + * @param name the command-line flag to retrieve.
+ * @return the string value associated with name, or NULL if + * name does not exist. You must use + * cmd_ln_exists_r() to distinguish between cases where a + * value is legitimately NULL and where the corresponding flag + * is unknown. + */ +SPHINXBASE_EXPORT +char const *cmd_ln_str_r(cmd_ln_t *cmdln, char const *name); + +/** + * Retrieve an array of strings from a command-line object. + * + * The command-line object retains ownership of this array, so you + * should not attempt to free it manually. + * + * @param cmdln Command-line object. + * @param name the command-line flag to retrieve. + * @return the array of strings associated with name, or NULL if + * name does not exist. You must use + * cmd_ln_exists_r() to distinguish between cases where a + * value is legitimately NULL and where the corresponding flag + * is unknown. + */ +SPHINXBASE_EXPORT +char const **cmd_ln_str_list_r(cmd_ln_t *cmdln, char const *name); + +/** + * Retrieve an integer from a command-line object. + * + * @param cmdln Command-line object. + * @param name the command-line flag to retrieve. + * @return the integer value associated with name, or 0 if + * name does not exist. You must use + * cmd_ln_exists_r() to distinguish between cases where a + * value is legitimately zero and where the corresponding flag + * is unknown. + */ +SPHINXBASE_EXPORT +long cmd_ln_int_r(cmd_ln_t *cmdln, char const *name); + +/** + * Retrieve a floating-point number from a command-line object. + * + * @param cmdln Command-line object. + * @param name the command-line flag to retrieve. + * @return the float value associated with name, or 0.0 if + * name does not exist. You must use + * cmd_ln_exists_r() to distinguish between cases where a + * value is legitimately zero and where the corresponding flag + * is unknown. + */ +SPHINXBASE_EXPORT +double cmd_ln_float_r(cmd_ln_t *cmdln, char const *name); + +/** + * Retrieve a boolean value from a command-line object. 
+ */ +#define cmd_ln_boolean_r(c,n) (cmd_ln_int_r(c,n) != 0) + +/** + * Set a string in a command-line object. + * + * @param cmdln Command-line object. + * @param name The command-line flag to set. + * @param str String value to set. The command-line object does not + * retain ownership of this pointer. + */ +SPHINXBASE_EXPORT +void cmd_ln_set_str_r(cmd_ln_t *cmdln, char const *name, char const *str); + +/** + * Set an integer in a command-line object. + * + * @param cmdln Command-line object. + * @param name The command-line flag to set. + * @param iv Integer value to set. + */ +SPHINXBASE_EXPORT +void cmd_ln_set_int_r(cmd_ln_t *cmdln, char const *name, long iv); + +/** + * Set a floating-point number in a command-line object. + * + * @param cmdln Command-line object. + * @param name The command-line flag to set. + * @param fv Floating-point value to set. + */ +SPHINXBASE_EXPORT +void cmd_ln_set_float_r(cmd_ln_t *cmdln, char const *name, double fv); + +/** + * Set a boolean value in a command-line object. + */ +#define cmd_ln_set_boolean_r(c,n,b) (cmd_ln_set_int_r(c,n,(b)!=0)) + +/* + * Compatibility macros. + * + * Each expansion that starts with a cast is wrapped in parentheses, and + * the value argument is parenthesized before it is cast, so the macros + * behave like ordinary function calls inside larger expressions. + */ +#define cmd_ln_int32_r(c,n) ((int32)cmd_ln_int_r(c,n)) +#define cmd_ln_float32_r(c,n) ((float32)cmd_ln_float_r(c,n)) +#define cmd_ln_float64_r(c,n) ((float64)cmd_ln_float_r(c,n)) +#define cmd_ln_set_int32_r(c,n,i) cmd_ln_set_int_r(c,n,i) +#define cmd_ln_set_float32_r(c,n,f) cmd_ln_set_float_r(c,n,(double)(f)) +#define cmd_ln_set_float64_r(c,n,f) cmd_ln_set_float_r(c,n,(double)(f)) + +/** + * Re-entrant version of cmd_ln_exists(). + * + * @return True if the command line argument exists (i.e. it + * was one of the arguments defined in the call to cmd_ln_parse_r()). + */ +SPHINXBASE_EXPORT +int cmd_ln_exists_r(cmd_ln_t *cmdln, char const *name); + +/** + * Print a help message listing the valid argument names, and the associated + * attributes as given in defn. + * + * @param fp output stream + * @param defn Array of argument name definitions.
+ */ +SPHINXBASE_EXPORT +void cmd_ln_print_help_r (cmd_ln_t *cmdln, FILE *fp, const arg_t *defn); + +/** + * Non-reentrant version of cmd_ln_parse_r(). + * + * @deprecated This is deprecated in favor of the re-entrant API + * function cmd_ln_parse_r(). + * @return 0 if successful, <0 if error. + */ +SPHINXBASE_EXPORT +int32 cmd_ln_parse(const arg_t *defn, /**< In: Array of argument name definitions */ + int32 argc, /**< In: Number of actual arguments */ + char *argv[], /**< In: Actual arguments */ + int32 strict /**< In: Fail on duplicate or unknown + arguments, or no arguments? */ + ); + +/** + * Parse an arguments file by delimiting on " \r\t\n" and putting each token + * into an argv[] for cmd_ln_parse(). + * + * @deprecated This is deprecated in favor of the re-entrant API + * function cmd_ln_parse_file_r(). + * + * @return 0 if successful, <0 on error. + */ +SPHINXBASE_EXPORT +int32 cmd_ln_parse_file(const arg_t *defn, /**< In: Array of argument name definitions*/ + char const *filename,/**< In: A file that contains all the arguments */ + int32 strict /**< In: Fail on duplicate or unknown + arguments, or no arguments? */ + ); + +/** + * Old application initialization routine for Sphinx3 code. + * + * @deprecated This is deprecated in favor of the re-entrant API. + */ +SPHINXBASE_EXPORT +void cmd_ln_appl_enter(int argc, /**< In: Number of actual arguments */ + char *argv[], /**< In: Actual arguments */ + char const* default_argfn, /**< In: default argument file name*/ + const arg_t *defn /**< Command-line argument definition */ + ); + + +/** + * Finalization routine corresponding to cmd_ln_appl_enter(). + * + * @deprecated This is deprecated in favor of the re-entrant API. + */ + +SPHINXBASE_EXPORT +void cmd_ln_appl_exit(void); + +/** + * Retrieve the global cmd_ln_t object used by non-re-entrant functions. + * + * @deprecated This is deprecated in favor of the re-entrant API. + * @return global cmd_ln_t object.
+ */ +SPHINXBASE_EXPORT +cmd_ln_t *cmd_ln_get(void); + +/** + * Test the existence of a command-line argument in the global set of + * definitions. + * + * @deprecated This is deprecated in favor of the re-entrant API + * function cmd_ln_exists_r(). + * + * @return True if the command line argument exists (i.e. it + * was one of the arguments defined in the call to cmd_ln_parse()). + */ +#define cmd_ln_exists(name) cmd_ln_exists_r(cmd_ln_get(), name) + +/** + * Return a pointer to the previously parsed value for the given argument name. + * + * @deprecated This is deprecated in favor of the re-entrant API + * function cmd_ln_access_r(). + */ +#define cmd_ln_access(name) cmd_ln_access_r(cmd_ln_get(), name) + +/** + * Retrieve a string from the global command line. + * + * @deprecated This is deprecated in favor of the re-entrant API + * function cmd_ln_str_r(). + */ +#define cmd_ln_str(name) cmd_ln_str_r(cmd_ln_get(), name) + +/** + * Retrieve an array of strings in the global command line. + * + * @deprecated This is deprecated in favor of the re-entrant API + * function cmd_ln_str_list_r(). + */ +#define cmd_ln_str_list(name) cmd_ln_str_list_r(cmd_ln_get(), name) + +/** + * Retrieve a 32-bit integer from the global command line. + * + * The expansion is parenthesized so the cast cannot re-associate with + * operators surrounding the macro call. + * + * @deprecated This is deprecated in favor of the re-entrant API + * function cmd_ln_int_r(). + */ +#define cmd_ln_int32(name) ((int32)cmd_ln_int_r(cmd_ln_get(), name)) +/** + * Retrieve a 32-bit float from the global command line. + * + * @deprecated This is deprecated in favor of the re-entrant API + * function cmd_ln_float_r(). + */ +#define cmd_ln_float32(name) ((float32)cmd_ln_float_r(cmd_ln_get(), name)) +/** + * Retrieve a 64-bit float from the global command line. + * + * @deprecated This is deprecated in favor of the re-entrant API + * function cmd_ln_float_r(). + */ +#define cmd_ln_float64(name) ((float64)cmd_ln_float_r(cmd_ln_get(), name)) +/** + * Retrieve a boolean from the global command line.
+ * + * @deprecated This is deprecated in favor of the re-entrant API + * function cmd_ln_boolean_r(). + */ +#define cmd_ln_boolean(name) cmd_ln_boolean_r(cmd_ln_get(), name) + +/** + * Set a string in the global command line. + * + * @deprecated This is deprecated in favor of the re-entrant API + * function cmd_ln_set_str_r(). + */ +#define cmd_ln_set_str(n,s) cmd_ln_set_str_r(cmd_ln_get(),n,s) +/** + * Set a 32-bit integer value in the global command line. + * + * @deprecated This is deprecated in favor of the re-entrant API + * function cmd_ln_set_int_r(). + */ +#define cmd_ln_set_int32(n,i) cmd_ln_set_int_r(cmd_ln_get(),n,i) +/** + * Set a 32-bit float in the global command line. + * + * @deprecated This is deprecated in favor of the re-entrant API + * function cmd_ln_set_float_r(). + */ +#define cmd_ln_set_float32(n,f) cmd_ln_set_float_r(cmd_ln_get(),n,f) +/** + * Set a 64-bit float in the global command line. + * + * @deprecated This is deprecated in favor of the re-entrant API + * function cmd_ln_set_float_r(). + */ +#define cmd_ln_set_float64(n,f) cmd_ln_set_float_r(cmd_ln_get(),n,f) +/** + * Set a boolean value in the global command line. + * + * @deprecated This is deprecated in favor of the re-entrant API + * function cmd_ln_set_boolean_r(). + */ +#define cmd_ln_set_boolean(n,b) cmd_ln_set_boolean_r(cmd_ln_get(),n,b) + +/** + * Print a help message listing the valid argument names, and the associated + * attributes as given in defn. + * + * @deprecated This is deprecated in favor of the re-entrant API + * function cmd_ln_print_help_r(). + */ +#define cmd_ln_print_help(f,d) cmd_ln_print_help_r(cmd_ln_get(),f,d) + +/** + * Free the global command line, if any exists. + * @deprecated Use the re-entrant API instead. 
+ */ +SPHINXBASE_EXPORT +void cmd_ln_free (void); + + +#ifdef __cplusplus +} +#endif + +#endif + + diff --git a/media/sphinxbase/sphinxbase/cmn.h b/media/sphinxbase/sphinxbase/cmn.h new file mode 100644 index 000000000..d91555e58 --- /dev/null +++ b/media/sphinxbase/sphinxbase/cmn.h @@ -0,0 +1,191 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * cmn.h -- Various forms of cepstral mean normalization + * + * ********************************************** + * CMU ARPA Speech Project + * + * Copyright (c) 1999 Carnegie Mellon University. + * ALL RIGHTS RESERVED. + * ********************************************** + * + * HISTORY + * $Log$ + * Revision 1.1 2006/04/05 20:27:30 dhdfu + * A Great Reorganzation of header files and executables + * + * Revision 1.13 2006/02/23 03:48:27 arthchan2003 + * Resolved conflict in cmn.h + * + * + * Revision 1.12 2006/02/22 23:43:55 arthchan2003 + * Merged from the branch SPHINX3_5_2_RCI_IRII_BRANCH: Put data structure into the cmn_t structure. + * + * Revision 1.11.4.2 2005/10/17 04:45:57 arthchan2003 + * Free stuffs in cmn and feat corectly. + * + * Revision 1.11.4.1 2005/07/05 06:25:08 arthchan2003 + * Fixed dox-doc. + * + * Revision 1.11 2005/06/21 19:28:00 arthchan2003 + * 1, Fixed doxygen documentation. 2, Added $ keyword. + * + * Revision 1.4 2005/06/13 04:02:56 archan + * Fixed most doxygen-style documentation under libs3decoder. + * + * Revision 1.3 2005/03/30 01:22:46 archan + * Fixed mistakes in last updates. 
Add + * + * + * 20.Apr.2001 RAH (rhoughton@mediasite.com, ricky.houghton@cs.cmu.edu) + * Added cmn_free() and moved *mean and *var out global space and named them cmn_mean and cmn_var + * + * 28-Apr-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Copied from previous version. + */ + + +#ifndef _S3_CMN_H_ +#define _S3_CMN_H_ + +/* Win32/WinCE DLL gunk */ +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. */ +} +#endif + +/** \file cmn.h + * \brief Apply Cepstral Mean Normalization (CMN) to the set of input mfc frames. + * + * By subtracting the mean of the input from each frame. C0 is also included in this process. + * This function operates on an entire utterance at a time. Hence, the entire utterance + * must be available beforehand (batchmode). + */ + +/** + * Types of cepstral mean normalization to apply to the features. + */ +typedef enum cmn_type_e { + CMN_NONE = 0, + CMN_CURRENT, + CMN_PRIOR +} cmn_type_t; + +/** String representations of cmn_type_t values. */ +SPHINXBASE_EXPORT +extern const char *cmn_type_str[]; + +/** Convert string representation (from command-line) to cmn_type_t */ +SPHINXBASE_EXPORT +cmn_type_t cmn_type_from_str(const char *str); + +/** \struct cmn_t + * \brief wrapper of operation of the cepstral mean normalization.
+ */ + +typedef struct { + mfcc_t *cmn_mean; /**< Temporary variable: current means */ + mfcc_t *cmn_var; /**< Temporary variables: stored the cmn variance */ + mfcc_t *sum; /**< The sum of the cmn frames */ + int32 nframe; /**< Number of frames */ + int32 veclen; /**< Length of cepstral vector */ +} cmn_t; + +SPHINXBASE_EXPORT +cmn_t* cmn_init(int32 veclen); + +/** + * CMN for the whole sentence +*/ +SPHINXBASE_EXPORT +void cmn (cmn_t *cmn, /**< In/Out: cmn normalization, which contains the cmn_mean and cmn_var) */ + mfcc_t **mfc, /**< In/Out: mfc[f] = mfc vector in frame f */ + int32 varnorm,/**< In: if not FALSE, variance normalize the input vectors + to have unit variance (along each dimension independently); + Irrelevant if no cmn is performed */ + int32 n_frame /**< In: Number of frames of mfc vectors */ + ); + +#define CMN_WIN_HWM 800 /* #frames after which window shifted */ +#define CMN_WIN 500 + +/** + * CMN for one block of data, using prior mean + */ +SPHINXBASE_EXPORT +void cmn_prior(cmn_t *cmn, /**< In/Out: cmn normalization, which contains + the cmn_mean and cmn_var) */ + mfcc_t **incep, /**< In/Out: mfc[f] = mfc vector in frame f*/ + int32 varnorm, /**< This flag should always be 0 for live */ + int32 nfr /**< Number of incoming frames */ + ); + +/** + * Update prior mean based on observed data + */ +SPHINXBASE_EXPORT +void cmn_prior_update(cmn_t *cmn); + +/** + * Set the prior mean. + */ +SPHINXBASE_EXPORT +void cmn_prior_set(cmn_t *cmn, mfcc_t const *vec); + +/** + * Get the prior mean. 
+ */ +SPHINXBASE_EXPORT +void cmn_prior_get(cmn_t *cmn, mfcc_t *vec); + +/* RAH, free previously allocated memory */ +SPHINXBASE_EXPORT +void cmn_free (cmn_t *cmn); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/media/sphinxbase/sphinxbase/err.h b/media/sphinxbase/sphinxbase/err.h new file mode 100644 index 000000000..3715e5361 --- /dev/null +++ b/media/sphinxbase/sphinxbase/err.h @@ -0,0 +1,229 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +#ifndef _LIBUTIL_ERR_H_ +#define _LIBUTIL_ERR_H_ + +#include +#include +#include +#include + +/* Win32/WinCE DLL gunk */ +#include + +/** + * @file err.h + * @brief Implementation of logging routines. + * + * Logging, warning, debug and error message output funtionality is provided in this file. + * Sphinxbase defines several level of logging messages - INFO, WARNING, ERROR, FATAL. By + * default output goes to standard error output. + * + * Logging is implemented through macros. They take same arguments as printf: format string and + * values. By default source file name and source line are prepended to the message. Log output + * could be redirected to any file using err_set_logfp() and err_set_logfile() functions. To disable + * logging in your application, call err_set_logfp(NULL). + * + * It's possible to log multiline info messages, to do that you need to start message with + * E_INFO and output other lines with E_INFOCONT. + */ + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. */ +} +#endif + +#define E_SYSCALL(stmt, ...) if (stmt) E_FATAL_SYSTEM(__VA_ARGS__); + +#define FILELINE __FILE__ , __LINE__ + +/** + * Exit with non-zero status after error message + */ +#define E_FATAL(...) 
\ + do { \ + err_msg(ERR_FATAL, FILELINE, __VA_ARGS__); \ + exit(EXIT_FAILURE); \ + } while (0) + +/** + * Print error text; Call perror(""); exit(errno); + */ +#define E_FATAL_SYSTEM(...) \ + do { \ + err_msg_system(ERR_FATAL, FILELINE, __VA_ARGS__); \ + exit(EXIT_FAILURE); \ + } while (0) + +/** + * Print error text; Call perror(""); + */ +#define E_ERROR_SYSTEM(...) err_msg_system(ERR_ERROR, FILELINE, __VA_ARGS__) + +/** + * Print error message to error log + */ +#define E_ERROR(...) err_msg(ERR_ERROR, FILELINE, __VA_ARGS__) + +/** + * Print warning message to error log + */ +#define E_WARN(...) err_msg(ERR_WARN, FILELINE, __VA_ARGS__) + +/** + * Print logging information to standard error stream + */ +#define E_INFO(...) err_msg(ERR_INFO, FILELINE, __VA_ARGS__) + +/** + * Continue printing the information to standard error stream + */ +#define E_INFOCONT(...) err_msg(ERR_INFOCONT, NULL, 0, __VA_ARGS__) + +/** + * Print logging information without filename. + */ +#define E_INFO_NOFN(...) err_msg(ERR_INFO, NULL, 0, __VA_ARGS__) + +/** + * Print debugging information to standard error stream. + * + * This will only print a message if: + * 1. Debugging is enabled at compile time + * 2. The debug level is greater than or equal to \a level + * + * Note that for portability reasons the format and arguments must be + * enclosed in an extra set of parentheses. + */ +#ifdef SPHINX_DEBUG +#define E_DEBUG(level, ...) \ + if (err_get_debug_level() >= level) \ + err_msg(ERR_DEBUG, FILELINE, __VA_ARGS__) +#define E_DEBUGCONT(level, ...) 
\ + if (err_get_debug_level() >= level) \ + err_msg(ERR_DEBUG, NULL, 0, __VA_ARGS__) +#else +#define E_DEBUG(level,x) +#define E_DEBUGCONT(level,x) +#endif + +typedef enum err_e { + ERR_DEBUG, + ERR_INFO, + ERR_INFOCONT, + ERR_WARN, + ERR_ERROR, + ERR_FATAL, + ERR_MAX +} err_lvl_t; + +SPHINXBASE_EXPORT void +err_msg(err_lvl_t lvl, const char *path, long ln, const char *fmt, ...); + +SPHINXBASE_EXPORT void +err_msg_system(err_lvl_t lvl, const char *path, long ln, const char *fmt, ...); + +SPHINXBASE_EXPORT void +err_logfp_cb(void * user_data, err_lvl_t level, const char *fmt, ...); + +typedef void (*err_cb_f)(void* user_data, err_lvl_t, const char *, ...); + +/** + * Sets function to output error messages. Use it to redirect the logging + * to your application. By default the handler which dumps messages to + * stderr is set. + * + * @param - callback to pass messages too. + */ +SPHINXBASE_EXPORT void +err_set_callback(err_cb_f callback, void *user_data); + +/** + * Direct all logging to a given filehandle if default logfp callback is set. + * + * @param logfp Filehandle to send log messages to, or NULL to disable logging. + */ +SPHINXBASE_EXPORT void +err_set_logfp(FILE *stream); + +/** + * Get the current logging filehandle. + * + * @return Current logging filehandle, NULL if logging is disabled. Initially + * it returns stderr + */ +SPHINXBASE_EXPORT FILE * +err_get_logfp(void); + +/** + * Append all log messages to a given file. + * + * Previous logging filehandle is closed (unless it was stdout or stderr). + * + * @param file File path to send log messages to + * @return 0 for success, <0 for failure (e.g. if file does not exist) + */ +SPHINXBASE_EXPORT int +err_set_logfile(const char *path); + +/** + * Set debugging verbosity level. + * + * Note that debugging messages are only enabled when compiled with -DDEBUG. + * + * @param level Verbosity level to set, or 0 to disable debug messages. 
+ */ +SPHINXBASE_EXPORT +int err_set_debug_level(int level); + +/** + * Get debugging verbosity level. + * + * Note that debugging messages are only enabled when compiled with -DDEBUG. + */ +SPHINXBASE_EXPORT +int err_get_debug_level(void); + +#ifdef __cplusplus +} +#endif + +#endif /* !_ERR_H */ diff --git a/media/sphinxbase/sphinxbase/f2c.h b/media/sphinxbase/sphinxbase/f2c.h new file mode 100644 index 000000000..a50d1c730 --- /dev/null +++ b/media/sphinxbase/sphinxbase/f2c.h @@ -0,0 +1,218 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +typedef int integer; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +#ifdef f2c_i2 +/* for -i2 */ +typedef short flag; +typedef short ftnlen; +typedef short ftnint; +#else +typedef int flag; +typedef int ftnlen; +typedef int ftnint; +#endif + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, 
backspace, endfile*/
+typedef struct
+{ flag aerr;
+ ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{ flag inerr;
+ ftnint inunit;
+ char *infile;
+ ftnlen infilen;
+ ftnint *inex; /*parameters in standard's order*/
+ ftnint *inopen;
+ ftnint *innum;
+ ftnint *innamed;
+ char *inname;
+ ftnlen innamlen;
+ char *inacc;
+ ftnlen inacclen;
+ char *inseq;
+ ftnlen inseqlen;
+ char *indir;
+ ftnlen indirlen;
+ char *infmt;
+ ftnlen infmtlen;
+ char *inform;
+ ftnint informlen;
+ char *inunf;
+ ftnlen inunflen;
+ ftnint *inrecl;
+ ftnint *innrec;
+ char *inblank;
+ ftnlen inblanklen;
+} inlist;
+
+#define VOID void /* return-type spelling used by the C_fp/Z_fp/H_fp typedefs below */
+
+union Multitype { /* for multiple entry points */
+ shortint h;
+ integer i;
+ real r;
+ doublereal d;
+ complex c;
+ doublecomplex z;
+ };
+
+typedef union Multitype Multitype;
+
+typedef long Long; /* No longer used; formerly in Namelist */
+
+struct Vardesc { /* for Namelist */
+ char *name;
+ char *addr;
+ ftnlen *dims;
+ int type;
+ };
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+ char *name;
+ Vardesc **vars; /* array of variable descriptors; nvars gives the count (presumably - confirm) */
+ int nvars;
+ };
+typedef struct Namelist Namelist;
+
+#ifndef abs /* only defined when the includer has not already provided abs */
+#define abs(x) ((x) >= 0 ? (x) : -(x)) /* NOTE: evaluates x twice */
+#endif
+#define dabs(x) (doublereal)abs(x)
+#ifndef min
+#define min(a,b) ((a) <= (b) ? (a) : (b)) /* NOTE: evaluates one argument twice */
+#endif
+#ifndef max
+#define max(a,b) ((a) >= (b) ? (a) : (b)) /* NOTE: evaluates one argument twice */
+#endif
+#define dmin(a,b) (doublereal)min(a,b)
+#define dmax(a,b) (doublereal)max(a,b)
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef int /* Unknown procedure type */ (*U_fp)(...);
+typedef shortint (*J_fp)(...);
+typedef integer (*I_fp)(...);
+typedef real (*R_fp)(...);
+typedef doublereal (*D_fp)(...), (*E_fp)(...);
+typedef /* Complex */ VOID (*C_fp)(...);
+typedef /* Double Complex */ VOID (*Z_fp)(...);
+typedef logical (*L_fp)(...);
+typedef shortlogical (*K_fp)(...);
+typedef /* Character */ VOID (*H_fp)(...);
+typedef /* Subroutine */ int (*S_fp)(...);
+#else /* C: the same procedure-pointer names, declared with a (void) parameter list */
+typedef int /* Unknown procedure type */ (*U_fp)(void);
+typedef shortint (*J_fp)(void);
+typedef integer (*I_fp)(void);
+typedef real (*R_fp)(void);
+typedef doublereal (*D_fp)(void), (*E_fp)(void);
+typedef /* Complex */ VOID (*C_fp)(void);
+typedef /* Double Complex */ VOID (*Z_fp)(void);
+typedef logical (*L_fp)(void);
+typedef shortlogical (*K_fp)(void);
+typedef /* Character */ VOID (*H_fp)(void);
+typedef /* Subroutine */ int (*S_fp)(void);
+#endif
+/* E_fp is for real functions when -R is not specified */
+typedef VOID C_f; /* complex function */
+typedef VOID H_f; /* character function */
+typedef VOID Z_f; /* double complex function */
+typedef doublereal E_f; /* real function with -R not specified */
+
+/* undef any lower-case symbols that your C compiler predefines, e.g.: */
+
+#ifndef Skip_f2c_Undefs /* define Skip_f2c_Undefs before including to keep these names */
+#undef cray
+#undef gcos
+#undef mc68010
+#undef mc68020
+#undef mips
+#undef pdp11
+#undef sgi
+#undef sparc
+#undef sun
+#undef sun2
+#undef sun3
+#undef sun4
+#undef u370
+#undef u3b
+#undef u3b2
+#undef u3b5
+#undef unix
+#undef vax
+#endif /* !Skip_f2c_Undefs */
+#endif /* closes the include guard opened at the top of f2c.h */
diff --git a/media/sphinxbase/sphinxbase/fe.h b/media/sphinxbase/sphinxbase/fe.h
new file mode 100644
index 000000000..5f957a0d0
--- /dev/null
+++ b/media/sphinxbase/sphinxbase/fe.h
@@ -0,0 +1,617 @@
+/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
+/* 
==================================================================== + * Copyright (c) 1996-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * ==================================================================== + * + */ + +/* + * fe.h + * + * $Log: fe.h,v $ + * Revision 1.11 2005/02/05 02:15:02 egouvea + * Removed fe_process(), never used + * + * Revision 1.10 2004/12/10 16:48:55 rkm + * Added continuous density acoustic model handling + * + * + */ + +#if defined(_WIN32) && !defined(GNUWINCE) +#define srand48(x) srand(x) +#define lrand48() rand() +#endif + +#ifndef _NEW_FE_H_ +#define _NEW_FE_H_ + +/* Win32/WinCE DLL gunk */ +#include + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. */ +} +#endif + +#ifdef WORDS_BIGENDIAN +#define NATIVE_ENDIAN "big" +#else +#define NATIVE_ENDIAN "little" +#endif + +/** Default number of samples per second. */ +#define DEFAULT_SAMPLING_RATE 16000 +/** Default number of frames per second. */ +#define DEFAULT_FRAME_RATE 100 +/** Default spacing between frame starts (equal to + * DEFAULT_SAMPLING_RATE/DEFAULT_FRAME_RATE) */ +#define DEFAULT_FRAME_SHIFT 160 +/** Default size of each frame (410 samples @ 16000Hz). */ +#define DEFAULT_WINDOW_LENGTH 0.025625 +/** Default number of FFT points. */ +#define DEFAULT_FFT_SIZE 512 +/** Default number of MFCC coefficients in output. */ +#define DEFAULT_NUM_CEPSTRA 13 +/** Default number of filter bands used to generate MFCCs. */ +#define DEFAULT_NUM_FILTERS 40 +/** Default prespeech state length */ +#define DEFAULT_PRESPCH_STATE_LEN 10 +/** Default postspeech state length */ +#define DEFAULT_POSTSPCH_STATE_LEN 50 +/** Default lower edge of mel filter bank. */ +#define DEFAULT_LOWER_FILT_FREQ 133.33334 +/** Default upper edge of mel filter bank. */ +#define DEFAULT_UPPER_FILT_FREQ 6855.4976 +/** Default pre-emphasis filter coefficient. */ +#define DEFAULT_PRE_EMPHASIS_ALPHA 0.97 +/** Default type of frequency warping to use for VTLN. */ +#define DEFAULT_WARP_TYPE "inverse_linear" +/** Default random number seed to use for dithering. 
*/ +#define SEED -1 + +#define waveform_to_cepstral_command_line_macro() \ + { "-logspec", \ + ARG_BOOLEAN, \ + "no", \ + "Write out logspectral files instead of cepstra" }, \ + \ + { "-smoothspec", \ + ARG_BOOLEAN, \ + "no", \ + "Write out cepstral-smoothed logspectral files" }, \ + \ + { "-transform", \ + ARG_STRING, \ + "legacy", \ + "Which type of transform to use to calculate cepstra (legacy, dct, or htk)" }, \ + \ + { "-alpha", \ + ARG_FLOAT32, \ + ARG_STRINGIFY(DEFAULT_PRE_EMPHASIS_ALPHA), \ + "Preemphasis parameter" }, \ + \ + { "-samprate", \ + ARG_FLOAT32, \ + ARG_STRINGIFY(DEFAULT_SAMPLING_RATE), \ + "Sampling rate" }, \ + \ + { "-frate", \ + ARG_INT32, \ + ARG_STRINGIFY(DEFAULT_FRAME_RATE), \ + "Frame rate" }, \ + \ + { "-wlen", \ + ARG_FLOAT32, \ + ARG_STRINGIFY(DEFAULT_WINDOW_LENGTH), \ + "Hamming window length" }, \ + \ + { "-nfft", \ + ARG_INT32, \ + ARG_STRINGIFY(DEFAULT_FFT_SIZE), \ + "Size of FFT" }, \ + \ + { "-nfilt", \ + ARG_INT32, \ + ARG_STRINGIFY(DEFAULT_NUM_FILTERS), \ + "Number of filter banks" }, \ + \ + { "-lowerf", \ + ARG_FLOAT32, \ + ARG_STRINGIFY(DEFAULT_LOWER_FILT_FREQ), \ + "Lower edge of filters" }, \ + \ + { "-upperf", \ + ARG_FLOAT32, \ + ARG_STRINGIFY(DEFAULT_UPPER_FILT_FREQ), \ + "Upper edge of filters" }, \ + \ + { "-unit_area", \ + ARG_BOOLEAN, \ + "yes", \ + "Normalize mel filters to unit area" }, \ + \ + { "-round_filters", \ + ARG_BOOLEAN, \ + "yes", \ + "Round mel filter frequencies to DFT points" }, \ + \ + { "-ncep", \ + ARG_INT32, \ + ARG_STRINGIFY(DEFAULT_NUM_CEPSTRA), \ + "Number of cep coefficients" }, \ + \ + { "-doublebw", \ + ARG_BOOLEAN, \ + "no", \ + "Use double bandwidth filters (same center freq)" }, \ + \ + { "-lifter", \ + ARG_INT32, \ + "0", \ + "Length of sin-curve for liftering, or 0 for no liftering." }, \ + \ + { "-vad_prespeech", \ + ARG_INT32, \ + ARG_STRINGIFY(DEFAULT_PRESPCH_STATE_LEN), \ + "Num of speech frames to trigger vad from silence to speech." 
}, \ + \ + { "-vad_postspeech", \ + ARG_INT32, \ + ARG_STRINGIFY(DEFAULT_POSTSPCH_STATE_LEN), \ + "Num of silence frames to trigger vad from speech to silence." }, \ + \ + { "-vad_threshold", \ + ARG_FLOAT32, \ + "2.0", \ + "Threshold for decision between noise and silence frames. Log-ratio between signal level and noise level." }, \ + \ + { "-input_endian", \ + ARG_STRING, \ + NATIVE_ENDIAN, \ + "Endianness of input data, big or little, ignored if NIST or MS Wav" }, \ + \ + { "-warp_type", \ + ARG_STRING, \ + DEFAULT_WARP_TYPE, \ + "Warping function type (or shape)" }, \ + \ + { "-warp_params", \ + ARG_STRING, \ + NULL, \ + "Parameters defining the warping function" }, \ + \ + { "-dither", \ + ARG_BOOLEAN, \ + "no", \ + "Add 1/2-bit noise" }, \ + \ + { "-seed", \ + ARG_INT32, \ + ARG_STRINGIFY(SEED), \ + "Seed for random number generator; if less than zero, pick our own" }, \ + \ + { "-remove_dc", \ + ARG_BOOLEAN, \ + "no", \ + "Remove DC offset from each frame" }, \ + \ + { "-remove_noise", \ + ARG_BOOLEAN, \ + "yes", \ + "Remove noise with spectral subtraction in mel-energies" }, \ + \ + { "-remove_silence", \ + ARG_BOOLEAN, \ + "yes", \ + "Enables VAD, removes silence frames from processing" }, \ + \ + { "-verbose", \ + ARG_BOOLEAN, \ + "no", \ + "Show input filenames" } \ + + +#ifdef FIXED_POINT +/** MFCC computation type. */ +typedef fixed32 mfcc_t; + +/** Convert a floating-point value to mfcc_t. */ +#define FLOAT2MFCC(x) FLOAT2FIX(x) +/** Convert a mfcc_t value to floating-point. */ +#define MFCC2FLOAT(x) FIX2FLOAT(x) +/** Multiply two mfcc_t values. */ +#define MFCCMUL(a,b) FIXMUL(a,b) +#define MFCCLN(x,in,out) FIXLN_ANY(x,in,out) +#else /* !FIXED_POINT */ + +/** MFCC computation type. */ +typedef float32 mfcc_t; +/** Convert a floating-point value to mfcc_t. */ +#define FLOAT2MFCC(x) (x) +/** Convert a mfcc_t value to floating-point. */ +#define MFCC2FLOAT(x) (x) +/** Multiply two mfcc_t values. 
*/ +#define MFCCMUL(a,b) ((a)*(b)) +#define MFCCLN(x,in,out) log(x) +#endif /* !FIXED_POINT */ + +/** + * Structure for the front-end computation. + */ +typedef struct fe_s fe_t; + +/** + * Error codes returned by stuff. + */ +enum fe_error_e { + FE_SUCCESS = 0, + FE_OUTPUT_FILE_SUCCESS = 0, + FE_CONTROL_FILE_ERROR = -1, + FE_START_ERROR = -2, + FE_UNKNOWN_SINGLE_OR_BATCH = -3, + FE_INPUT_FILE_OPEN_ERROR = -4, + FE_INPUT_FILE_READ_ERROR = -5, + FE_MEM_ALLOC_ERROR = -6, + FE_OUTPUT_FILE_WRITE_ERROR = -7, + FE_OUTPUT_FILE_OPEN_ERROR = -8, + FE_ZERO_ENERGY_ERROR = -9, + FE_INVALID_PARAM_ERROR = -10 +}; + +/** + * Initialize a front-end object from global command-line. + * + * This is equivalent to calling fe_init_auto_r(cmd_ln_get()). + * + * @return Newly created front-end object. + */ +SPHINXBASE_EXPORT +fe_t* fe_init_auto(void); + +/** + * Get the default set of arguments for fe_init_auto_r(). + * + * @return Pointer to an argument structure which can be passed to + * cmd_ln_init() in friends to create argument structures for + * fe_init_auto_r(). + */ +SPHINXBASE_EXPORT +arg_t const *fe_get_args(void); + +/** + * Initialize a front-end object from a command-line parse. + * + * @param config Command-line object, as returned by cmd_ln_parse_r() + * or cmd_ln_parse_file(). Ownership of this object is + * claimed by the fe_t, so you must not attempt to free + * it manually. Use cmd_ln_retain() if you wish to + * reuse it. + * @return Newly created front-end object. + */ +SPHINXBASE_EXPORT +fe_t *fe_init_auto_r(cmd_ln_t *config); + +/** + * Retrieve the command-line object used to initialize this front-end. + * + * @return command-line object for this front-end. This pointer is + * retained by the fe_t, so you should not attempt to free it + * manually. 
+ */ +SPHINXBASE_EXPORT +const cmd_ln_t *fe_get_config(fe_t *fe); + +/** + * Start processing of the stream, resets processed frame counter + */ +SPHINXBASE_EXPORT +void fe_start_stream(fe_t *fe); + +/** + * Start processing an utterance. + * @return 0 for success, <0 for error (see enum fe_error_e) + */ +SPHINXBASE_EXPORT +int fe_start_utt(fe_t *fe); + +/** + * Get the dimensionality of the output of this front-end object. + * + * This is guaranteed to be the number of values in one frame of + * output from fe_end_utt() and fe_process_frames(). + * It is usually the number of MFCC + * coefficients, but it might be the number of log-spectrum bins, if + * the -logspec or -smoothspec options to + * fe_init_auto() were true. + * + * @return Dimensionality of front-end output. + */ +SPHINXBASE_EXPORT +int fe_get_output_size(fe_t *fe); + +/** + * Get the dimensionality of the input to this front-end object. + * + * This function retrieves the number of input samples consumed by one + * frame of processing. To obtain one frame of output, you must have + * at least *out_frame_size samples. To obtain N + * frames of output, you must have at least (N-1) * + * *out_frame_shift + *out_frame_size input samples. + * + * @param out_frame_shift Output: Number of samples between each frame start. + * @param out_frame_size Output: Number of samples in each frame. + */ +SPHINXBASE_EXPORT +void fe_get_input_size(fe_t *fe, int *out_frame_shift, + int *out_frame_size); + +/** + * Get vad state for the last processed frame + * + * @return 1 if speech, 0 if silence + */ +SPHINXBASE_EXPORT +uint8 fe_get_vad_state(fe_t *fe); + +/** + * Finish processing an utterance. + * + * This function also collects any remaining samples and calculates a + * final cepstral vector. If there are overflow samples remaining, it + * will pad with zeros to make a complete frame. + * + * @param fe Front-end object. 
+ * @param out_cepvector Buffer to hold a residual cepstral vector, or NULL + * if you wish to ignore it. Must be large enough + * @param out_nframes Number of frames of residual cepstra created + * (either 0 or 1). + * @return 0 for success, <0 for error (see enum fe_error_e) + */ +SPHINXBASE_EXPORT +int fe_end_utt(fe_t *fe, mfcc_t *out_cepvector, int32 *out_nframes); + +/** + * Retain ownership of a front end object. + * + * @return pointer to the retained front end. + */ +SPHINXBASE_EXPORT +fe_t *fe_retain(fe_t *fe); + +/** + * Free the front end. + * + * Releases resources associated with the front-end object. + * + * @return new reference count (0 if freed completely) + */ +SPHINXBASE_EXPORT +int fe_free(fe_t *fe); + +/* + * Do same as fe_process_frames, but also returns + * voiced audio. Output audio is valid till next + * fe_process_frames call. + * + * DO NOT MIX fe_process_frames calls + * + * @param voiced_spch Output: obtain voiced audio samples here + * + * @param voiced_spch_nsamps Output: shows voiced_spch length + * + * @param out_frameidx Output: index of the utterance start + */ +SPHINXBASE_EXPORT +int fe_process_frames_ext(fe_t *fe, + int16 const **inout_spch, + size_t *inout_nsamps, + mfcc_t **buf_cep, + int32 *inout_nframes, + int16 **voiced_spch, + int32 *voiced_spch_nsamps, + int32 *out_frameidx); + +/** + * Process a block of samples. + * + * This function generates up to *inout_nframes of + * features, or as many as can be generated from + * *inout_nsamps samples. + * + * On exit, the inout_spch, inout_nsamps, + * and inout_nframes parameters are updated to point to + * the remaining sample data, the number of remaining samples, and the + * number of frames processed, respectively. 
This allows you to call + * this repeatedly to process a large block of audio in small (say, + * 5-frame) chunks: + * + * int16 *bigbuf, *p; + * mfcc_t **cepstra; + * int32 nsamps; + * int32 nframes = 5; + * + * cepstra = (mfcc_t **) + * ckd_calloc_2d(nframes, fe_get_output_size(fe), sizeof(**cepstra)); + * p = bigbuf; + * while (nsamps) { + * nframes = 5; + * fe_process_frames(fe, &p, &nsamps, cepstra, &nframes); + * // Now do something with these frames... + * if (nframes) + * do_some_stuff(cepstra, nframes); + * } + * + * @param inout_spch Input: Pointer to pointer to speech samples + * (signed 16-bit linear PCM). + * Output: Pointer to remaining samples. + * @param inout_nsamps Input: Pointer to maximum number of samples to + * process. + * Output: Number of samples remaining in input buffer. + * @param buf_cep Two-dimensional buffer (allocated with + * ckd_calloc_2d()) which will receive frames of output + * data. If NULL, no actual processing will be done, + * and the maximum number of output frames which would + * be generated is returned in + * *inout_nframes. + * @param inout_nframes Input: Pointer to maximum number of frames to + * generate. + * Output: Number of frames actually generated. + * @param out_frameidx Index of the first frame returned in a stream + * + * @return 0 for success, <0 for failure (see enum fe_error_e) + */ +SPHINXBASE_EXPORT +int fe_process_frames(fe_t *fe, + int16 const **inout_spch, + size_t *inout_nsamps, + mfcc_t **buf_cep, + int32 *inout_nframes, + int32 *out_frameidx); + +/** + * Process a block of samples, returning as many frames as possible. + * + * This function processes all the samples in a block of data and + * returns a newly allocated block of feature vectors. This block + * needs to be freed with fe_free_2d() after use. + * + * It is possible for there to be some left-over data which could not + * fit in a complete frame. This data can be processed with + * fe_end_utt(). 
+ * + * This function is deprecated in favor of fe_process_frames(). + * + * @return 0 for success, <0 for failure (see enum fe_error_e) + */ +SPHINXBASE_EXPORT +int fe_process_utt(fe_t *fe, /**< A front end object */ + int16 const *spch, /**< The speech samples */ + size_t nsamps, /**< number of samples*/ + mfcc_t ***cep_block, /**< Output pointer to cepstra */ + int32 *nframes /**< Number of frames processed */ + ); + +/** + * Free the output pointer returned by fe_process_utt(). + **/ +SPHINXBASE_EXPORT +void fe_free_2d(void *arr); + +/** + * Convert a block of mfcc_t to float32 (can be done in-place) + **/ +SPHINXBASE_EXPORT +int fe_mfcc_to_float(fe_t *fe, + mfcc_t **input, + float32 **output, + int32 nframes); + +/** + * Convert a block of float32 to mfcc_t (can be done in-place) + **/ +SPHINXBASE_EXPORT +int fe_float_to_mfcc(fe_t *fe, + float32 **input, + mfcc_t **output, + int32 nframes); + +/** + * Process one frame of log spectra into MFCC using discrete cosine + * transform. + * + * This uses a variant of the DCT-II where the first frequency bin is + * scaled by 0.5. Unless somebody misunderstood the DCT-III equations + * and thought that's what they were implementing here, this is + * ostensibly done to account for the symmetry properties of the + * DCT-II versus the DFT - the first coefficient of the input is + * assumed to be repeated in the negative frequencies, which is not + * the case for the DFT. (This begs the question, why not just use + * the DCT-I, since it has the appropriate symmetry properties...) + * Moreover, this is bogus since the mel-frequency bins on which we + * are doing the DCT don't extend to the edge of the DFT anyway. + * + * This also means that the matrix used in computing this DCT can not + * be made orthogonal, and thus inverting the transform is difficult. 
+ * Therefore if you want to do cepstral smoothing or have some other + * reason to invert your MFCCs, use fe_logspec_dct2() and its inverse + * fe_logspec_dct3() instead. + * + * Also, it normalizes by 1/nfilt rather than 2/nfilt, for some reason. + **/ +SPHINXBASE_EXPORT +int fe_logspec_to_mfcc(fe_t *fe, /**< A fe structure */ + const mfcc_t *fr_spec, /**< One frame of spectrum */ + mfcc_t *fr_cep /**< One frame of cepstrum */ + ); + +/** + * Convert log spectra to MFCC using DCT-II. + * + * This uses the "unitary" form of the DCT-II, i.e. with a scaling + * factor of sqrt(2/N) and a "beta" factor of sqrt(1/2) applied to the + * cos(0) basis vector (i.e. the one corresponding to the DC + * coefficient in the output). + **/ +SPHINXBASE_EXPORT +int fe_logspec_dct2(fe_t *fe, /**< A fe structure */ + const mfcc_t *fr_spec, /**< One frame of spectrum */ + mfcc_t *fr_cep /**< One frame of cepstrum */ + ); + +/** + * Convert MFCC to log spectra using DCT-III. + * + * This uses the "unitary" form of the DCT-III, i.e. with a scaling + * factor of sqrt(2/N) and a "beta" factor of sqrt(1/2) applied to the + * cos(0) basis vector (i.e. the one corresponding to the DC + * coefficient in the input). + **/ +SPHINXBASE_EXPORT +int fe_mfcc_dct3(fe_t *fe, /**< A fe structure */ + const mfcc_t *fr_cep, /**< One frame of cepstrum */ + mfcc_t *fr_spec /**< One frame of spectrum */ + ); + +#ifdef __cplusplus +} +#endif + + +#endif diff --git a/media/sphinxbase/sphinxbase/feat.h b/media/sphinxbase/sphinxbase/feat.h new file mode 100644 index 000000000..5f16a6464 --- /dev/null +++ b/media/sphinxbase/sphinxbase/feat.h @@ -0,0 +1,469 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * feat.h -- Cepstral features computation. + */ + +#ifndef _S3_FEAT_H_ +#define _S3_FEAT_H_ + +#include + +/* Win32/WinCE DLL gunk */ +#include +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. */ +} +#endif + +/** \file feat.h + * \brief compute the dynamic coefficients from the cepstral vector. 
+ */ +#define LIVEBUFBLOCKSIZE 256 /** Blocks of 256 vectors allocated + for livemode decoder */ +#define S3_MAX_FRAMES 15000 /* RAH, I believe this is still too large, but better than before */ + +#define cepstral_to_feature_command_line_macro() \ +{ "-feat", \ + ARG_STRING, \ + "1s_c_d_dd", \ + "Feature stream type, depends on the acoustic model" }, \ +{ "-ceplen", \ + ARG_INT32, \ + "13", \ + "Number of components in the input feature vector" }, \ +{ "-cmn", \ + ARG_STRING, \ + "current", \ + "Cepstral mean normalization scheme ('current', 'prior', or 'none')" }, \ +{ "-cmninit", \ + ARG_STRING, \ + "8.0", \ + "Initial values (comma-separated) for cepstral mean when 'prior' is used" }, \ +{ "-varnorm", \ + ARG_BOOLEAN, \ + "no", \ + "Variance normalize each utterance (only if CMN == current)" }, \ +{ "-agc", \ + ARG_STRING, \ + "none", \ + "Automatic gain control for c0 ('max', 'emax', 'noise', or 'none')" }, \ +{ "-agcthresh", \ + ARG_FLOAT32, \ + "2.0", \ + "Initial threshold for automatic gain control" }, \ +{ "-lda", \ + ARG_STRING, \ + NULL, \ + "File containing transformation matrix to be applied to features (single-stream features only)" }, \ +{ "-ldadim", \ + ARG_INT32, \ + "0", \ + "Dimensionality of output of feature transformation (0 to use entire matrix)" }, \ +{"-svspec", \ + ARG_STRING, \ + NULL, \ + "Subvector specification (e.g., 24,0-11/25,12-23/26-38 or 0-12/13-25/26-38)"} + +/** + * \struct feat_t + * \brief Structure for describing a speech feature type + * Structure for describing a speech feature type (no. of streams and stream widths), + * as well as the computation for converting the input speech (e.g., Sphinx-II format + * MFC cepstra) into this type of feature vectors. + */ +typedef struct feat_s { + int refcount; /**< Reference count. 
*/ + char *name; /**< Printable name for this feature type */ + int32 cepsize; /**< Size of input speech vector (typically, a cepstrum vector) */ + int32 n_stream; /**< Number of feature streams; e.g., 4 in Sphinx-II */ + uint32 *stream_len; /**< Vector length of each feature stream */ + int32 window_size; /**< Number of extra frames around given input frame needed to compute + corresponding output feature (so total = window_size*2 + 1) */ + int32 n_sv; /**< Number of subvectors */ + uint32 *sv_len; /**< Vector length of each subvector */ + int32 **subvecs; /**< Subvector specification (or NULL for none) */ + mfcc_t *sv_buf; /**< Temporary copy buffer for subvector projection */ + int32 sv_dim; /**< Total dimensionality of subvector (length of sv_buf) */ + + cmn_type_t cmn; /**< Type of CMN to be performed on each utterance */ + int32 varnorm; /**< Whether variance normalization is to be performed on each utt; + Irrelevant if no CMN is performed */ + agc_type_t agc; /**< Type of AGC to be performed on each utterance */ + + /** + * Feature computation function. + * @param fcb the feat_t describing this feature type + * @param input pointer into the input cepstra + * @param feat a 2-d array of output features (n_stream x stream_len) + * @return 0 if successful, -ve otherwise. + * + * Function for converting window of input speech vector + * (input[-window_size..window_size]) to output feature vector + * (feat[stream][]). If NULL, no conversion available, the + * speech input must be feature vector itself. + **/ + void (*compute_feat)(struct feat_s *fcb, mfcc_t **input, mfcc_t **feat); + cmn_t *cmn_struct; /**< Structure that stores the temporary variables for cepstral + means normalization*/ + agc_t *agc_struct; /**< Structure that stores the temporary variables for acoustic + gain control*/ + + mfcc_t **cepbuf; /**< Circular buffer of MFCC frames for live feature computation. */ + mfcc_t **tmpcepbuf; /**< Array of pointers into cepbuf to handle border cases. 
*/ + int32 bufpos; /**< Write index in cepbuf. */ + int32 curpos; /**< Read index in cepbuf. */ + + mfcc_t ***lda; /**< Array of linear transformations (for LDA, MLLT, or whatever) */ + uint32 n_lda; /**< Number of linear transformations in lda. */ + uint32 out_dim; /**< Output dimensionality */ +} feat_t; + +/** + * Name of feature type. + */ +#define feat_name(f) ((f)->name) +/** + * Input dimensionality of feature. + */ +#define feat_cepsize(f) ((f)->cepsize) +/** + * Size of dynamic feature window. + */ +#define feat_window_size(f) ((f)->window_size) +/** + * Number of feature streams. + * + * @deprecated Do not use this, use feat_dimension1() instead. + */ +#define feat_n_stream(f) ((f)->n_stream) +/** + * Length of feature stream i. + * + * @deprecated Do not use this, use feat_dimension2() instead. + */ +#define feat_stream_len(f,i) ((f)->stream_len[i]) +/** + * Number of streams or subvectors in feature output. + */ +#define feat_dimension1(f) ((f)->n_sv ? (f)->n_sv : f->n_stream) +/** + * Dimensionality of stream/subvector i in feature output. + */ +#define feat_dimension2(f,i) ((f)->lda ? (f)->out_dim : ((f)->sv_len ? (f)->sv_len[i] : f->stream_len[i])) +/** + * Total dimensionality of feature output. + */ +#define feat_dimension(f) ((f)->out_dim) +/** + * Array with stream/subvector lengths + */ +#define feat_stream_lengths(f) ((f)->lda ? (&(f)->out_dim) : (f)->sv_len ? (f)->sv_len : f->stream_len) + +/** + * Parse subvector specification string. + * + * Format of specification: + * \li '/' separated list of subvectors + * \li each subvector is a ',' separated list of subranges + * \li each subrange is a single \verbatim \endverbatim or + * \verbatim - \endverbatim (inclusive), where + * \verbatim \endverbatim is a feature vector dimension + * specifier. + * + * E.g., "24,0-11/25,12-23/26,27-38" has: + * \li 3 subvectors + * \li the 1st subvector has feature dims: 24, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, and 11. + * \li etc. 
+ * + * @param str subvector specification string. + * @return allocated 2-D array of subvector specs (free with + * subvecs_free()). If there are N subvectors specified, subvec[N] = + * NULL; and each subvec[0]..subvec[N-1] is -1 terminated vector of + * feature dims. + */ +SPHINXBASE_EXPORT +int32 **parse_subvecs(char const *str); + +/** + * Free array of subvector specs. + */ +SPHINXBASE_EXPORT +void subvecs_free(int32 **subvecs); + + +/** + * Allocate an array to hold several frames worth of feature vectors. The returned value + * is the mfcc_t ***data array, organized as follows: + * + * - data[0][0] = frame 0 stream 0 vector, data[0][1] = frame 0 stream 1 vector, ... + * - data[1][0] = frame 1 stream 0 vector, data[0][1] = frame 1 stream 1 vector, ... + * - data[2][0] = frame 2 stream 0 vector, data[0][1] = frame 2 stream 1 vector, ... + * - ... + * + * NOTE: For I/O convenience, the entire data area is allocated as one contiguous block. + * @return pointer to the allocated space if successful, NULL if any error. + */ +SPHINXBASE_EXPORT +mfcc_t ***feat_array_alloc(feat_t *fcb, /**< In: Descriptor from feat_init(), used + to obtain number of streams and stream sizes */ + int32 nfr /**< In: Number of frames for which to allocate */ + ); + +/** + * Realloate the array of features. Requires us to know the old size + */ +SPHINXBASE_EXPORT +mfcc_t ***feat_array_realloc(feat_t *fcb, /**< In: Descriptor from feat_init(), used + to obtain number of streams and stream sizes */ + mfcc_t ***old_feat, /**< Feature array. Freed */ + int32 ofr, /**< In: Previous number of frames */ + int32 nfr /**< In: Number of frames for which to allocate */ + ); + +/** + * Free a buffer allocated with feat_array_alloc() + */ +SPHINXBASE_EXPORT +void feat_array_free(mfcc_t ***feat); + + +/** + * Initialize feature module to use the selected type of feature stream. + * One-time only initialization at the beginning of the program. 
Input type + * is a string defining the kind of input->feature conversion desired: + * + * - "s2_4x": s2mfc->Sphinx-II 4-feature stream, + * - "1s_c_d_dd": s2mfc->Sphinx 3.x single feature stream, + * - "s3_1x39": s2mfc->Sphinx 3.0 single feature stream, + * - "n1,n2,n3,...": Explicit feature vector layout spec. with comma-separated + * feature stream lengths. In this case, the input data is already in the + * feature format and there is no conversion necessary. + * + * @return (feat_t *) descriptor if successful, NULL if error. Caller + * must not directly modify the contents of the returned value. + */ +SPHINXBASE_EXPORT +feat_t *feat_init(char const *type,/**< In: Type of feature stream */ + cmn_type_t cmn, /**< In: Type of cepstram mean normalization to + be done before feature computation; can be + CMN_NONE (for none) */ + int32 varnorm, /**< In: (boolean) Whether variance + normalization done on each utt; only + applicable if CMN also done */ + agc_type_t agc, /**< In: Type of automatic gain control to be + done before feature computation */ + int32 breport, /**< In: Whether to show a report for feat_t */ + int32 cepsize /**< Number of components in the input vector + (or 0 for the default for this feature type, + which is usually 13) */ + ); + +/** + * Add an LDA transformation to the feature module from a file. + * @return 0 for success or -1 if reading the LDA file failed. + **/ +SPHINXBASE_EXPORT +int32 feat_read_lda(feat_t *feat, /**< In: Descriptor from feat_init() */ + const char *ldafile, /**< In: File to read the LDA matrix from. */ + int32 dim /**< In: Dimensionality of LDA output. */ + ); + +/** + * Transform a block of features using the feature module's LDA transform. + **/ +SPHINXBASE_EXPORT +void feat_lda_transform(feat_t *fcb, /**< In: Descriptor from feat_init() */ + mfcc_t ***inout_feat, /**< Feature block to transform. */ + uint32 nfr /**< In: Number of frames in inout_feat. 
*/ + ); + +/** + * Add a subvector specification to the feature module. + * + * The subvector splitting will be performed after dynamic feature + * computation, CMN, AGC, and any LDA transformation. The number of + * streams in the dynamic feature type must be one, as with LDA. + * + * After adding a subvector specification, the output of feature + * computation will be split into multiple subvectors, and + * feat_array_alloc() will allocate pointers accordingly. The number + * of streams will remain the + * + * @param fcb the feature descriptor. + * @param subvecs subvector specification. This pointer is retained + * by the feat_t and should not be freed manually. + * @return 0 for success or -1 if the subvector specification was + * invalid. + */ +SPHINXBASE_EXPORT +int feat_set_subvecs(feat_t *fcb, int32 **subvecs); + +/** + * Print the given block of feature vectors to the given FILE. + */ +SPHINXBASE_EXPORT +void feat_print(feat_t *fcb, /**< In: Descriptor from feat_init() */ + mfcc_t ***feat, /**< In: Feature data to be printed */ + int32 nfr, /**< In: Number of frames of feature data above */ + FILE *fp /**< In: Output file pointer */ + ); + + +/** + * Read a specified MFC file (or given segment within it), perform + * CMN/AGC as indicated by fcb, and compute feature + * vectors. Feature vectors are computed for the entire segment + * specified, by including additional surrounding or padding frames to + * accommodate the feature windows. + * + * @return Number of frames of feature vectors computed if successful; + * -1 if any error. If feat is NULL, then no actual + * computation will be done, and the number of frames which must be + * allocated will be returned. + * + * A note on how the file path is constructed: If the control file + * already specifies extension or absolute path, then these are not + * applied. The default extension is defined by the application. 
+ */ +SPHINXBASE_EXPORT +int32 feat_s2mfc2feat(feat_t *fcb, /**< In: Descriptor from feat_init() */ + const char *file, /**< In: File to be read */ + const char *dir, /**< In: Directory prefix for file, + if needed; can be NULL */ + const char *cepext,/**< In: Extension of the + cepstrum file.It cannot be + NULL */ + int32 sf, int32 ef, /* Start/End frames + within file to be read. Use + 0,-1 to process entire + file */ + mfcc_t ***feat, /**< Out: Computed feature vectors; + caller must allocate this space */ + int32 maxfr /**< In: Available space (number of frames) in + above feat array; it must be + sufficient to hold the result. + Pass -1 for no limit. */ + ); + + +/** + * Feature computation routine for live mode decoder. + * + * This function computes features for blocks of incoming data. It + * retains an internal buffer for computing deltas, which means that + * the number of output frames will not necessarily equal the number + * of input frames. + * + * It is very important to realize that the number of + * output frames can be greater than the number of + * input frames, specifically when endutt is true. It is + * guaranteed to never exceed *inout_ncep + + * feat_window_size(fcb). You MUST have + * allocated at least that many frames in ofeat, or you + * will experience a buffer overflow. + * + * If beginutt and endutt are both true, CMN_CURRENT and AGC_MAX will + * be done. Otherwise only CMN_PRIOR and AGC_EMAX will be done. + * + * If beginutt is false, endutt is true, and the number of input + * frames exceeds the input size, then end-of-utterance processing + * won't actually be done. This condition can easily be checked, + * because *inout_ncep will equal the return value on + * exit, and will also be smaller than the value of + * *inout_ncep on entry. + * + * @return The number of output frames actually computed. 
+ **/ +SPHINXBASE_EXPORT +int32 feat_s2mfc2feat_live(feat_t *fcb, /**< In: Descriptor from feat_init() */ + mfcc_t **uttcep, /**< In: Incoming cepstral buffer */ + int32 *inout_ncep,/**< In: Size of incoming buffer. + Out: Number of incoming frames consumed. */ + int32 beginutt, /**< In: Begining of utterance flag */ + int32 endutt, /**< In: End of utterance flag */ + mfcc_t ***ofeat /**< In: Output feature buffer. See + VERY IMPORTANT note + about the size of this buffer above. */ + ); + + +/** + * Update the normalization stats, possibly in the end of utterance + * + */ +SPHINXBASE_EXPORT +void feat_update_stats(feat_t *fcb); + + +/** + * Retain ownership of feat_t. + * + * @return pointer to retained feat_t. + */ +SPHINXBASE_EXPORT +feat_t *feat_retain(feat_t *f); + +/** + * Release resource associated with feat_t + * + * @return new reference count (0 if freed) + */ +SPHINXBASE_EXPORT +int feat_free(feat_t *f /**< In: feat_t */ + ); + +/** + * Report the feat_t data structure + */ +SPHINXBASE_EXPORT +void feat_report(feat_t *f /**< In: feat_t */ + ); +#ifdef __cplusplus +} +#endif + + +#endif diff --git a/media/sphinxbase/sphinxbase/filename.h b/media/sphinxbase/sphinxbase/filename.h new file mode 100644 index 000000000..b69dcde57 --- /dev/null +++ b/media/sphinxbase/sphinxbase/filename.h @@ -0,0 +1,112 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * filename.h -- File and path name operations. + * + * ********************************************** + * CMU ARPA Speech Project + * + * Copyright (c) 1999 Carnegie Mellon University. + * ALL RIGHTS RESERVED. + * ********************************************** + * + * HISTORY + * $Log: filename.h,v $ + * Revision 1.7 2005/06/22 03:01:07 arthchan2003 + * Added keyword + * + * Revision 1.3 2005/03/30 01:22:48 archan + * Fixed mistakes in last updates. Add + * + * + * 30-Oct-1997 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University. + * Started. 
+ */ + + +#ifndef _LIBUTIL_FILENAME_H_ +#define _LIBUTIL_FILENAME_H_ + +/* Win32/WinCE DLL gunk */ +#include +#include + +/**\file filename.h + *\brief File names related operation + */ +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. */ +} +#endif + +/** + * Returns the last part of the path, without modifying anything in memory. + */ +SPHINXBASE_EXPORT +const char *path2basename(const char *path); + +/** + * Strip off filename from the given path and copy the directory name into dir + * Caller must have allocated dir (hint: it's always shorter than path). + */ +SPHINXBASE_EXPORT +void path2dirname(const char *path, char *dir); + + +/** + * Strip off the smallest trailing file-extension suffix and copy + * the rest into the given root argument. Caller must have + * allocated root. + */ +SPHINXBASE_EXPORT +void strip_fileext(const char *file, char *root); + +/** + * Test whether a pathname is absolute for the current OS. + */ +SPHINXBASE_EXPORT +int path_is_absolute(const char *file); + +#ifdef __cplusplus +} +#endif + + +#endif diff --git a/media/sphinxbase/sphinxbase/fixpoint.h b/media/sphinxbase/sphinxbase/fixpoint.h new file mode 100644 index 000000000..30b5cb202 --- /dev/null +++ b/media/sphinxbase/sphinxbase/fixpoint.h @@ -0,0 +1,145 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2005 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== */ + +/* Fixed-point arithmetic macros. + * + * Author: David Huggins-Daines + */ + +#ifndef _FIXPOINT_H_ +#define _FIXPOINT_H_ + +#include + +/* Win32/WinCE DLL gunk */ +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. */ +} +#endif + +#ifndef DEFAULT_RADIX +#define DEFAULT_RADIX 12 +#endif + +/** Fixed-point computation type. */ +typedef int32 fixed32; + +/** Convert floating point to fixed point. */ +#define FLOAT2FIX_ANY(x,radix) \ + (((x)<0.0) ? 
\ + ((fixed32)((x)*(float32)(1<<(radix)) - 0.5)) \ + : ((fixed32)((x)*(float32)(1<<(radix)) + 0.5))) +#define FLOAT2FIX(x) FLOAT2FIX_ANY(x,DEFAULT_RADIX) +/** Convert fixed point to floating point. */ +#define FIX2FLOAT_ANY(x,radix) ((float32)(x)/(1<<(radix))) +#define FIX2FLOAT(x) FIX2FLOAT_ANY(x,DEFAULT_RADIX) + +/** + * Multiply two fixed point numbers with an arbitrary radix point. + * + * A veritable multiplicity of implementations exist, starting with + * the fastest ones... + */ + +#if defined(__arm__) && !defined(__thumb__) +/* + * This works on most modern ARMs but *only* in ARM mode (for obvious + * reasons), so don't use it in Thumb mode (but why are you building + * signal processing code in Thumb mode?!) + */ +#define FIXMUL(a,b) FIXMUL_ANY(a,b,DEFAULT_RADIX) +#define FIXMUL_ANY(a,b,r) ({ \ + int cl, ch, _a = a, _b = b; \ + __asm__ ("smull %0, %1, %2, %3\n" \ + "mov %0, %0, lsr %4\n" \ + "orr %0, %0, %1, lsl %5\n" \ + : "=&r" (cl), "=&r" (ch) \ + : "r" (_a), "r" (_b), "i" (r), "i" (32-(r)));\ + cl; }) + +#elif defined(_MSC_VER) || (defined(HAVE_LONG_LONG) && SIZEOF_LONG_LONG == 8) +/* Standard systems*/ +#define FIXMUL(a,b) FIXMUL_ANY(a,b,DEFAULT_RADIX) +#define FIXMUL_ANY(a,b,radix) ((fixed32)(((int64)(a)*(b))>>(radix))) + +#else +/* Most general case where 'long long' doesn't exist or is slow. */ +#define FIXMUL(a,b) FIXMUL_ANY(a,b,DEFAULT_RADIX) +#define FIXMUL_ANY(a,b,radix) ({ \ + int32 _ah, _bh; \ + uint32 _al, _bl, _t, c; \ + _ah = ((int32)(a)) >> 16; \ + _bh = ((int32)(b)) >> 16; \ + _al = ((uint32)(a)) & 0xffff; \ + _bl = ((uint32)(b)) & 0xffff; \ + _t = _ah * _bl + _al * _bh; \ + c = (fixed32)(((_al * _bl) >> (radix)) \ + + ((_ah * _bh) << (32 - (radix))) \ + + ((radix) > 16 ? (_t >> (radix - 16)) : (_t << (16 - radix)))); \ + c;}) +#endif + +/* Various fixed-point logarithmic functions that we need. */ +/** Minimum value representable in log format. */ +#define MIN_FIXLOG -2829416 /* log(1e-300) * (1< +#include + +/* SphinxBase headers. 
*/ +#include +#include +#include +#include +#include +#include +#include + +/* + * A single transition in the FSG. + */ +typedef struct fsg_link_s { + int32 from_state; + int32 to_state; + int32 logs2prob; /**< log(transition probability)*lw */ + int32 wid; /**< Word-ID; <0 if epsilon or null transition */ +} fsg_link_t; + +/* Access macros */ +#define fsg_link_from_state(l) ((l)->from_state) +#define fsg_link_to_state(l) ((l)->to_state) +#define fsg_link_wid(l) ((l)->wid) +#define fsg_link_logs2prob(l) ((l)->logs2prob) + +/** + * Adjacency list (opaque) for a state in an FSG. + */ +typedef struct trans_list_s trans_list_t; + +/** + * Word level FSG definition. + * States are simply integers 0..n_state-1. + * A transition emits a word and has a given probability of being taken. + * There can also be null or epsilon transitions, with no associated emitted + * word. + */ +typedef struct fsg_model_s { + int refcount; /**< Reference count. */ + char *name; /**< A unique string identifier for this FSG */ + int32 n_word; /**< Number of unique words in this FSG */ + int32 n_word_alloc; /**< Number of words allocated in vocab */ + char **vocab; /**< Vocabulary for this FSG. */ + bitvec_t *silwords; /**< Indicates which words are silence/fillers. */ + bitvec_t *altwords; /**< Indicates which words are pronunciation alternates. */ + logmath_t *lmath; /**< Pointer to log math computation object. */ + int32 n_state; /**< number of states in FSG */ + int32 start_state; /**< Must be in the range [0..n_state-1] */ + int32 final_state; /**< Must be in the range [0..n_state-1] */ + float32 lw; /**< Language weight that's been applied to transition + logprobs */ + trans_list_t *trans; /**< Transitions out of each state, if any. */ + listelem_alloc_t *link_alloc; /**< Allocator for FSG links. 
*/ +} fsg_model_t; + +/* Access macros */ +#define fsg_model_name(f) ((f)->name) +#define fsg_model_n_state(f) ((f)->n_state) +#define fsg_model_start_state(f) ((f)->start_state) +#define fsg_model_final_state(f) ((f)->final_state) +#define fsg_model_log(f,p) logmath_log((f)->lmath, p) +#define fsg_model_lw(f) ((f)->lw) +#define fsg_model_n_word(f) ((f)->n_word) +#define fsg_model_word_str(f,wid) (wid == -1 ? "(NULL)" : (f)->vocab[wid]) + +/** + * Iterator over arcs. + */ +typedef struct fsg_arciter_s fsg_arciter_t; + +/** + * Have silence transitions been added? + */ +#define fsg_model_has_sil(f) ((f)->silwords != NULL) + +/** + * Have alternate word transitions been added? + */ +#define fsg_model_has_alt(f) ((f)->altwords != NULL) + +#define fsg_model_is_filler(f,wid) \ + (fsg_model_has_sil(f) ? bitvec_is_set((f)->silwords, wid) : FALSE) +#define fsg_model_is_alt(f,wid) \ + (fsg_model_has_alt(f) ? bitvec_is_set((f)->altwords, wid) : FALSE) + +/** + * Create a new FSG. + */ +SPHINXBASE_EXPORT +fsg_model_t *fsg_model_init(char const *name, logmath_t *lmath, + float32 lw, int32 n_state); + +/** + * Read a word FSG from the given file and return a pointer to the structure + * created. Return NULL if any error occurred. + * + * File format: + * + *
+ *   Any number of comment lines; ignored
+ *   FSG_BEGIN [<fsgname>]
+ *   N <#states>
+ *   S <start-state ID>
+ *   F <final-state ID>
+ *   T <from-state> <to-state> <prob> [<word-string>]
+ *   T ...
+ *   ... (any number of state transitions)
+ *   FSG_END
+ *   Any number of comment lines; ignored
+ * 
+ * + * The FSG spec begins with the line containing the keyword FSG_BEGIN. + * It has an optional fsg name string. If not present, the FSG has the empty + * string as its name. + * + * Following the FSG_BEGIN declaration is the number of states, the start + * state, and the final state, each on a separate line. States are numbered + * in the range [0 .. -1]. + * + * These are followed by all the state transitions, each on a separate line, + * and terminated by the FSG_END line. A state transition has the given + * probability of being taken, and emits the given word. The word emission + * is optional; if word-string omitted, it is an epsilon or null transition. + * + * Comments can also be embedded within the FSG body proper (i.e. between + * FSG_BEGIN and FSG_END): any line with a # character in col 1 is treated + * as a comment line. + * + * Return value: a new fsg_model_t structure if the file is successfully + * read, NULL otherwise. + */ +SPHINXBASE_EXPORT +fsg_model_t *fsg_model_readfile(const char *file, logmath_t *lmath, float32 lw); + +/** + * Like fsg_model_readfile(), but from an already open stream. + */ +SPHINXBASE_EXPORT +fsg_model_t *fsg_model_read(FILE *fp, logmath_t *lmath, float32 lw); + +/** + * Retain ownership of an FSG. + * + * @return Pointer to retained FSG. + */ +SPHINXBASE_EXPORT +fsg_model_t *fsg_model_retain(fsg_model_t *fsg); + +/** + * Free the given word FSG. + * + * @return new reference count (0 if freed completely) + */ +SPHINXBASE_EXPORT +int fsg_model_free(fsg_model_t *fsg); + +/** + * Add a word to the FSG vocabulary. + * + * @return Word ID for this new word. + */ +SPHINXBASE_EXPORT +int fsg_model_word_add(fsg_model_t *fsg, char const *word); + +/** + * Look up a word in the FSG vocabulary. + * + * @return Word ID for this word + */ +SPHINXBASE_EXPORT +int fsg_model_word_id(fsg_model_t *fsg, char const *word); + +/** + * Add the given transition to the FSG transition matrix. 
+ * + * Duplicates (i.e., two transitions between the same states, with the + * same word label) are flagged and only the highest prob retained. + */ +SPHINXBASE_EXPORT +void fsg_model_trans_add(fsg_model_t * fsg, + int32 from, int32 to, int32 logp, int32 wid); + +/** + * Add a null transition between the given states. + * + * There can be at most one null transition between the given states; + * duplicates are flagged and only the best prob retained. Transition + * probs must be <= 1 (i.e., logprob <= 0). + * + * @return 1 if a new transition was added, 0 if the prob of an existing + * transition was upgraded; -1 if nothing was changed. + */ +SPHINXBASE_EXPORT +int32 fsg_model_null_trans_add(fsg_model_t * fsg, int32 from, int32 to, int32 logp); + +/** + * Add a "tag" transition between the given states. + * + * A "tag" transition is a null transition with a non-null word ID, + * which corresponds to a semantic tag or other symbol to be output + * when this transition is taken. + * + * As above, there can be at most one null or tag transition between + * the given states; duplicates are flagged and only the best prob + * retained. Transition probs must be <= 1 (i.e., logprob <= 0). + * + * @return 1 if a new transition was added, 0 if the prob of an existing + * transition was upgraded; -1 if nothing was changed. + */ +SPHINXBASE_EXPORT +int32 fsg_model_tag_trans_add(fsg_model_t * fsg, int32 from, int32 to, + int32 logp, int32 wid); + +/** + * Obtain transitive closure of null transitions in the given FSG. + * + * @param nulls List of null transitions, or NULL to find them automatically. + * @return Updated list of null transitions. + */ +SPHINXBASE_EXPORT +glist_t fsg_model_null_trans_closure(fsg_model_t * fsg, glist_t nulls); + +/** + * Get the list of transitions (if any) from state i to j. + */ +SPHINXBASE_EXPORT +glist_t fsg_model_trans(fsg_model_t *fsg, int32 i, int32 j); + +/** + * Get an iterator over the outgoing transitions from state i. 
+ */ +SPHINXBASE_EXPORT +fsg_arciter_t *fsg_model_arcs(fsg_model_t *fsg, int32 i); + +/** + * Get the current arc from the arc iterator. + */ +SPHINXBASE_EXPORT +fsg_link_t *fsg_arciter_get(fsg_arciter_t *itor); + +/** + * Move the arc iterator forward. + */ +SPHINXBASE_EXPORT +fsg_arciter_t *fsg_arciter_next(fsg_arciter_t *itor); + +/** + * Free the arc iterator (early termination) + */ +SPHINXBASE_EXPORT +void fsg_arciter_free(fsg_arciter_t *itor); +/** + * Get the null transition (if any) from state i to j. + */ +SPHINXBASE_EXPORT +fsg_link_t *fsg_model_null_trans(fsg_model_t *fsg, int32 i, int32 j); + +/** + * Add silence word transitions to each state in given FSG. + * + * @param state state to add a self-loop to, or -1 for all states. + * @param silprob probability of silence transition. + */ +SPHINXBASE_EXPORT +int fsg_model_add_silence(fsg_model_t * fsg, char const *silword, + int state, float32 silprob); + +/** + * Add alternate pronunciation transitions for a word in given FSG. + */ +SPHINXBASE_EXPORT +int fsg_model_add_alt(fsg_model_t * fsg, char const *baseword, + char const *altword); + +/** + * Write FSG to a file. + */ +SPHINXBASE_EXPORT +void fsg_model_write(fsg_model_t *fsg, FILE *fp); + +/** + * Write FSG to a file. + */ +SPHINXBASE_EXPORT +void fsg_model_writefile(fsg_model_t *fsg, char const *file); + +/** + * Write FSG to a file in AT&T FSM format. + */ +SPHINXBASE_EXPORT +void fsg_model_write_fsm(fsg_model_t *fsg, FILE *fp); + +/** + * Write FSG to a file in AT&T FSM format. 
+ */ +SPHINXBASE_EXPORT +void fsg_model_writefile_fsm(fsg_model_t *fsg, char const *file); + +/** + * Write FSG symbol table to a file (for AT&T FSM) + */ +SPHINXBASE_EXPORT +void fsg_model_write_symtab(fsg_model_t *fsg, FILE *file); + +/** + * Write FSG symbol table to a file (for AT&T FSM) + */ +SPHINXBASE_EXPORT +void fsg_model_writefile_symtab(fsg_model_t *fsg, char const *file); + +#endif /* __FSG_MODEL_H__ */ diff --git a/media/sphinxbase/sphinxbase/genrand.h b/media/sphinxbase/sphinxbase/genrand.h new file mode 100644 index 000000000..ddcc7eed7 --- /dev/null +++ b/media/sphinxbase/sphinxbase/genrand.h @@ -0,0 +1,180 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* + A C-program for MT19937, with initialization improved 2002/1/26. + Coded by Takuji Nishimura and Makoto Matsumoto. + + Before using, initialize the state by using init_genrand(seed) + or init_by_array(init_key, key_length). + + Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura, + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright +` notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. The names of its contributors may not be used to endorse or promote + products derived from this software without specific prior written + permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + + Any feedback is very welcome. + http://www.math.keio.ac.jp/matumoto/emt.html + email: matumoto@math.keio.ac.jp +*/ + +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +/* + * randgen.c : a portable random generator + * + * + * ********************************************** + * CMU ARPA Speech Project + * + * Copyright (c) 1999 Carnegie Mellon University. + * ALL RIGHTS RESERVED. + * ********************************************** + * + * HISTORY + * $Log: genrand.h,v $ + * Revision 1.3 2005/06/22 03:01:50 arthchan2003 + * Added keyword + * + * Revision 1.3 2005/03/30 01:22:48 archan + * Fixed mistakes in last updates. Add + * + * + * 18-Nov-04 ARCHAN (archan@cs.cmu.edu) at Carnegie Mellon University + * First incorporated from the Mersenne Twister Random + * Number Generator package. It was chosen because it is + * in BSD-license and its performance is quite + * reasonable. Of course if you look at the inventors's + * page. This random generator can actually gives + * 19937-bits period. This is already far from we need. + * This will possibly good enough for the next 10 years. + * + * I also downgrade the code a little bit to avoid Sphinx's + * developers misused it. + */ + +#ifndef _LIBUTIL_GENRAND_H_ +#define _LIBUTIL_GENRAND_H_ + +#define S3_RAND_MAX_INT32 0x7fffffff +#include + +/* Win32/WinCE DLL gunk */ +#include + +/** \file genrand.h + *\brief High performance prortable random generator created by Takuji + *Nishimura and Makoto Matsumoto. 
+ * + * A high-performance generator based on the Mersenne Twister algorithm. + * If properly seeded, the generator achieves a period of + * 2^19937-1. For technical details, please take a look at + * http://www.math.keio.ac.jp/matumoto/emt.html + */ +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. */ +} +#endif + +/** + * Convenience macros wrapping the random generator functions. + * Each expands to a plain call expression, so the caller supplies the semicolon. + */ +#define s3_rand_seed(s) genrand_seed(s) +#define s3_rand_int31() genrand_int31() +#define s3_rand_real() genrand_real3() +#define s3_rand_res53() genrand_res53() + +/** + * Initialize the seed of the random generator. + */ +SPHINXBASE_EXPORT +void genrand_seed(unsigned long s); + +/** + * Generate a random integer in the closed interval [0, 0x7fffffff]. + */ +SPHINXBASE_EXPORT +long genrand_int31(void); + +/** + * Generate a random real number in the open interval (0, 1). + */ +SPHINXBASE_EXPORT +double genrand_real3(void); + +/** + * Generate a random real number in [0, 1) with 53-bit resolution. + */ +SPHINXBASE_EXPORT +double genrand_res53(void); + +#ifdef __cplusplus +} +#endif + +#endif /*_LIBUTIL_GENRAND_H_*/ + + + diff --git a/media/sphinxbase/sphinxbase/glist.h b/media/sphinxbase/sphinxbase/glist.h new file mode 100644 index 000000000..cdb18be3b --- /dev/null +++ b/media/sphinxbase/sphinxbase/glist.h @@ -0,0 +1,242 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2.
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * glist.h -- Module for maintaining a generic, linear linked-list structure. + * + * ********************************************** + * CMU ARPA Speech Project + * + * Copyright (c) 1999 Carnegie Mellon University. + * ALL RIGHTS RESERVED. + * ********************************************** + * + * HISTORY + * $Log: glist.h,v $ + * Revision 1.9 2005/06/22 03:02:51 arthchan2003 + * 1, Fixed doxygen documentation, 2, add keyword. + * + * Revision 1.4 2005/05/03 04:09:11 archan + * Implemented the heart of word copy search. For every ci-phone, every word end, a tree will be allocated to preserve its pathscore. 
This is different from 3.5 or below, only the best score for a particular ci-phone, regardless of the word-ends will be preserved at every frame. The graph propagation will not collect unused word tree at this point. srch_WST_propagate_wd_lv2 is also as the most stupid in the century. But well, after all, everything needs a start. I will then really get the results from the search and see how it looks. + * + * Revision 1.3 2005/03/30 01:22:48 archan + * Fixed mistakes in last updates. Add + * + * + * 09-Mar-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Added glist_chkdup_*(). + * + * 13-Feb-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Created from earlier version. + */ + + +/** + * \file glist.h + * \brief Generic linked-lists maintenance. + * + * Only insert at the head of the list. A convenient little + * linked-list package, but a double-edged sword: the user must keep + * track of the data type within the linked list elements. When it + * was first written, there was no selective deletions except to + * destroy the entire list. This is modified in later version. + * + * + * (C++ would be good for this, but that's a double-edged sword as well.) + */ + + +#ifndef _LIBUTIL_GLIST_H_ +#define _LIBUTIL_GLIST_H_ + +#include +/* Win32/WinCE DLL gunk */ +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. 
*/ +} +#endif + +/** A node in a generic list + */ +typedef struct gnode_s { + anytype_t data; /**< Node payload; see prim_type.h */ + struct gnode_s *next; /**< Next node in list */ +} gnode_t; +typedef gnode_t *glist_t; /**< Head of a list of gnodes */ + + +/** Access macros, for convenience; gnode_float32 and gnode_float64 both read data.fl, the former casting it to float32 + */ +#define gnode_ptr(g) ((g)->data.ptr) +#define gnode_int32(g) ((g)->data.i) +#define gnode_uint32(g) ((g)->data.ui) +#define gnode_float32(g) ((float32)(g)->data.fl) +#define gnode_float64(g) ((g)->data.fl) +#define gnode_next(g) ((g)->next) + + +/** + * Create and prepend a new list node, with the given user-defined data, at the HEAD + * of the given generic list. Return the new list thus formed. + * g may be NULL to indicate an initially empty list. + */ +SPHINXBASE_EXPORT +glist_t glist_add_ptr (glist_t g, /**< a linked list */ + void *ptr /**< a pointer */ + ); + +/** + * Create and prepend a new list node containing an integer. + */ +SPHINXBASE_EXPORT +glist_t glist_add_int32 (glist_t g, /**< a linked list */ + int32 val /**< an integer value */ + ); +/** + * Create and prepend a new list node containing an unsigned integer. + */ +SPHINXBASE_EXPORT +glist_t glist_add_uint32 (glist_t g, /**< a linked list */ + uint32 val /**< an unsigned integer value */ + ); +/** + * Create and prepend a new list node containing a single-precision float. + */ +SPHINXBASE_EXPORT +glist_t glist_add_float32 (glist_t g, /**< a linked list */ + float32 val /**< a float32 value */ + ); +/** + * Create and prepend a new list node containing a double-precision float. + */ +SPHINXBASE_EXPORT +glist_t glist_add_float64 (glist_t g, /**< a linked list */ + float64 val /**< a float64 value */ + ); + + + +/** + * Create and insert a new list node, with the given user-defined data, after + * the given generic node gn. gn cannot be NULL. + * Return ptr to the newly created gnode_t.
+ */ +SPHINXBASE_EXPORT +gnode_t *glist_insert_ptr (gnode_t *gn, /**< the node after which ptr will be inserted */ + void *ptr /**< pointer to insert */ + ); +/** + * Create and insert a new list node containing an integer. + */ +SPHINXBASE_EXPORT +gnode_t *glist_insert_int32 (gnode_t *gn, /**< the node after which the value will be inserted */ + int32 val /**< int32 value to insert */ + ); +/** + * Create and insert a new list node containing an unsigned integer. + */ +SPHINXBASE_EXPORT +gnode_t *glist_insert_uint32 (gnode_t *gn, /**< the node after which the value will be inserted */ + uint32 val /**< uint32 value to insert */ + ); +/** + * Create and insert a new list node containing a single-precision float. + */ +SPHINXBASE_EXPORT +gnode_t *glist_insert_float32 (gnode_t *gn, /**< the node after which the value will be inserted */ + float32 val /**< float32 value to insert */ + ); +/** + * Create and insert a new list node containing a double-precision float. + */ +SPHINXBASE_EXPORT +gnode_t *glist_insert_float64 (gnode_t *gn, /**< the node after which the value will be inserted */ + float64 val /**< float64 value to insert */ + ); + +/** + * Reverse the order of the given glist. (The glist_add_*() functions add to the head; one might + * ultimately want the reverse of that.) + * NOTE: The list is reversed "in place"; i.e., no new memory is allocated. + * @return The head of the new list. + */ +SPHINXBASE_EXPORT +glist_t glist_reverse (glist_t g /**< input linked list */ + ); + + +/** + Count the number of elements in a given linked list. + @return the number of elements in the given glist_t +*/ +SPHINXBASE_EXPORT +int32 glist_count (glist_t g /**< input linked list */ + ); + +/** + * Free the given generic list; user-defined data contained within is not + * automatically freed. The caller must have done that already. + */ +SPHINXBASE_EXPORT +void glist_free (glist_t g); + + +/** + * Free the given node, gn, of a glist, pred being its predecessor in the list.
+ * Return ptr to the next node in the list after the freed node. + */ +SPHINXBASE_EXPORT +gnode_t *gnode_free(gnode_t *gn, /**< In: node to free */ + gnode_t *pred /**< In: predecessor of gn in the list. NOTE(review): presumably NULL when gn is the list head -- confirm against glist.c */ + ); + +/** + * Return the last node in the given list. + */ +SPHINXBASE_EXPORT +gnode_t *glist_tail (glist_t g); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/media/sphinxbase/sphinxbase/hash_table.h b/media/sphinxbase/sphinxbase/hash_table.h new file mode 100644 index 000000000..5de8360b2 --- /dev/null +++ b/media/sphinxbase/sphinxbase/hash_table.h @@ -0,0 +1,443 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * hash.h -- Hash table module. + * + * ********************************************** + * CMU ARPA Speech Project + * + * Copyright (c) 1999 Carnegie Mellon University. + * ALL RIGHTS RESERVED. + * ********************************************** + * + * HISTORY + * $Log: hash.h,v $ + * Revision 1.7 2005/06/22 03:04:01 arthchan2003 + * 1, Implemented hash_delete and hash_display, 2, Fixed doxygen documentation, 3, Added keyword. + * + * Revision 1.8 2005/05/24 01:10:54 archan + * Fix a bug when the value only appear in the hash but there is no chain. Also make sure that prev was initialized to NULL. All success cases were tested, but not tested with the deletion is tested. + * + * Revision 1.7 2005/05/24 00:12:31 archan + * Also add function prototype for hash_display in hash.h + * + * Revision 1.4 2005/05/03 04:09:11 archan + * Implemented the heart of word copy search. For every ci-phone, every word end, a tree will be allocated to preserve its pathscore. This is different from 3.5 or below, only the best score for a particular ci-phone, regardless of the word-ends will be preserved at every frame. The graph propagation will not collect unused word tree at this point. srch_WST_propagate_wd_lv2 is also as the most stupid in the century. But well, after all, everything needs a start. 
I will then really get the results from the search and see how it looks. + * + * Revision 1.3 2005/03/30 01:22:48 archan + * Fixed mistakes in last updates. Add + * + * + * 05-May-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon + * Removed hash_key2hash(). Added hash_enter_bkey() and hash_lookup_bkey(), + * and len attribute to hash_entry_t. + * + * 30-Apr-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon + * Added hash_key2hash(). + * + * 18-Jun-97 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon + * Included case sensitive/insensitive option. + * + * 08-31-95 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon + * Created. + */ + + +/** + * @file hash_table.h + * @brief Hash table implementation + * + * These hash tables are intended for associating a pointer/integer + * "value" with a char string "key", (e.g., an ID with a word string). + * Subsequently, one can retrieve the value by providing the string + * key. (The reverse functionality--obtaining the string given the + * value--is not provided with the hash table module.) + */ + +/** + * A note by ARCHAN at 20050510: Technically what we use is a so-called + * "hash table with buckets", which is a very nice way to deal with + * external hashing. There are definitely better ways to do internal + * hashing (i.e. when everything is stored in the memory.) In Sphinx + * 3, this is a reasonable practice because the hash table is only used in + * lookups during initialization or in lookups which are not critical for + * speed. + */ + +/** + * Another note by ARCHAN at 20050703: To use this data structure + * properly, it is very important to realize that the users are + * required to handle memory allocation of the C-style keys. The hash + * table will not make a copy of the memory allocated for any of the + * C-style keys. It will not allocate memory for them. It will not delete + * memory for them. As a result, the following code snippet will cause + * a memory leak.
+ * + * while (1){ + * str=(char*)ckd_calloc(str_length,sizeof(char*)) + * if(hash_enter(ht,str,id)!=id){ printf("fail to add key str %s with val id %d\n",str,id)} + * } + * + */ + +/** + * A note by dhuggins on 20061010: Changed this to use void * instead + * of int32 as the value type, so that arbitrary objects can be + * inserted into a hash table (in a way that won't crash on 64-bit + * machines ;) + */ + +#ifndef _LIBUTIL_HASH_H_ +#define _LIBUTIL_HASH_H_ + +/* Win32/WinCE DLL gunk */ +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. */ +} +#endif + +/** + * The hash table structures. + * Each hash table is identified by a hash_table_t structure. hash_table_t.table is + * pre-allocated for a user-controlled max size, and is initially empty. As new + * entries are created (using hash_table_enter()), the empty entries get filled. If multiple + * keys hash to the same entry, new entries are allocated and linked together in a + * linear list. + */ + +typedef struct hash_entry_s { + const char *key; /**< Key string, NULL if this is an empty slot. + NOTE that the key must not be changed once the entry + has been made. */ + size_t len; /**< Key-length; the key string does not have to be a C-style NUL- + terminated string; it can have arbitrary binary bytes */ + void *val; /**< Value associated with above key */ + struct hash_entry_s *next; /**< For collision resolution */ +} hash_entry_t; + +typedef struct { + hash_entry_t *table; /**< Primary hash table, excluding entries that collide */ + int32 size; /**< Primary hash table size, (is a prime#); NOTE: This is the + number of primary entries ALLOCATED, NOT the number of valid + entries in the table */ + int32 inuse; /**< Number of valid entries in the table. */ + int32 nocase; /**< Whether case insensitive for key comparisons */ +} hash_table_t; + +typedef struct hash_iter_s { + hash_table_t *ht; /**< Hash table we are iterating over.
*/ + hash_entry_t *ent; /**< Current entry in that table. */ + size_t idx; /**< Index of next bucket to search. */ +} hash_iter_t; + +/** Access macros */ +#define hash_entry_val(e) ((e)->val) +#define hash_entry_key(e) ((e)->key) +#define hash_entry_len(e) ((e)->len) +#define hash_table_inuse(h) ((h)->inuse) +#define hash_table_size(h) ((h)->size) + + +/** + * Allocate a new hash table for a given expected size. + * + * @note Case sensitivity of hash keys applies to 7-bit ASCII + * characters only, and is not locale-dependent. + * + * @return handle to allocated hash table. + */ +SPHINXBASE_EXPORT +hash_table_t * hash_table_new(int32 size, /**< In: Expected number of entries in the table */ + int32 casearg /**< In: Whether case insensitive for key + comparisons. When 1 (HASH_CASE_NO), case is insensitive; + when 0 (HASH_CASE_YES), case is sensitive. */ + ); + +#define HASH_CASE_YES 0 +#define HASH_CASE_NO 1 + +/** + * Free the specified hash table; the caller is responsible for freeing the key strings + * pointed to by the table entries. + */ +SPHINXBASE_EXPORT +void hash_table_free(hash_table_t *h /**< In: Handle of hash table to free */ + ); + + +/** + * Try to add a new entry with given key and associated value to hash table h. If key doesn't + * already exist in hash table, the addition is successful, and the return value is val. But + * if key already exists, return its existing associated value. (The hash table is unchanged; + * it is up to the caller to resolve the conflict.) + */ +SPHINXBASE_EXPORT +void *hash_table_enter(hash_table_t *h, /**< In: Handle of hash table in which to create entry */ + const char *key, /**< In: C-style NUL-terminated key string + for the new entry */ + void *val /**< In: Value to be associated with above key */ + ); + +/** + * Add a 32-bit integer value to a hash table. + * + * This macro is the clean way to do this and avoid compiler warnings + * on 64-bit platforms.
+ */ +#define hash_table_enter_int32(h,k,v) \ + ((int32)(long)hash_table_enter((h),(k),(void *)(long)(v))) + +/** + * Add a new entry with given key and value to hash table h. If the + * key already exists, its value is replaced with the given value, and + * the previous value is returned, otherwise val is returned. + * + * A very important but subtle point: The key pointer in the hash + * table is replaced with the pointer passed to this function. + * In general you should always pass a pointer to hash_table_enter() + * whose lifetime matches or exceeds that of the hash table. In some + * rare cases it is convenient to initially enter a value with a + * short-lived key, then later replace that with a long-lived one. + * This behaviour allows this to happen. + */ +SPHINXBASE_EXPORT +void *hash_table_replace(hash_table_t *h, /**< In: Handle of hash table in which to create or replace the entry */ + const char *key, /**< In: C-style NUL-terminated key string + for the new or replaced entry */ + void *val /**< In: Value to be associated with above key */ + ); + +/** + * Replace a 32-bit integer value in a hash table. + * + * This macro is the clean way to do this and avoid compiler warnings + * on 64-bit platforms. + */ +#define hash_table_replace_int32(h,k,v) \ + ((int32)(long)hash_table_replace((h),(k),(void *)(long)(v))) + +/** + * Delete the entry with the given key (and its associated value) from hash table + * h. Return the value associated with the key (NULL if it did not exist) + */ + +SPHINXBASE_EXPORT +void *hash_table_delete(hash_table_t *h, /**< In: Handle of hash table in + which a key will be deleted */ + const char *key /**< In: C-style NUL-terminated + key string of the entry to delete */ + ); + +/** + * Like hash_table_delete, but with an explicitly specified key length, + * instead of a NULL-terminated, C-style key string. So the key + * string is a binary key (or bkey). Hash tables containing such keys + * should be created with the HASH_CASE_YES option.
Otherwise, the + * results are unpredictable. + */ +SPHINXBASE_EXPORT +void *hash_table_delete_bkey(hash_table_t *h, /**< In: Handle of hash table in + which a key will be deleted */ + const char *key, /**< In: Key buffer of the entry + to delete */ + size_t len /**< In: Length of above key buffer */ + ); + +/** + * Delete all entries from a hash_table. + */ +SPHINXBASE_EXPORT +void hash_table_empty(hash_table_t *h /**< In: Handle of hash table */ + ); + +/** + * Like hash_table_enter, but with an explicitly specified key length, + * instead of a NULL-terminated, C-style key string. So the key + * string is a binary key (or bkey). Hash tables containing such keys + * should be created with the HASH_CASE_YES option. Otherwise, the + * results are unpredictable. + */ +SPHINXBASE_EXPORT +void *hash_table_enter_bkey(hash_table_t *h, /**< In: Handle of hash table + in which to create entry */ + const char *key, /**< In: Key buffer */ + size_t len, /**< In: Length of above key buffer */ + void *val /**< In: Value to be associated with above key */ + ); + +/** + * Enter a 32-bit integer value in a hash table, using a binary key. + * + * This macro is the clean way to do this and avoid compiler warnings + * on 64-bit platforms. + */ +#define hash_table_enter_bkey_int32(h,k,l,v) \ + ((int32)(long)hash_table_enter_bkey((h),(k),(l),(void *)(long)(v))) + +/** + * Like hash_table_replace, but with an explicitly specified key length, + * instead of a NULL-terminated, C-style key string. So the key + * string is a binary key (or bkey). Hash tables containing such keys + * should be created with the HASH_CASE_YES option. Otherwise, the + * results are unpredictable. + */ +SPHINXBASE_EXPORT +void *hash_table_replace_bkey(hash_table_t *h, /**< In: Handle of hash table in which to create or replace the entry */ + const char *key, /**< In: Key buffer */ + size_t len, /**< In: Length of above key buffer */ + void *val /**< In: Value to be associated with above key */ + ); + +/** + * Replace a 32-bit integer value in a hash table.
+ * + * This macro is the clean way to do this and avoid compiler warnings + * on 64-bit platforms. + */ +#define hash_table_replace_bkey_int32(h,k,l,v) \ + ((int32)(long)hash_table_replace_bkey((h),(k),(l),(void *)(long)(v))) + +/** + * Look up a key in a hash table and optionally return the associated + * value. + * @return 0 if key found in hash table, else -1. + */ +SPHINXBASE_EXPORT +int32 hash_table_lookup(hash_table_t *h, /**< In: Handle of hash table being searched */ + const char *key, /**< In: C-style NUL-terminated string whose value is sought */ + void **val /**< Out: *val = value associated with key. + If this is NULL, no value will be returned. */ + ); + +/** + * Look up a 32-bit integer value in a hash table. + * + * This function is the clean way to do this and avoid compiler warnings + * on 64-bit platforms. + */ +SPHINXBASE_EXPORT +int32 hash_table_lookup_int32(hash_table_t *h, /**< In: Handle of hash table being searched */ + const char *key, /**< In: C-style NUL-terminated string whose value is sought */ + int32 *val /**< Out: *val = value associated with key. + If this is NULL, no value will be returned. */ + ); + +/** + * Like hash_table_lookup, but with an explicitly specified key length, instead of a NULL-terminated, + * C-style key string. So the key string is a binary key (or bkey). Hash tables containing + * such keys should be created with the HASH_CASE_YES option. Otherwise, the results are + * unpredictable. + */ +SPHINXBASE_EXPORT +int32 hash_table_lookup_bkey(hash_table_t *h, /**< In: Handle of hash table being searched */ + const char *key, /**< In: Key buffer */ + size_t len, /**< In: Length of above key buffer */ + void **val /**< Out: *val = value associated with key. + If this is NULL, no value will be returned. */ + ); + +/** + * Look up a 32-bit integer value in a hash table by binary key. + * + * This function is the clean way to do this and avoid compiler warnings + * on 64-bit platforms.
+ */ +SPHINXBASE_EXPORT +int32 hash_table_lookup_bkey_int32(hash_table_t *h,/**< In: Handle of hash table being searched */ + const char *key,/**< In: Key buffer */ + size_t len, /**< In: Length of above key buffer */ + int32 *val /**< Out: *val = value associated with key. + If this is NULL, no value will be returned. */ + ); + +/** + * Start iterating over key-value pairs in a hash table. + */ +SPHINXBASE_EXPORT +hash_iter_t *hash_table_iter(hash_table_t *h); + +/** + * Get the next key-value pair in iteration. + * + * This function automatically frees the iterator object upon reaching + * the final entry. + * + * @return the iterator advanced to the next entry in the hash table, or NULL if done. + */ +SPHINXBASE_EXPORT +hash_iter_t *hash_table_iter_next(hash_iter_t *itor); + +/** + * Delete an unfinished iterator. + */ +SPHINXBASE_EXPORT +void hash_table_iter_free(hash_iter_t *itor); + +/** + * Build a glist of valid hash_entry_t pointers from the given hash table. Return the list. + */ +SPHINXBASE_EXPORT +glist_t hash_table_tolist(hash_table_t *h, /**< In: Hash table from which list is to be generated */ + int32 *count /**< Out: Number of entries in the list. + If this is NULL, no count will be returned. */ + + ); + +/** + * Display a hash-with-chaining representation on the screen. + * Currently, it only works when hash_table_enter was + * used to enter the keys. + */ +SPHINXBASE_EXPORT +void hash_table_display(hash_table_t *h, /**< In: Hash table to display */ + int32 showkey /**< In: Show the key string or not. + Use 0 if hash_table_enter_bkey was + used.
*/ + ); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/media/sphinxbase/sphinxbase/heap.h b/media/sphinxbase/sphinxbase/heap.h new file mode 100644 index 000000000..ad3756aaf --- /dev/null +++ b/media/sphinxbase/sphinxbase/heap.h @@ -0,0 +1,153 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * heap.h -- Generic heap structure for inserting in any and popping in sorted + * order. + * + * ********************************************** + * CMU ARPA Speech Project + * + * Copyright (c) 1999 Carnegie Mellon University. + * ALL RIGHTS RESERVED. + * ********************************************** + * + * HISTORY + * $Log: heap.h,v $ + * Revision 1.7 2005/06/22 03:05:49 arthchan2003 + * 1, Fixed doxygen documentation, 2, Add keyword. + * + * Revision 1.4 2005/06/15 04:21:46 archan + * 1, Fixed doxygen-documentation, 2, Add keyword such that changes will be logged into a file. + * + * Revision 1.3 2005/03/30 01:22:48 archan + * Fixed mistakes in last updates. Add + * + * + * 23-Dec-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Started. + */ + + +#ifndef _LIBUTIL_HEAP_H_ +#define _LIBUTIL_HEAP_H_ + +#include + +/* Win32/WinCE DLL gunk */ +#include +#include + + /** \file heap.h + * \brief Heap Implementation. + * + * General Comment: Sorted heap structure with three main operations: + * + * 1. Insert a data item (with two attributes: an application supplied pointer and an + * integer value; the heap is maintained in ascending order of the integer value). + * 2. Return the currently topmost item (i.e., item with smallest associated value). + * 3. Return the currently topmost item and pop it off the heap. 
+ */ + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. */ +} +#endif + + +typedef struct heap_s heap_t; /**< Opaque heap handle; struct heap_s is not defined in this header */ + + +/** + * Allocate a new heap and return handle to it. + */ +SPHINXBASE_EXPORT +heap_t *heap_new(void); + + +/** + * Insert a new item into the given heap. + * Return value: 0 if successful, -1 otherwise. + */ +SPHINXBASE_EXPORT +int heap_insert(heap_t *heap, /**< In: Heap into which item is to be inserted */ + void *data, /**< In: Application-determined data pointer */ + int32 val /**< In: Integer value associated with the item; the heap is kept sorted on it */ + ); +/** + * Return the topmost item in the heap. + * Return value: 1 if heap is not empty and the topmost value is returned; + * 0 if heap is empty; -1 if some error occurred. + */ +SPHINXBASE_EXPORT +int heap_top(heap_t *heap, /**< In: Heap whose topmost item is to be returned */ + void **data, /**< Out: Data pointer associated with the topmost item */ + int32 *val /**< Out: Value associated with the topmost item */ + ); +/** + * Like heap_top but also pop the top item off the heap. + */ +SPHINXBASE_EXPORT +int heap_pop(heap_t *heap, void **data, int32 *val); + +/** + * Remove an item from the heap. NOTE(review): return-value convention is not documented here -- confirm against heap.c. + */ +SPHINXBASE_EXPORT +int heap_remove(heap_t *heap, void *data); + +/** + * Return the number of items in the heap. + */ +SPHINXBASE_EXPORT +size_t heap_size(heap_t *heap); + +/** + * Destroy the given heap; free the heap nodes. NOTE: Data pointers in the nodes are NOT freed. + * Return value: 0 if successful, -1 otherwise.
+ */ + +SPHINXBASE_EXPORT +int heap_destroy(heap_t *heap); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/media/sphinxbase/sphinxbase/huff_code.h b/media/sphinxbase/sphinxbase/huff_code.h new file mode 100644 index 000000000..6365e2299 --- /dev/null +++ b/media/sphinxbase/sphinxbase/huff_code.h @@ -0,0 +1,143 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2009 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +/** + * @file huff_code.h + * @brief Huffman code and bitstream implementation + * + * This interface supports building canonical Huffman codes from + * string and integer values. It also provides support for encoding + * and decoding from strings and files, and for reading and writing + * codebooks from files. + */ + +#ifndef __HUFF_CODE_H__ +#define __HUFF_CODE_H__ + +#include + +#include +#include +#include + +typedef struct huff_code_s huff_code_t; + +/** + * Create a codebook from 32-bit integer data. + */ +SPHINXBASE_EXPORT +huff_code_t *huff_code_build_int(int32 const *values, int32 const *frequencies, int nvals); + +/** + * Create a codebook from string data. + */ +SPHINXBASE_EXPORT +huff_code_t *huff_code_build_str(char * const *values, int32 const *frequencies, int nvals); + +/** + * Read a codebook from a file. + */ +SPHINXBASE_EXPORT +huff_code_t *huff_code_read(FILE *infh); + +/** + * Write a codebook to a file. + */ +SPHINXBASE_EXPORT +int huff_code_write(huff_code_t *hc, FILE *outfh); + +/** + * Print a codebook to a file as text (for debugging) + */ +SPHINXBASE_EXPORT +int huff_code_dump(huff_code_t *hc, FILE *dumpfh); + +/** + * Retain a pointer to a Huffman codec object. + */ +SPHINXBASE_EXPORT +huff_code_t *huff_code_retain(huff_code_t *hc); + +/** + * Release a pointer to a Huffman codec object. 
+ */ +SPHINXBASE_EXPORT +int huff_code_free(huff_code_t *hc); + +/** + * Attach a Huffman codec to a file handle for input/output. + */ +SPHINXBASE_EXPORT +FILE *huff_code_attach(huff_code_t *hc, FILE *fh, char const *mode); + +/** + * Detach a Huffman codec from its file handle. + */ +SPHINXBASE_EXPORT +FILE *huff_code_detach(huff_code_t *hc); + +/** + * Encode an integer, writing it to the file handle, if any. + */ +SPHINXBASE_EXPORT +int huff_code_encode_int(huff_code_t *hc, int32 sym, uint32 *outcw); + +/** + * Encode a string, writing it to the file handle, if any. + */ +SPHINXBASE_EXPORT +int huff_code_encode_str(huff_code_t *hc, char const *sym, uint32 *outcw); + +/** + * Decode an integer, reading it from the file if no data given. + */ +SPHINXBASE_EXPORT +int huff_code_decode_int(huff_code_t *hc, int *outval, + char const **inout_data, + size_t *inout_data_len, + int *inout_offset); + +/** + * Decode a string, reading it from the file if no data given. + */ +SPHINXBASE_EXPORT +char const *huff_code_decode_str(huff_code_t *hc, + char const **inout_data, + size_t *inout_data_len, + int *inout_offset); + +#endif /* __HUFF_CODE_H__ */ diff --git a/media/sphinxbase/sphinxbase/jsgf.h b/media/sphinxbase/sphinxbase/jsgf.h new file mode 100644 index 000000000..3c3de1de6 --- /dev/null +++ b/media/sphinxbase/sphinxbase/jsgf.h @@ -0,0 +1,209 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2007 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +#ifndef __JSGF_H__ +#define __JSGF_H__ + +/** + * @file jsgf.h JSGF grammar compiler + * + * This file defines the data structures for parsing JSGF grammars + * into Sphinx finite-state grammars. + **/ + +#include + +/* Win32/WinCE DLL gunk */ +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. */ +} +#endif + +typedef struct jsgf_s jsgf_t; +typedef struct jsgf_rule_s jsgf_rule_t; + +/** + * Create a new JSGF grammar. + * + * @param parent optional parent grammar for this one (NULL, usually). + * @return new JSGF grammar object, or NULL on failure. 
+ */ +SPHINXBASE_EXPORT +jsgf_t *jsgf_grammar_new(jsgf_t *parent); + +/** + * Parse a JSGF grammar from a file. + * + * @param filename the name of the file to parse. + * @param parent optional parent grammar for this one (NULL, usually). + * @return new JSGF grammar object, or NULL on failure. + */ +SPHINXBASE_EXPORT +jsgf_t *jsgf_parse_file(const char *filename, jsgf_t *parent); + +/** + * Parse a JSGF grammar from a string. + * + * @param string 0-terminated string with grammar. + * @param parent optional parent grammar for this one (NULL, usually). + * @return new JSGF grammar object, or NULL on failure. + */ +SPHINXBASE_EXPORT +jsgf_t *jsgf_parse_string(const char *string, jsgf_t *parent); + +/** + * Get the grammar name from the file. + */ +SPHINXBASE_EXPORT +char const *jsgf_grammar_name(jsgf_t *jsgf); + +/** + * Free a JSGF grammar. + */ +SPHINXBASE_EXPORT +void jsgf_grammar_free(jsgf_t *jsgf); + +/** + * Iterator over rules in a grammar. + */ +typedef hash_iter_t jsgf_rule_iter_t; + +/** + * Get an iterator over all rules in a grammar. + */ +SPHINXBASE_EXPORT +jsgf_rule_iter_t *jsgf_rule_iter(jsgf_t *grammar); + +/** + * Advance an iterator to the next rule in the grammar. + */ +#define jsgf_rule_iter_next(itor) hash_table_iter_next(itor) + +/** + * Get the current rule in a rule iterator. + */ +#define jsgf_rule_iter_rule(itor) ((jsgf_rule_t *)(itor)->ent->val) + +/** + * Free a rule iterator (if the end hasn't been reached). + */ +#define jsgf_rule_iter_free(itor) hash_table_iter_free(itor) + +/** + * Get a rule by name from a grammar. Name should not contain brackets. + */ +SPHINXBASE_EXPORT +jsgf_rule_t *jsgf_get_rule(jsgf_t *grammar, const char *name); + +/** + * Returns the first public rule of the grammar + */ +SPHINXBASE_EXPORT +jsgf_rule_t *jsgf_get_public_rule(jsgf_t *grammar); + +/** + * Get the rule name from a rule. + */ +SPHINXBASE_EXPORT +char const *jsgf_rule_name(jsgf_rule_t *rule); + +/** + * Test if a rule is public or not.
+ */ +SPHINXBASE_EXPORT +int jsgf_rule_public(jsgf_rule_t *rule); + +/** + * Build a Sphinx FSG object from a JSGF rule. + */ +SPHINXBASE_EXPORT +fsg_model_t *jsgf_build_fsg(jsgf_t *grammar, jsgf_rule_t *rule, + logmath_t *lmath, float32 lw); + +/** + * Build a Sphinx FSG object from a JSGF rule. + * + * This differs from jsgf_build_fsg() in that it does not do closure + * on epsilon transitions or any other postprocessing. For the time + * being this is necessary in order to write it to a file - the FSG + * code will be fixed soon. + */ +SPHINXBASE_EXPORT +fsg_model_t *jsgf_build_fsg_raw(jsgf_t *grammar, jsgf_rule_t *rule, + logmath_t *lmath, float32 lw); + + +/** + * Read JSGF from file and return FSG object from it. + * + * This function looks for the first public rule in the JSGF and constructs an FSG from it. + */ +SPHINXBASE_EXPORT +fsg_model_t *jsgf_read_file(const char *file, logmath_t * lmath, float32 lw); + +/** + * Read JSGF from string and return FSG object from it. + * + * This function looks for the first public rule in the JSGF and constructs an FSG from it. + */ +SPHINXBASE_EXPORT +fsg_model_t *jsgf_read_string(const char *string, logmath_t * lmath, float32 lw); + + +/** + * Convert a JSGF rule to Sphinx FSG text form. + * + * This does a direct conversion without doing transitive closure on + * null transitions and so forth. + */ +SPHINXBASE_EXPORT +int jsgf_write_fsg(jsgf_t *grammar, jsgf_rule_t *rule, FILE *outfh); + +#ifdef __cplusplus +} +#endif + + +#endif /* __JSGF_H__ */ diff --git a/media/sphinxbase/sphinxbase/listelem_alloc.h b/media/sphinxbase/sphinxbase/listelem_alloc.h new file mode 100644 index 000000000..91b21d177 --- /dev/null +++ b/media/sphinxbase/sphinxbase/listelem_alloc.h @@ -0,0 +1,125 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +#ifndef __LISTELEM_ALLOC_H__ +#define __LISTELEM_ALLOC_H__ + +/** @file listelem_alloc.h + * @brief Fast memory allocator for uniformly sized objects + * @author M K Ravishankar + */ +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. 
*/ +} +#endif + +#include +#ifdef S60 +#include +#endif + +/* Win32/WinCE DLL gunk */ +#include +#include + +/** + * List element allocator object. + */ +typedef struct listelem_alloc_s listelem_alloc_t; + +/** + * Initialize and return a list element allocator. + */ +SPHINXBASE_EXPORT +listelem_alloc_t * listelem_alloc_init(size_t elemsize); + +/** + * Finalize and release all memory associated with a list element allocator. + */ +SPHINXBASE_EXPORT +void listelem_alloc_free(listelem_alloc_t *le); + + +SPHINXBASE_EXPORT +void *__listelem_malloc__(listelem_alloc_t *le, char *file, int line); + +/** + * Allocate a list element and return pointer to it. + */ +#define listelem_malloc(le) __listelem_malloc__((le),__FILE__,__LINE__) + +SPHINXBASE_EXPORT +void *__listelem_malloc_id__(listelem_alloc_t *le, char *file, int line, + int32 *out_id); + +/** + * Allocate a list element, returning a unique identifier. + */ +#define listelem_malloc_id(le, oid) __listelem_malloc_id__((le),__FILE__,__LINE__,(oid)) + +/** + * Retrieve a list element by its identifier. + */ +SPHINXBASE_EXPORT +void *listelem_get_item(listelem_alloc_t *le, int32 id); + +/** + * Free list element of given size + */ +SPHINXBASE_EXPORT +void __listelem_free__(listelem_alloc_t *le, void *elem, char *file, int line); + +/** + * Macro of __listelem_free__ + */ +#define listelem_free(le,el) __listelem_free__((le),(el),__FILE__,__LINE__) + +/** + Print statistics: the number of allocations and the number of free operations +*/ +SPHINXBASE_EXPORT +void listelem_stats(listelem_alloc_t *le); + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/media/sphinxbase/sphinxbase/logmath.h b/media/sphinxbase/sphinxbase/logmath.h new file mode 100644 index 000000000..21caf45dd --- /dev/null +++ b/media/sphinxbase/sphinxbase/logmath.h @@ -0,0 +1,237 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2007 Carnegie Mellon University.
All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/** + * @file logmath.h + * @brief Fast integer logarithmic addition operations. + * + * In evaluating HMM models, probability values are often kept in log + * domain, to avoid overflow. 
To enable these logprob values to be + * held in int32 variables without significant loss of precision, a + * logbase of (1+epsilon) (where epsilon < 0.01 or so) is used. This + * module maintains this logbase (B). + * + * However, maintaining probabilities in log domain creates a problem + * when adding two probability values. This problem can be solved by + * table lookup. Note that: + * + * - \f$ b^z = b^x + b^y \f$ + * - \f$ b^z = b^x(1 + b^{y-x}) = b^y(1 + b^{x-y}) \f$ + * - \f$ z = x + log_b(1 + b^{y-x}) = y + log_b(1 + b^{x-y}) \f$ + * + * So: + * + * - when \f$ y > x, z = y + logadd\_table[-(x-y)] \f$ + * - when \f$ x > y, z = x + logadd\_table[-(y-x)] \f$ + * - where \f$ logadd\_table[n] = log_b(1 + b^{-n}) \f$ + * + * The first entry in logadd_table is + * simply \f$ log_b(2.0) \f$, for + * the case where \f$ y = x \f$ and thus + * \f$ z = log_b(2 b^x) = log_b(2) + x \f$. The last entry is zero, + * where \f$ z = max(x, y) \f$ due to loss of precision. + * + * Since this table can be quite large particularly for small + * logbases, an option is provided to compress it by dropping the + * least significant bits of the table. + */ + +#ifndef __LOGMATH_H__ +#define __LOGMATH_H__ + +#include +#include +#include + + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. */ +} +#endif + +/** + * Integer log math computation table. + * + * This is exposed here to allow log-add computations to be inlined. + */ +typedef struct logadd_s logadd_t; +struct logadd_s { + /** Table, in unsigned integers of (width) bytes. */ + void *table; + /** Number of elements in (table). This is never smaller than 256 (important!) */ + uint32 table_size; + /** Width of elements of (table). */ + uint8 width; + /** Right shift applied to elements in (table). */ + int8 shift; +}; + +/** + * Integer log math computation class.
+ */ +typedef struct logmath_s logmath_t; + +/** + * Obtain the log-add table from a logmath_t * + */ +#define LOGMATH_TABLE(lm) ((logadd_t *)lm) + +/** + * Initialize a log math computation table. + * @param base The base B in which computation is to be done. + * @param shift Log values are shifted right by this many bits. + * @param use_table Whether to use an add table or not + * @return The newly created log math table. + */ +SPHINXBASE_EXPORT +logmath_t *logmath_init(float64 base, int shift, int use_table); + +/** + * Memory-map (or read) a log table from a file. + */ +SPHINXBASE_EXPORT +logmath_t *logmath_read(const char *filename); + +/** + * Write a log table to a file. + */ +SPHINXBASE_EXPORT +int32 logmath_write(logmath_t *lmath, const char *filename); + +/** + * Get the log table size and dimensions. + */ +SPHINXBASE_EXPORT +int32 logmath_get_table_shape(logmath_t *lmath, uint32 *out_size, + uint32 *out_width, uint32 *out_shift); + +/** + * Get the log base. + */ +SPHINXBASE_EXPORT +float64 logmath_get_base(logmath_t *lmath); + +/** + * Get the smallest possible value represented in this base. + */ +SPHINXBASE_EXPORT +int logmath_get_zero(logmath_t *lmath); + +/** + * Get the width of the values in a log table. + */ +SPHINXBASE_EXPORT +int logmath_get_width(logmath_t *lmath); + +/** + * Get the shift of the values in a log table. + */ +SPHINXBASE_EXPORT +int logmath_get_shift(logmath_t *lmath); + +/** + * Retain ownership of a log table. + * + * @return pointer to retained log table. + */ +SPHINXBASE_EXPORT +logmath_t *logmath_retain(logmath_t *lmath); + +/** + * Free a log table. + * + * @return new reference count (0 if freed completely) + */ +SPHINXBASE_EXPORT +int logmath_free(logmath_t *lmath); + +/** + * Add two values in log space exactly and slowly (without using add table). + */ +SPHINXBASE_EXPORT +int logmath_add_exact(logmath_t *lmath, int logb_p, int logb_q); + +/** + * Add two values in log space (i.e. 
return log(exp(p)+exp(q))) + */ +SPHINXBASE_EXPORT +int logmath_add(logmath_t *lmath, int logb_p, int logb_q); + +/** + * Convert linear floating point number to integer log in base B. + */ +SPHINXBASE_EXPORT +int logmath_log(logmath_t *lmath, float64 p); + +/** + * Convert integer log in base B to linear floating point. + */ +SPHINXBASE_EXPORT +float64 logmath_exp(logmath_t *lmath, int logb_p); + +/** + * Convert natural log (in floating point) to integer log in base B. + */ +SPHINXBASE_EXPORT +int logmath_ln_to_log(logmath_t *lmath, float64 log_p); + +/** + * Convert integer log in base B to natural log (in floating point). + */ +SPHINXBASE_EXPORT +float64 logmath_log_to_ln(logmath_t *lmath, int logb_p); + +/** + * Convert base 10 log (in floating point) to integer log in base B. + */ +SPHINXBASE_EXPORT +int logmath_log10_to_log(logmath_t *lmath, float64 log_p); + +/** + * Convert integer log in base B to base 10 log (in floating point). + */ +SPHINXBASE_EXPORT +float64 logmath_log_to_log10(logmath_t *lmath, int logb_p); + +#ifdef __cplusplus +} +#endif + + +#endif /* __LOGMATH_H__ */ diff --git a/media/sphinxbase/sphinxbase/matrix.h b/media/sphinxbase/sphinxbase/matrix.h new file mode 100644 index 000000000..b7c92e073 --- /dev/null +++ b/media/sphinxbase/sphinxbase/matrix.h @@ -0,0 +1,210 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1997-2000 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/********************************************************************* + * + * File: matrix.h + * + * Description: Matrix and linear algebra functions + * + * Author: + * + *********************************************************************/ + +#ifndef MATRIX_H +#define MATRIX_H + +/** \file matrix.h + * \brief Matrix and linear algebra functions. + * + * This file contains some basic matrix and linear algebra operations. 
+ * In general these operate on positive definite matrices ONLY, + * because all matrices we're likely to encounter are either + * covariance matrices or are derived from them, and therefore a + * non-positive-definite matrix indicates some kind of pathological + * condition. + */ +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. */ +} +#endif + +/* Win32/WinCE DLL gunk */ +#include +#include + + +/** + * Norm an array + * @param arr array + * @param d1 dimension + * @param d2 dimension + * @param d3 dimension + **/ +SPHINXBASE_EXPORT void norm_3d(float32 ***arr, uint32 d1, uint32 d2, uint32 d3); + +/** + * Accumulate 3-d array (in into out) + * @param out output array + * @param in input array + * @param d1 dimension + * @param d2 dimension + * @param d3 dimension + **/ +SPHINXBASE_EXPORT void +accum_3d(float32 ***out, float32 ***in, uint32 d1, uint32 d2, uint32 d3); + +/** Ensures that non-zero values x such that -band < x < band, band > 0 are set to -band if x < 0 and band if x > 0. + * @param v array + * @param d1 array size + * @param band band value + */ +SPHINXBASE_EXPORT void band_nz_1d(float32 *v, uint32 d1, float32 band); + +/** + * Floor 3-d array + * @param m array + * @param d1 dimension + * @param d2 dimension + * @param d3 dimension + * @param floor floor value + **/ +SPHINXBASE_EXPORT void floor_nz_3d(float32 ***m, uint32 d1, uint32 d2, uint32 d3, float32 floor); + +/** + * Floor 1-d array + * @param v array + * @param d1 dimension + * @param floor floor value + **/ +SPHINXBASE_EXPORT void floor_nz_1d(float32 *v, uint32 d1, float32 floor); + +/** + * Calculate the determinant of a positive definite matrix. + * @param a The input matrix, must be positive definite. + * @param len The dimension of the input matrix. + * @return The determinant of the input matrix, or -1.0 if the matrix is + * not positive definite. + * + * \note These can be vanishingly small hence the float64 return type.
+ * Also note that only the upper triangular portion of a is + * considered, therefore the check for positive-definiteness is not + * reliable. + **/ +SPHINXBASE_EXPORT +float64 determinant(float32 **a, int32 len); + +/** + * Invert (if possible) a positive definite matrix. + * @param out_ainv The inverse of a will be stored here. + * @param a The input matrix, must be positive definite. + * @param len The dimension of the input matrix. + * @return 0 for success or -1 for a non-positive-definite matrix. + * + * \note Only the upper triangular portion of a is considered, + * therefore the check for positive-definiteness is not reliable. + **/ +SPHINXBASE_EXPORT +int32 invert(float32 **out_ainv, float32 **a, int32 len); + +/** + * Solve (if possible) a positive-definite system of linear equations AX=B for X. + * @param a The A matrix on the left-hand side of the equation, must be positive-definite. + * @param b The B vector on the right-hand side of the equation. + * @param out_x The X vector will be stored here. + * @param n The dimension of the A matrix (n by n) and the B and X vectors. + * @return 0 for success or -1 for a non-positive-definite matrix. + * + * \note Only the upper triangular portion of a is considered, + * therefore the check for positive-definiteness is not reliable. + **/ +SPHINXBASE_EXPORT +int32 solve(float32 **a, float32 *b, + float32 *out_x, int32 n); + +/** + * Calculate the outer product of two vectors. + * @param out_a A (pre-allocated) len x len array. The outer product + * will be stored here. + * @param x A vector of length len. + * @param y A vector of length len. + * @param len The length of the input vectors. + **/ +SPHINXBASE_EXPORT +void outerproduct(float32 **out_a, float32 *x, float32 *y, int32 len); + +/** + * Multiply C=AB where A and B are symmetric matrices. + * @param out_c The output matrix C. + * @param a The input matrix A. + * @param b The input matrix B. + * @param n Dimensionality of A and B. 
+ **/ +SPHINXBASE_EXPORT +void matrixmultiply(float32 **out_c, /* = */ + float32 **a, /* * */ float32 **b, + int32 n); + +/** + * Multiply a symmetric matrix by a constant in-place. + * @param inout_a The matrix to multiply. + * @param x The constant to multiply it by. + * @param n dimension of a. + **/ +SPHINXBASE_EXPORT +void scalarmultiply(float32 **inout_a, float32 x, int32 n); + +/** + * Add A += B. + * @param inout_a The A matrix to add. + * @param b The B matrix to add to A. + * @param n dimension of a and b. + **/ +SPHINXBASE_EXPORT +void matrixadd(float32 **inout_a, float32 **b, int32 n); + +#if 0 +{ /* Fool indent. */ +#endif +#ifdef __cplusplus +} +#endif + +#endif /* MATRIX_H */ + diff --git a/media/sphinxbase/sphinxbase/mmio.h b/media/sphinxbase/sphinxbase/mmio.h new file mode 100644 index 000000000..76127339c --- /dev/null +++ b/media/sphinxbase/sphinxbase/mmio.h @@ -0,0 +1,85 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2006-2007 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. 
+ * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/** + * @file mmio.h + * @brief Memory-mapped I/O wrappers for files. + * @author David Huggins-Daines + **/ + +#ifndef __MMIO_H__ +#define __MMIO_H__ + +#include + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. */ +} +#endif + +/** + * Abstract structure representing a memory-mapped file. + **/ +typedef struct mmio_file_s mmio_file_t; + +/** + * Memory-map a file for reading. + * @return a mmio_file_t * or NULL for failure. + **/ +SPHINXBASE_EXPORT +mmio_file_t *mmio_file_read(const char *filename); + +/** + * Get a pointer to the memory mapped for a file. + **/ +SPHINXBASE_EXPORT +void *mmio_file_ptr(mmio_file_t *mf); + +/** + * Unmap a file, releasing memory associated with it. 
+ **/ +SPHINXBASE_EXPORT +void mmio_file_unmap(mmio_file_t *mf); + +#ifdef __cplusplus +} +#endif + + +#endif /* __MMIO_H__ */ diff --git a/media/sphinxbase/sphinxbase/mulaw.h b/media/sphinxbase/sphinxbase/mulaw.h new file mode 100644 index 000000000..49561303f --- /dev/null +++ b/media/sphinxbase/sphinxbase/mulaw.h @@ -0,0 +1,99 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2001 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * muLaw.h -- Table for converting mu-law data into 16-bit linear PCM format. + * + * ********************************************** + * CMU ARPA Speech Project + * + * Copyright (c) 1996 Carnegie Mellon University. + * ALL RIGHTS RESERVED. + * ********************************************** + * + * HISTORY + * + * 21-Jul-97 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Created from Sunil Issar's version. + */ + +/** + * \file mulaw.h + * \brief Table for converting mu-law data into 16-bit linear PCM format. 
+ */ + + +#ifndef _MULAW_H_ +#define _MULAW_H_ + + +static int16 muLaw[256] = { + -0x1f5f, -0x1e5f, -0x1d5f, -0x1c5f, -0x1b5f, -0x1a5f, -0x195f, -0x185f, + -0x175f, -0x165f, -0x155f, -0x145f, -0x135f, -0x125f, -0x115f, -0x105f, + -0x0f9f, -0x0f1f, -0x0e9f, -0x0e1f, -0x0d9f, -0x0d1f, -0x0c9f, -0x0c1f, + -0x0b9f, -0x0b1f, -0x0a9f, -0x0a1f, -0x099f, -0x091f, -0x089f, -0x081f, + -0x07bf, -0x077f, -0x073f, -0x06ff, -0x06bf, -0x067f, -0x063f, -0x05ff, + -0x05bf, -0x057f, -0x053f, -0x04ff, -0x04bf, -0x047f, -0x043f, -0x03ff, + -0x03cf, -0x03af, -0x038f, -0x036f, -0x034f, -0x032f, -0x030f, -0x02ef, + -0x02cf, -0x02af, -0x028f, -0x026f, -0x024f, -0x022f, -0x020f, -0x01ef, + -0x01d7, -0x01c7, -0x01b7, -0x01a7, -0x0197, -0x0187, -0x0177, -0x0167, + -0x0157, -0x0147, -0x0137, -0x0127, -0x0117, -0x0107, -0x00f7, -0x00e7, + -0x00db, -0x00d3, -0x00cb, -0x00c3, -0x00bb, -0x00b3, -0x00ab, -0x00a3, + -0x009b, -0x0093, -0x008b, -0x0083, -0x007b, -0x0073, -0x006b, -0x0063, + -0x005d, -0x0059, -0x0055, -0x0051, -0x004d, -0x0049, -0x0045, -0x0041, + -0x003d, -0x0039, -0x0035, -0x0031, -0x002d, -0x0029, -0x0025, -0x0021, + -0x001e, -0x001c, -0x001a, -0x0018, -0x0016, -0x0014, -0x0012, -0x0010, + -0x000e, -0x000c, -0x000a, -0x0008, -0x0006, -0x0004, -0x0002, 0x0000, + 0x1f5f, 0x1e5f, 0x1d5f, 0x1c5f, 0x1b5f, 0x1a5f, 0x195f, 0x185f, + 0x175f, 0x165f, 0x155f, 0x145f, 0x135f, 0x125f, 0x115f, 0x105f, + 0x0f9f, 0x0f1f, 0x0e9f, 0x0e1f, 0x0d9f, 0x0d1f, 0x0c9f, 0x0c1f, + 0x0b9f, 0x0b1f, 0x0a9f, 0x0a1f, 0x099f, 0x091f, 0x089f, 0x081f, + 0x07bf, 0x077f, 0x073f, 0x06ff, 0x06bf, 0x067f, 0x063f, 0x05ff, + 0x05bf, 0x057f, 0x053f, 0x04ff, 0x04bf, 0x047f, 0x043f, 0x03ff, + 0x03cf, 0x03af, 0x038f, 0x036f, 0x034f, 0x032f, 0x030f, 0x02ef, + 0x02cf, 0x02af, 0x028f, 0x026f, 0x024f, 0x022f, 0x020f, 0x01ef, + 0x01d7, 0x01c7, 0x01b7, 0x01a7, 0x0197, 0x0187, 0x0177, 0x0167, + 0x0157, 0x0147, 0x0137, 0x0127, 0x0117, 0x0107, 0x00f7, 0x00e7, + 0x00db, 0x00d3, 0x00cb, 0x00c3, 0x00bb, 0x00b3, 0x00ab, 0x00a3, + 0x009b, 
0x0093, 0x008b, 0x0083, 0x007b, 0x0073, 0x006b, 0x0063, + 0x005d, 0x0059, 0x0055, 0x0051, 0x004d, 0x0049, 0x0045, 0x0041, + 0x003d, 0x0039, 0x0035, 0x0031, 0x002d, 0x0029, 0x0025, 0x0021, + 0x001e, 0x001c, 0x001a, 0x0018, 0x0016, 0x0014, 0x0012, 0x0010, + 0x000e, 0x000c, 0x000a, 0x0008, 0x0006, 0x0004, 0x0002, 0x0000, +}; + + +#endif diff --git a/media/sphinxbase/sphinxbase/ngram_model.h b/media/sphinxbase/sphinxbase/ngram_model.h new file mode 100644 index 000000000..f441ef7da --- /dev/null +++ b/media/sphinxbase/sphinxbase/ngram_model.h @@ -0,0 +1,711 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2007 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/** + * @file ngram_model.h + * @brief N-Gram language models + * @author David Huggins-Daines + */ + +#ifndef __NGRAM_MODEL_H__ +#define __NGRAM_MODEL_H__ + +#include + +/* Win32/WinCE DLL gunk */ +#include +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. */ +} +#endif + +/** + * Abstract type representing an N-Gram based language model. + */ +typedef struct ngram_model_s ngram_model_t; + +/** + * Abstract type representing a word class in an N-Gram model. + */ +typedef struct ngram_class_s ngram_class_t; + +/** + * File types for N-Gram files + */ +typedef enum ngram_file_type_e { + NGRAM_INVALID = -1, /**< Not a valid file type. */ + NGRAM_AUTO, /**< Determine file type automatically. */ + NGRAM_ARPA, /**< ARPABO text format (the standard). */ + NGRAM_DMP, /**< Sphinx .DMP format. */ + NGRAM_DMP32, /**< Sphinx .DMP32 format (NOT SUPPORTED) */ +} ngram_file_type_t; + +#define NGRAM_INVALID_WID -1 /**< Impossible word ID */ + +/** + * Read an N-Gram model from a file on disk. + * + * @param config Optional pointer to a set of command-line arguments. 
+ * Recognized arguments are: + * + * - -mmap (boolean) whether to use memory-mapped I/O + * - -lw (float32) language weight to apply to the model + * - -wip (float32) word insertion penalty to apply to the model + * - -uw (float32) unigram weight to apply to the model + * + * @param file_name path to the file to read. + * @param file_type type of the file, or NGRAM_AUTO to determine automatically. + * @param lmath Log-math parameters to use for probability + * calculations. Ownership of this object is assumed by + * the newly created ngram_model_t, and you should not + * attempt to free it manually. If you wish to reuse it + * elsewhere, you must retain it with logmath_retain(). + * @return newly created ngram_model_t. + */ +SPHINXBASE_EXPORT +ngram_model_t *ngram_model_read(cmd_ln_t *config, + const char *file_name, + ngram_file_type_t file_type, + logmath_t *lmath); + +/** + * Write an N-Gram model to disk. + * + * @return 0 for success, <0 on error + */ +SPHINXBASE_EXPORT +int ngram_model_write(ngram_model_t *model, const char *file_name, + ngram_file_type_t format); + +/** + * Guess the file type for an N-Gram model from the filename. + * + * @return the guessed file type, or NGRAM_INVALID if none could be guessed. + */ +SPHINXBASE_EXPORT +ngram_file_type_t ngram_file_name_to_type(const char *file_name); + +/** + * Get the N-Gram file type from a string. + * + * @return file type, or NGRAM_INVALID if no such file type exists. + */ +SPHINXBASE_EXPORT +ngram_file_type_t ngram_str_to_type(const char *str_name); + +/** + * Get the canonical name for an N-Gram file type. + * + * @return read-only string with the name for this file type, or NULL + * if no such type exists. + */ +SPHINXBASE_EXPORT +char const *ngram_type_to_str(int type); + +/** + * Retain ownership of an N-Gram model. + * + * @return Pointer to retained model. + */ +SPHINXBASE_EXPORT +ngram_model_t *ngram_model_retain(ngram_model_t *model); + +/** + * Release memory associated with an N-Gram model. 
+ * + * @return new reference count (0 if freed completely) + */ +SPHINXBASE_EXPORT +int ngram_model_free(ngram_model_t *model); + +/** + * Constants for case folding. + */ +typedef enum ngram_case_e { + NGRAM_UPPER, + NGRAM_LOWER +} ngram_case_t; + +/** + * Case-fold word strings in an N-Gram model. + * + * WARNING: This is not Unicode aware, so any non-ASCII characters + * will not be converted. + */ +SPHINXBASE_EXPORT +int ngram_model_casefold(ngram_model_t *model, int kase); + +/** + * Apply a language weight, insertion penalty, and unigram weight to a + * language model. + * + * This will change the values output by ngram_score() and friends. + * This is done for efficiency since in decoding, these are the only + * values we actually need. Call ngram_prob() if you want the "raw" + * N-Gram probability estimate. + * + * To remove all weighting, call ngram_model_apply_weights(model, 1.0, 1.0, 1.0). + */ +SPHINXBASE_EXPORT +int ngram_model_apply_weights(ngram_model_t *model, + float32 lw, float32 wip, float32 uw); + +/** + * Get the current weights from a language model. + * + * @param model The model in question. + * @param out_log_wip Output: (optional) logarithm of word insertion penalty. + * @param out_log_uw Output: (optional) logarithm of unigram weight. + * @return language weight. + */ +SPHINXBASE_EXPORT +float32 ngram_model_get_weights(ngram_model_t *model, int32 *out_log_wip, + int32 *out_log_uw); + +/** + * Get the score (scaled, interpolated log-probability) for a general + * N-Gram. + * + * The argument list consists of the history words (as null-terminated + * strings) of the N-Gram, in reverse order, followed by NULL. + * Therefore, if you wanted to get the N-Gram score for "a whole joy", + * you would call: + * + *
+ *  score = ngram_score(model, "joy", "whole", "a", NULL);
+ * 
+ * + * This is not the function to use in decoding, because it has some + * overhead for looking up words. Use ngram_ng_score(), + * ngram_tg_score(), or ngram_bg_score() instead. In the future there + * will probably be a version that takes a general language model + * state object, to support suffix-array LM and things like that. + * + * If one of the words is not in the LM's vocabulary, the result will + * depend on whether this is an open or closed vocabulary language + * model. For an open-vocabulary model, unknown words are all mapped + * to the unigram <UNK> which has a non-zero probability and also + * participates in higher-order N-Grams. Therefore, you will get a + * score of some sort in this case. + * + * For a closed-vocabulary model, unknown words are impossible and + * thus have zero probability. Therefore, if word is + * unknown, this function will return a "zero" log-probability, i.e. a + * large negative number. To obtain this number for comparison, call + * ngram_zero(). + */ +SPHINXBASE_EXPORT +int32 ngram_score(ngram_model_t *model, const char *word, ...); + +/** + * Quick trigram score lookup. + */ +SPHINXBASE_EXPORT +int32 ngram_tg_score(ngram_model_t *model, + int32 w3, int32 w2, int32 w1, + int32 *n_used); + +/** + * Quick bigram score lookup. + */ +SPHINXBASE_EXPORT +int32 ngram_bg_score(ngram_model_t *model, + int32 w2, int32 w1, + int32 *n_used); + +/** + * Quick general N-Gram score lookup. + */ +SPHINXBASE_EXPORT +int32 ngram_ng_score(ngram_model_t *model, int32 wid, int32 *history, + int32 n_hist, int32 *n_used); + +/** + * Get the "raw" log-probability for a general N-Gram. + * + * This returns the log-probability of an N-Gram, as defined in the + * language model file, before any language weighting, interpolation, + * or insertion penalty has been applied. + * + * @note When backing off to a unigram from a bigram or trigram, the + * unigram weight (interpolation with uniform) is not removed. 
+ */ +SPHINXBASE_EXPORT +int32 ngram_probv(ngram_model_t *model, const char *word, ...); + +/** + * Get the "raw" log-probability for a general N-Gram. + * + * This returns the log-probability of an N-Gram, as defined in the + * language model file, before any language weighting, interpolation, + * or insertion penalty has been applied. + * + * @note When backing off to a unigram from a bigram or trigram, the + * unigram weight (interpolation with uniform) is not removed. + */ +SPHINXBASE_EXPORT +int32 ngram_prob(ngram_model_t *model, const char *const *words, int32 n); + +/** + * Quick "raw" probability lookup for a general N-Gram. + * + * See documentation for ngram_ng_score() and ngram_model_apply_weights() + * for an explanation of this. + */ +SPHINXBASE_EXPORT +int32 ngram_ng_prob(ngram_model_t *model, int32 wid, int32 *history, + int32 n_hist, int32 *n_used); + +/** + * Convert score to "raw" log-probability. + * + * @note The unigram weight (interpolation with uniform) is not + * removed, since there is no way to know which order of N-Gram + * generated score. + * + * @param model The N-Gram model from which score was obtained. + * @param score The N-Gram score to convert + * @return The raw log-probability value. + */ +SPHINXBASE_EXPORT +int32 ngram_score_to_prob(ngram_model_t *model, int32 score); + +/** + * Look up numerical word ID. + */ +SPHINXBASE_EXPORT +int32 ngram_wid(ngram_model_t *model, const char *word); + +/** + * Look up word string for numerical word ID. + */ +SPHINXBASE_EXPORT +const char *ngram_word(ngram_model_t *model, int32 wid); + +/** + * Get the unknown word ID for a language model. + * + * Language models can be either "open vocabulary" or "closed + * vocabulary". The difference is that the former assigns a fixed + * non-zero unigram probability to unknown words, while the latter + * does not allow unknown words (or, equivalently, it assigns them + * zero probability).
If this is a closed vocabulary model, this + * function will return NGRAM_INVALID_WID. + * + * @return The ID for the unknown word, or NGRAM_INVALID_WID if none + * exists. + */ +SPHINXBASE_EXPORT +int32 ngram_unknown_wid(ngram_model_t *model); + +/** + * Get the "zero" log-probability value for a language model. + */ +SPHINXBASE_EXPORT +int32 ngram_zero(ngram_model_t *model); + +/** + * Get the order of the N-gram model (i.e. the "N" in "N-gram") + */ +SPHINXBASE_EXPORT +int32 ngram_model_get_size(ngram_model_t *model); + +/** + * Get the counts of the various N-grams in the model. + */ +SPHINXBASE_EXPORT +int32 const *ngram_model_get_counts(ngram_model_t *model); + +/** + * M-gram iterator object. + */ +typedef struct ngram_iter_s ngram_iter_t; + +/** + * Iterate over all M-grams. + * + * @param model Language model to query. + * @param m Order of the M-Grams requested minus one (i.e. order of the history) + * @return An iterator over the requested M, or NULL if no N-grams of + * order M+1 exist. + */ +SPHINXBASE_EXPORT +ngram_iter_t *ngram_model_mgrams(ngram_model_t *model, int m); + +/** + * Get an iterator over M-grams pointing to the specified M-gram. + */ +SPHINXBASE_EXPORT +ngram_iter_t *ngram_iter(ngram_model_t *model, const char *word, ...); + +/** + * Get an iterator over M-grams pointing to the specified M-gram. + */ +SPHINXBASE_EXPORT +ngram_iter_t *ngram_ng_iter(ngram_model_t *model, int32 wid, int32 *history, int32 n_hist); + +/** + * Get information from the current M-gram in an iterator. + * + * @param out_score Output: Score for this M-gram (including any word + * penalty and language weight). + * @param out_bowt Output: Backoff weight for this M-gram. + * @return read-only array of word IDs. + */ +SPHINXBASE_EXPORT +int32 const *ngram_iter_get(ngram_iter_t *itor, + int32 *out_score, + int32 *out_bowt); + +/** + * Iterate over all M-gram successors of an M-1-gram. + * + * @param itor Iterator pointing to the M-1-gram to get successors of. 
+ */ +SPHINXBASE_EXPORT +ngram_iter_t *ngram_iter_successors(ngram_iter_t *itor); + +/** + * Advance an M-gram iterator. + */ +SPHINXBASE_EXPORT +ngram_iter_t *ngram_iter_next(ngram_iter_t *itor); + +/** + * Terminate an M-gram iterator. + */ +SPHINXBASE_EXPORT +void ngram_iter_free(ngram_iter_t *itor); + +/** + * Add a word (unigram) to the language model. + * + * @note The semantics of this are not particularly well-defined for + * model sets, and may be subject to change. Currently this will add + * the word to all of the submodels. + * + * @param model The model to add a word to. + * @param word Text of the word to add. + * @param weight Weight of this word relative to the uniform distribution. + * @return The word ID for the new word. + */ +SPHINXBASE_EXPORT +int32 ngram_model_add_word(ngram_model_t *model, + const char *word, float32 weight); + +/** + * Read a class definition file and add classes to a language model. + * + * This function assumes that the class tags have already been defined + * as unigrams in the language model. All words in the class + * definition will be added to the vocabulary as special in-class words. + * For this reason it is necessary that they not have the same names + * as any words in the general unigram distribution. The convention + * is to suffix them with ":class_tag", where class_tag is the class + * tag minus the enclosing square brackets. + * + * @return 0 for success, <0 for error + */ +SPHINXBASE_EXPORT +int32 ngram_model_read_classdef(ngram_model_t *model, + const char *file_name); + +/** + * Add a new class to a language model. + * + * If classname already exists in the unigram set for + * model, then it will be converted to a class tag, and + * classweight will be ignored. Otherwise, a new unigram + * will be created as in ngram_model_add_word().
+ */ +SPHINXBASE_EXPORT +int32 ngram_model_add_class(ngram_model_t *model, + const char *classname, + float32 classweight, + char **words, + const float32 *weights, + int32 n_words); + +/** + * Add a word to a class in a language model. + * + * @param model The model to add a word to. + * @param classname Name of the class to add this word to. + * @param word Text of the word to add. + * @param weight Weight of this word relative to the within-class uniform distribution. + * @return The word ID for the new word. + */ +SPHINXBASE_EXPORT +int32 ngram_model_add_class_word(ngram_model_t *model, + const char *classname, + const char *word, + float32 weight); + +/** + * Create a set of language models sharing a common space of word IDs. + * + * This function creates a meta-language model which groups together a + * set of language models, synchronizing word IDs between them. To + * use this language model, you can either select a submodel to use + * exclusively using ngram_model_set_select(), or interpolate + * between scores from all models. To do the latter, you can either + * pass a non-NULL value of the weights parameter, or + * re-activate interpolation later on by calling + * ngram_model_set_interp(). + * + * In order to make this efficient, there are some restrictions on the + * models that can be grouped together. The most important (and + * currently the only) one is that they must all + * share the same log-math parameters. + * + * @param config Any configuration parameters to be shared between models. + * @param models Array of pointers to previously created language models. + * @param names Array of strings to use as unique identifiers for LMs. + * @param weights Array of weights to use in interpolating LMs, or NULL + * for no interpolation. + * @param n_models Number of elements in the arrays passed to this function. 
+ */ +SPHINXBASE_EXPORT +ngram_model_t *ngram_model_set_init(cmd_ln_t *config, + ngram_model_t **models, + char **names, + const float32 *weights, + int32 n_models); + +/** + * Read a set of language models from a control file. + * + * This file creates a language model set from a "control file" of + * the type used in Sphinx-II and Sphinx-III. + * File format (optional stuff is indicated by enclosing in []): + * + *
+ *   [{ LMClassFileName LMClassFilename ... }]
+ *   TrigramLMFileName LMName [{ LMClassName LMClassName ... }]
+ *   TrigramLMFileName LMName [{ LMClassName LMClassName ... }]
+ *   ...
+ * (There should be whitespace around the { and } delimiters.)
+ * 
+ * + * This is an extension of the older format that had only TrigramLMFileName + * and LMName pairs. The new format allows a set of LMClass files to be read + * in and referred to by the trigram LMs. + * + * No "comments" allowed in this file. + * + * @param config Configuration parameters. + * @param lmctlfile Path to the language model control file. + * @param lmath Log-math parameters to use for probability + * calculations. Ownership of this object is assumed by + * the newly created ngram_model_t, and you should not + * attempt to free it manually. If you wish to reuse it + * elsewhere, you must retain it with logmath_retain(). + * @return newly created language model set. + */ +SPHINXBASE_EXPORT +ngram_model_t *ngram_model_set_read(cmd_ln_t *config, + const char *lmctlfile, + logmath_t *lmath); + +/** + * Returns the number of language models in a set. + */ +SPHINXBASE_EXPORT +int32 ngram_model_set_count(ngram_model_t *set); + +/** + * Iterator over language models in a set. + */ +typedef struct ngram_model_set_iter_s ngram_model_set_iter_t; + +/** + * Begin iterating over language models in a set. + * + * @return iterator pointing to the first language model, or NULL if no models remain. + */ +SPHINXBASE_EXPORT +ngram_model_set_iter_t *ngram_model_set_iter(ngram_model_t *set); + +/** + * Move to the next language model in a set. + * + * @return iterator pointing to the next language model, or NULL if no models remain. + */ +SPHINXBASE_EXPORT +ngram_model_set_iter_t *ngram_model_set_iter_next(ngram_model_set_iter_t *itor); + +/** + * Finish iteration over a language model set. + */ +SPHINXBASE_EXPORT +void ngram_model_set_iter_free(ngram_model_set_iter_t *itor); + +/** + * Get language model and associated name from an iterator. + * + * @param itor the iterator + * @param lmname Output: string name associated with this language model. + * @return Language model pointed to by this iterator.
+ */ +SPHINXBASE_EXPORT +ngram_model_t *ngram_model_set_iter_model(ngram_model_set_iter_t *itor, + char const **lmname); + +/** + * Select a single language model from a set for scoring. + * + * @return the newly selected language model, or NULL if no language + * model by that name exists. + */ +SPHINXBASE_EXPORT +ngram_model_t *ngram_model_set_select(ngram_model_t *set, + const char *name); + +/** + * Look up a language model by name from a set. + * + * @return language model corresponding to name, or NULL + * if no language model by that name exists. + */ +SPHINXBASE_EXPORT +ngram_model_t *ngram_model_set_lookup(ngram_model_t *set, + const char *name); + +/** + * Get the current language model name, if any. + */ +SPHINXBASE_EXPORT +const char *ngram_model_set_current(ngram_model_t *set); + +/** + * Set interpolation weights for a set and enables interpolation. + * + * If weights is NULL, any previously initialized set of + * weights will be used. If no weights were specified to + * ngram_model_set_init(), then a uniform distribution will be used. + */ +SPHINXBASE_EXPORT +ngram_model_t *ngram_model_set_interp(ngram_model_t *set, + const char **names, + const float32 *weights); + +/** + * Add a language model to a set. + * + * @param set The language model set to add to. + * @param model The language model to add. + * @param name The name to associate with this model. + * @param weight Interpolation weight for this model, relative to the + * uniform distribution. 1.0 is a safe value. + * @param reuse_widmap Reuse the existing word-ID mapping in + * set. Any new words present in model + * will not be added to the word-ID mapping in this case. + */ +SPHINXBASE_EXPORT +ngram_model_t *ngram_model_set_add(ngram_model_t *set, + ngram_model_t *model, + const char *name, + float32 weight, + int reuse_widmap); + +/** + * Remove a language model from a set. + * + * @param set The language model set to remove from. 
+ * @param name The name associated with the model to remove. + * @param reuse_widmap Reuse the existing word-ID mapping in + * set. + */ +SPHINXBASE_EXPORT +ngram_model_t *ngram_model_set_remove(ngram_model_t *set, + const char *name, + int reuse_widmap); + +/** + * Set the word-to-ID mapping for this model set. + */ +SPHINXBASE_EXPORT +void ngram_model_set_map_words(ngram_model_t *set, + const char **words, + int32 n_words); + +/** + * Query the word-ID mapping for the current language model. + * + * @return the local word ID in the current language model, or + * NGRAM_INVALID_WID if set_wid is invalid or + * interpolation is enabled. + */ +SPHINXBASE_EXPORT +int32 ngram_model_set_current_wid(ngram_model_t *set, + int32 set_wid); + +/** + * Test whether a word ID corresponds to a known word in the current + * state of the language model set. + * + * @return If there is a current language model, returns non-zero if + * set_wid corresponds to a known word in that language + * model. Otherwise, returns non-zero if set_wid + * corresponds to a known word in any language model. + */ +SPHINXBASE_EXPORT +int32 ngram_model_set_known_wid(ngram_model_t *set, int32 set_wid); + +/** + * Flush any cached N-Gram information + * + * Some types of models cache trigram or other N-Gram information to + * speed repeated access to N-Grams with shared histories. This + * function flushes the cache so as to avoid dynamic memory leaks. + */ +SPHINXBASE_EXPORT +void ngram_model_flush(ngram_model_t *lm); + +#ifdef __cplusplus +} +#endif + + +#endif /* __NGRAM_MODEL_H__ */ diff --git a/media/sphinxbase/sphinxbase/pio.h b/media/sphinxbase/sphinxbase/pio.h new file mode 100644 index 000000000..fe094a60b --- /dev/null +++ b/media/sphinxbase/sphinxbase/pio.h @@ -0,0 +1,302 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * pio.h -- Packaged I/O routines. + * + * ********************************************** + * CMU ARPA Speech Project + * + * Copyright (c) 1999 Carnegie Mellon University. + * ALL RIGHTS RESERVED. 
+ * ********************************************** + * + * HISTORY + * $Log: pio.h,v $ + * Revision 1.3 2005/06/22 08:00:09 arthchan2003 + * Completed all doxygen documentation on file description for libs3decoder/libutil/libs3audio and programs. + * + * Revision 1.2 2005/06/22 03:09:52 arthchan2003 + * 1, Fixed doxygen documentation, 2, Added keyword. + * + * Revision 1.2 2005/06/16 00:14:08 archan + * Added const keyword to file argument for file_open + * + * Revision 1.1 2005/06/15 06:11:03 archan + * sphinx3 to s3.generic: change io.[ch] to pio.[ch] + * + * Revision 1.5 2005/06/15 04:21:46 archan + * 1, Fixed doxygen-documentation, 2, Add keyword such that changes will be logged into a file. + * + * Revision 1.4 2005/04/20 03:49:32 archan + * Add const to string argument of myfopen. + * + * Revision 1.3 2005/03/30 01:22:48 archan + * Fixed mistakes in last updates. Add + * + * + * 08-Dec-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Added stat_mtime(). + * + * 11-Mar-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Added _myfopen() and myfopen macro. + * + * 05-Sep-97 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Started. + */ + + +#ifndef _LIBUTIL_IO_H_ +#define _LIBUTIL_IO_H_ + +#include +#if !defined(_WIN32_WCE) && !(defined(__ADSPBLACKFIN__) && !defined(__linux__)) +#include +#endif + +/* Win32/WinCE DLL gunk */ +#include +#include + +/** \file pio.h + * \brief file IO related operations. + * + * Custom fopen with error checking is implemented. fopen_comp can + * open a file with .z, .Z, .gz or .GZ extension + * + * WARNING: Usage of stat_retry will result in 100s of waiting time + * if the file doesn't exist. +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. */ +} +#endif + +/** + * Like fopen, but use popen and zcat if it is determined that "file" is compressed + * (i.e., has a .z, .Z, .gz, or .GZ extension).
+ */ +SPHINXBASE_EXPORT +FILE *fopen_comp (const char *file, /**< In: File to be opened */ + const char *mode, /**< In: "r" or "w", as with normal fopen */ + int32 *ispipe /**< Out: On return *ispipe is TRUE iff file + was opened via a pipe */ + ); + +/** + * Close a file opened using fopen_comp. + */ +SPHINXBASE_EXPORT +void fclose_comp (FILE *fp, /**< In: File pointer to be closed */ + int32 ispipe /**< In: ispipe argument that was returned by the + corresponding fopen_comp() call */ + ); + +/** + * Open a file for reading, but if file not present try to open compressed version (if + * file is uncompressed, and vice versa). + */ +SPHINXBASE_EXPORT +FILE *fopen_compchk (const char *file, /**< In: File to be opened */ + int32 *ispipe /**< Out: On return *ispipe is TRUE iff file + was opened via a pipe */ + ); + +/** + * Wrapper around fopen to check for failure and E_FATAL if failed. + */ +SPHINXBASE_EXPORT +FILE *_myfopen(const char *file, const char *mode, + const char *pgm, int32 line); /* In: __FILE__, __LINE__ from where called */ +#define myfopen(file,mode) _myfopen((file),(mode),__FILE__,__LINE__) + + +/** + * NFS file reads seem to fail now and then. Use the following functions in place of + * the regular fread. It retries failed freads several times and quits only if all of + * them fail. Be aware, however, that even normal failures such as attempting to read + * beyond EOF will trigger such retries, wasting about a minute in retries. + * Arguments identical to regular fread. + */ +SPHINXBASE_EXPORT +int32 fread_retry(void *pointer, int32 size, int32 num_items, FILE *stream); + +/** + * Read a line of arbitrary length from a file and return it as a + * newly allocated string. + * + * @deprecated Use line iterators instead. + * + * @param stream The file handle to read from. + * @param out_len Output: if not NULL, length of the string read. + * @return allocated string containing the line, or NULL on error or EOF. 
+ */ +SPHINXBASE_EXPORT +char *fread_line(FILE *stream, size_t *out_len); + +/** + * Line iterator for files. + */ +typedef struct lineiter_t { + char *buf; + FILE *fh; + int32 bsiz; + int32 len; + int32 clean; + int32 lineno; +} lineiter_t; + +/** + * Start reading lines from a file. + */ +SPHINXBASE_EXPORT +lineiter_t *lineiter_start(FILE *fh); + +/** + * Start reading lines from a file, skip comments and trim lines. + */ +SPHINXBASE_EXPORT +lineiter_t *lineiter_start_clean(FILE *fh); + +/** + * Move to the next line in the file. + */ +SPHINXBASE_EXPORT +lineiter_t *lineiter_next(lineiter_t *li); + +/** + * Stop reading lines from a file. + */ +SPHINXBASE_EXPORT +void lineiter_free(lineiter_t *li); + +/** + * Returns current line number. + */ +SPHINXBASE_EXPORT +int lineiter_lineno(lineiter_t *li); + + +#ifdef _WIN32_WCE +/* Fake this for WinCE which has no stat() */ +#include +struct stat { + DWORD st_mtime; + DWORD st_size; +}; +#endif /* _WIN32_WCE */ + +#if defined(__ADSPBLACKFIN__) && !defined(__linux__) +struct stat { + int32 st_mtime; + int32 st_size; +}; + +#endif + +/** + * Bitstream encoder - for writing compressed files. + */ +typedef struct bit_encode_s bit_encode_t; + +/** + * Attach bitstream encoder to a file. + */ +bit_encode_t *bit_encode_attach(FILE *outfh); + +/** + * Retain pointer to a bit encoder. + */ +bit_encode_t *bit_encode_retain(bit_encode_t *be); + +/** + * Release pointer to a bit encoder. + * + * Note that this does NOT flush any leftover bits. + */ +int bit_encode_free(bit_encode_t *be); + +/** + * Write bits to encoder. + */ +int bit_encode_write(bit_encode_t *be, unsigned char const *bits, int nbits); + +/** + * Write lowest-order bits of codeword to encoder. + */ +int bit_encode_write_cw(bit_encode_t *be, uint32 codeword, int nbits); + +/** + * Flush any unwritten bits, zero-padding if necessary. + */ +int bit_encode_flush(bit_encode_t *be); + +/** + * There is no bitstream decoder, because a stream abstraction is too + * slow. 
Instead we read blocks of bits and treat them as bitvectors. + */ + +/** + * Like fread_retry, but for stat. Arguments identical to regular stat. + * Return value: 0 if successful, -1 if stat failed several attempts. + */ +SPHINXBASE_EXPORT +int32 stat_retry (const char *file, struct stat *statbuf); + +/** + * Return time of last modification for the given file, or -1 if stat fails. + */ + +SPHINXBASE_EXPORT +int32 stat_mtime (const char *file); + +/** + * Create a directory and all of its parent directories, as needed. + * + * @return 0 on success, <0 on failure. + */ +SPHINXBASE_EXPORT +int build_directory(const char *path); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/media/sphinxbase/sphinxbase/prim_type.h b/media/sphinxbase/sphinxbase/prim_type.h new file mode 100644 index 000000000..820fe968f --- /dev/null +++ b/media/sphinxbase/sphinxbase/prim_type.h @@ -0,0 +1,183 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. 
+ * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * prim_type.h -- Primitive types; more machine-independent. + * + * ********************************************** + * CMU ARPA Speech Project + * + * Copyright (c) 1999 Carnegie Mellon University. + * ALL RIGHTS RESERVED. + * ********************************************** + * + * HISTORY + * $Log: prim_type.h,v $ + * Revision 1.12 2005/10/05 00:31:14 dhdfu + * Make int8 be explicitly signed (signedness of 'char' is + * architecture-dependent). Then make a bunch of things use uint8 where + * signedness is unimportant, because on the architecture where 'char' is + * unsigned, it is that way for a reason (signed chars are slower). + * + * Revision 1.11 2005/06/22 03:10:23 arthchan2003 + * Added keyword. + * + * Revision 1.3 2005/03/30 01:22:48 archan + * Fixed mistakes in last updates. Add + * + * + * 12-Mar-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon + * Added arraysize_t, point_t, fpoint_t. + * + * 01-Feb-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon + * Added anytype_t. + * + * 08-31-95 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon + * Created. 
+ */ + + +#ifndef _LIBUTIL_PRIM_TYPE_H_ +#define _LIBUTIL_PRIM_TYPE_H_ + +/** + * @file prim_type.h + * @brief Basic type definitions used in Sphinx. + */ + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +} /* Fool Emacs into not indenting things. */ +#endif + +#include + +/* Define some things for VisualDSP++ */ +#if defined(__ADSPBLACKFIN__) && !defined(__GNUC__) +# ifndef HAVE_LONG_LONG +# define HAVE_LONG_LONG +# endif +# ifndef ssize_t +typedef signed int ssize_t; +# endif +# define SIZEOF_LONG_LONG 8 +# define __BIGSTACKVARIABLE__ static +#else /* Not VisualDSP++ */ +# define __BIGSTACKVARIABLE__ +#endif + +/** + * Union of basic types. + */ +typedef union anytype_s { + void *ptr; + long i; + unsigned long ui; + double fl; +} anytype_t; + +/* + * Assume P64 or LP64. If you need to port this to a DSP, let us know. + */ +typedef int int32; +typedef short int16; +typedef signed char int8; +typedef unsigned int uint32; +typedef unsigned short uint16; +typedef unsigned char uint8; +typedef float float32; +typedef double float64; +#if defined(_MSC_VER) +typedef __int64 int64; +typedef unsigned __int64 uint64; +#elif defined(HAVE_LONG_LONG) && (SIZEOF_LONG_LONG == 8) +typedef long long int64; +typedef unsigned long long uint64; +#else /* !HAVE_LONG_LONG && SIZEOF_LONG_LONG == 8 */ +typedef double int64; +typedef double uint64; +#endif /* !HAVE_LONG_LONG && SIZEOF_LONG_LONG == 8 */ + +#ifndef TRUE +#define TRUE 1 +#endif +#ifndef FALSE +#define FALSE 0 +#endif + +#ifndef NULL +#define NULL (void *)0 +#endif + +/* These really ought to come from , but not everybody has that. 
*/ +/* Useful constants */ +#define MAX_INT32 ((int32) 0x7fffffff) +#define MAX_INT16 ((int16) 0x00007fff) +#define MAX_INT8 ((int8) 0x0000007f) + +#define MAX_NEG_INT32 ((int32) 0x80000000) +#define MAX_NEG_INT16 ((int16) 0xffff8000) +#define MAX_NEG_INT8 ((int8) 0xffffff80) + +#define MAX_UINT32 ((uint32) 0xffffffff) +#define MAX_UINT16 ((uint16) 0x0000ffff) +#define MAX_UINT8 ((uint8) 0x000000ff) + +/* The following are approximate; IEEE floating point standards might quibble! */ +#define MAX_POS_FLOAT32 3.4e+38f +#define MIN_POS_FLOAT32 1.2e-38f /* But not 0 */ +#define MAX_POS_FLOAT64 1.8e+307 +#define MIN_POS_FLOAT64 2.2e-308 + +#define MAX_IEEE_NORM_POS_FLOAT32 3.4e+38f +#define MIN_IEEE_NORM_POS_FLOAT32 1.2e-38f +#define MIN_IEEE_NORM_NEG_FLOAT32 -3.4e+38f +#define MAX_IEEE_NORM_POS_FLOAT64 1.8e+307 +#define MIN_IEEE_NORM_POS_FLOAT64 2.2e-308 +#define MIN_IEEE_NORM_NEG_FLOAT64 -1.8e+307 + +/* Will the following really work?? */ +#define MIN_NEG_FLOAT32 ((float32) (-MIN_POS_FLOAT32)) +#define MIN_NEG_FLOAT64 ((float64) (-MIN_POS_FLOAT64)) + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/media/sphinxbase/sphinxbase/profile.h b/media/sphinxbase/sphinxbase/profile.h new file mode 100644 index 000000000..ddecfb6e8 --- /dev/null +++ b/media/sphinxbase/sphinxbase/profile.h @@ -0,0 +1,231 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2001 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * profile.h -- For timing and event counting. + * + * ********************************************** + * CMU ARPA Speech Project + * + * Copyright (c) 1999 Carnegie Mellon University. + * ALL RIGHTS RESERVED. + * ********************************************** + * + * HISTORY + * $Log: profile.h,v $ + * Revision 1.10 2005/06/22 03:10:59 arthchan2003 + * 1, Fixed doxygen documentation, 2, Added keyword. + * + * Revision 1.5 2005/06/15 04:21:47 archan + * 1, Fixed doxygen-documentation, 2, Add keyword such that changes will be logged into a file. + * + * Revision 1.4 2005/04/25 19:22:48 archan + * Refactor out the code of rescoring from lexical tree. 
Potentially we want to turn off the rescoring if we need. + * + * Revision 1.3 2005/03/30 01:22:48 archan + * Fixed mistakes in last updates. Add + * + * + * 11-Mar-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Added ptmr_init(). + * + * 19-Jun-97 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Created from earlier Sphinx-3 version. + */ + + +#ifndef _LIBUTIL_PROFILE_H_ +#define _LIBUTIL_PROFILE_H_ + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +} /* Fool Emacs into not indenting things. */ +#endif + +/** \file profile.h + * \brief Implementation of profiling, including counting, timing, and CPU clock checking + * + * Currently, function host_endian is also in this file. It is + * not documented. + */ + +#include + +/* Win32/WinCE DLL gunk */ +#include +#include + + +/** + * \struct pctr_t + * + * Generic event counter for profiling. User is responsible for allocating an array + * of the desired number. There should be a sentinel with name = NULL.
+ */ +typedef struct { + char *name; /**< Counter print name; NULL + terminates array of counters + Used by pctr_print_all */ + int32 count; /**< Counter value */ +} pctr_t; + +/** + * operations of pctr_t + */ + +/** + * Initialize a counter + * @return an initialized counter + */ +SPHINXBASE_EXPORT +pctr_t* pctr_new ( + char *name /**< The name of the counter */ + ); + +/** + * Reset a counter + */ + +SPHINXBASE_EXPORT +void pctr_reset (pctr_t *ctr /**< A pointer of a counter */ + ); + +/** + * Print a counter + */ +SPHINXBASE_EXPORT +void pctr_print(FILE *fp, /**< A file pointer */ + pctr_t *ctr /**< A pointer of a counter */ + ); + +/** + * Increment a counter + */ +SPHINXBASE_EXPORT +void pctr_increment (pctr_t *ctr, /**< A pointer of a counter */ + int32 inc /**< The increment of the counter */ + ); + +/** + Free the counter +*/ +SPHINXBASE_EXPORT +void pctr_free(pctr_t* ctr /**< A pointer of a counter */ + ); + + +/** + * \struct ptmr_t + * Generic timer structures and functions for coarse-grained performance measurements + * using standard system calls. + */ +typedef struct { + const char *name; /**< Timer print name; NULL terminates an array of timers. 
+ Used by ptmr_print_all */ + float64 t_cpu; /**< CPU time accumulated since most recent reset op */ + float64 t_elapsed; /**< Elapsed time accumulated since most recent reset */ + float64 t_tot_cpu; /**< Total CPU time since creation */ + float64 t_tot_elapsed; /**< Total elapsed time since creation */ + float64 start_cpu; /**< ---- FOR INTERNAL USE ONLY ---- */ + float64 start_elapsed; /**< ---- FOR INTERNAL USE ONLY ---- */ +} ptmr_t; + + + +/** Start timing using tmr */ +SPHINXBASE_EXPORT +void ptmr_start (ptmr_t *tmr /**< The timer*/ + ); + +/** Stop timing and accumulate tmr->{t_cpu, t_elapsed, t_tot_cpu, t_tot_elapsed} */ +SPHINXBASE_EXPORT +void ptmr_stop (ptmr_t *tmr /**< The timer*/ + ); + +/** Reset tmr->{t_cpu, t_elapsed} to 0.0 */ +SPHINXBASE_EXPORT +void ptmr_reset (ptmr_t *tmr /**< The timer*/ + ); + +/** Reset tmr->{t_cpu, t_elapsed, t_tot_cpu, t_tot_elapsed} to 0.0 + */ +SPHINXBASE_EXPORT +void ptmr_init (ptmr_t *tmr /**< The timer*/ + ); + + +/** + * Reset t_cpu, t_elapsed of all timer modules in array tmr[] to 0.0. + * The array should be terminated with a sentinel with .name = NULL. + */ +SPHINXBASE_EXPORT +void ptmr_reset_all (ptmr_t *tmr /**< The timer*/ + ); + +/** + * Print t_cpu for all timer modules in tmr[], normalized by norm (i.e., t_cpu/norm). + * The array should be terminated with a sentinel with .name = NULL. + */ +SPHINXBASE_EXPORT +void ptmr_print_all (FILE *fp, /**< The file pointer */ + ptmr_t *tmr, /**< The timer*/ + float64 norm + ); + + +/** + * Return the processor clock speed (in MHz); only available on some machines (Alphas). + * The dummy argument can be any integer value. + */ +SPHINXBASE_EXPORT +int32 host_pclk (int32 dummy); + + +/* + * Check the native byte-ordering of the machine by writing a magic + * number to a temporary file and reading it back. * Return value: + * 0 if BIG-ENDIAN, 1 if LITTLE-ENDIAN, -1 if error. 
+ */ +SPHINXBASE_EXPORT +int32 host_endian ( void ); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/media/sphinxbase/sphinxbase/sbthread.h b/media/sphinxbase/sphinxbase/sbthread.h new file mode 100644 index 000000000..9154dfe2b --- /dev/null +++ b/media/sphinxbase/sphinxbase/sbthread.h @@ -0,0 +1,221 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2008 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/** + * @file sbthread.h + * @brief Simple portable thread functions. + * @author David Huggins-Daines + **/ + +#ifndef __SBTHREAD_H__ +#define __SBTHREAD_H__ + +#include + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. */ +} +#endif + +/** + * Thread object. + */ +typedef struct sbthread_s sbthread_t; + +/** + * Asynchronous message queue object. + */ +typedef struct sbmsgq_s sbmsgq_t; + +/** + * Mutex (critical section) object. + */ +typedef struct sbmtx_s sbmtx_t; + +/** + * Event object. + */ +typedef struct sbevent_s sbevent_t; + +/** + * Entry point for a thread. + */ +typedef int (*sbthread_main)(sbthread_t *th); + +/** + * Start a new thread. + */ +SPHINXBASE_EXPORT +sbthread_t *sbthread_start(cmd_ln_t *config, sbthread_main func, void *arg); + +/** + * Wait for a thread to complete. + */ +SPHINXBASE_EXPORT +int sbthread_wait(sbthread_t *th); + +/** + * Free a thread object. + */ +SPHINXBASE_EXPORT +void sbthread_free(sbthread_t *th); + +/** + * Get configuration object from a thread. + */ +SPHINXBASE_EXPORT +cmd_ln_t *sbthread_config(sbthread_t *th); + +/** + * Get argument pointer from a thread. + */ +SPHINXBASE_EXPORT +void *sbthread_arg(sbthread_t *th); + +/** + * Get message queue from a thread. 
+ */ +SPHINXBASE_EXPORT +sbmsgq_t *sbthread_msgq(sbthread_t *th); + +/** + * Wait for a thread to complete. + */ +SPHINXBASE_EXPORT +int sbthread_wait(sbthread_t *th); + +/** + * Send an asynchronous message to a thread. + * + * Each thread gets a message queue by default, so this is just a + * wrapper around sbmsgq_send(). + */ +SPHINXBASE_EXPORT +int sbthread_send(sbthread_t *th, size_t len, void const *data); + +/** + * Create a message queue. + * + * @param depth Depth of the queue. + */ +SPHINXBASE_EXPORT +sbmsgq_t *sbmsgq_init(size_t depth); + +/** + * Free a message queue. + */ +SPHINXBASE_EXPORT +void sbmsgq_free(sbmsgq_t *q); + +/** + * Post a message to a queue. + */ +SPHINXBASE_EXPORT +int sbmsgq_send(sbmsgq_t *q, size_t len, void const *data); + +/** + * Wait for a message from a queue. + */ +SPHINXBASE_EXPORT +void *sbmsgq_wait(sbmsgq_t *q, size_t *out_len, int sec, int nsec); + +/** + * Create a mutex. + */ +SPHINXBASE_EXPORT +sbmtx_t *sbmtx_init(void); + +/** + * Try to acquire a mutex. + */ +SPHINXBASE_EXPORT +int sbmtx_trylock(sbmtx_t *mtx); + +/** + * Acquire a mutex. + */ +SPHINXBASE_EXPORT +int sbmtx_lock(sbmtx_t *mtx); + +/** + * Release a mutex. + */ +SPHINXBASE_EXPORT +int sbmtx_unlock(sbmtx_t *mtx); + +/** + * Dispose of a mutex. + */ +SPHINXBASE_EXPORT +void sbmtx_free(sbmtx_t *mtx); + +/** + * Initialize an event. + */ +SPHINXBASE_EXPORT +sbevent_t *sbevent_init(void); + +/** + * Free an event. + */ +SPHINXBASE_EXPORT +void sbevent_free(sbevent_t *evt); + +/** + * Signal an event. + */ +SPHINXBASE_EXPORT +int sbevent_signal(sbevent_t *evt); + +/** + * Wait for an event to be signalled. 
+ */ +SPHINXBASE_EXPORT +int sbevent_wait(sbevent_t *evt, int sec, int nsec); + + +#ifdef __cplusplus +} +#endif + + +#endif /* __SBTHREAD_H__ */ diff --git a/media/sphinxbase/sphinxbase/sphinx_config.h b/media/sphinxbase/sphinxbase/sphinx_config.h new file mode 100644 index 000000000..6e409bb37 --- /dev/null +++ b/media/sphinxbase/sphinxbase/sphinx_config.h @@ -0,0 +1,42 @@ +#if ( defined(_WIN32) || defined(__CYGWIN__) ) +/* include/sphinx_config.h, defaults for Win32 */ +/* sphinx_config.h: Externally visible configuration parameters for + * SphinxBase. + */ + +/* Default radix point for fixed-point */ +/* #undef DEFAULT_RADIX */ + +/* Use Q15 fixed-point computation */ +/* #undef FIXED16 */ + +/* Use fixed-point computation */ +/* #undef FIXED_POINT */ + +/* The size of `long', as computed by sizeof. */ +#define SIZEOF_LONG 4 +#else +/* include/sphinx_config.h. Generated from sphinx_config.h.in by configure. */ +/* sphinx_config.h: Externally visible configuration parameters */ + +/* Default radix point for fixed-point */ +/* #undef DEFAULT_RADIX */ + +/* Use Q15 fixed-point computation */ +/* #undef FIXED16 */ + +/* Use fixed-point computation */ +/* #undef FIXED_POINT */ + +/* The size of `long', as computed by sizeof. */ +#define SIZEOF_LONG 8 + +/* Define to 1 if the system has the type `long long'. */ +/*#define HAVE_LONG_LONG 1*/ + +/* The size of `long long', as computed by sizeof. 
*/ +#define SIZEOF_LONG_LONG 8 + +/* Enable debugging output */ +/* #undef SPHINX_DEBUG */ +#endif diff --git a/media/sphinxbase/sphinxbase/sphinxbase_export.h b/media/sphinxbase/sphinxbase/sphinxbase_export.h new file mode 100644 index 000000000..535d46027 --- /dev/null +++ b/media/sphinxbase/sphinxbase/sphinxbase_export.h @@ -0,0 +1,15 @@ +#ifndef __SPHINXBASE_EXPORT_H__ +#define __SPHINXBASE_EXPORT_H__ + +/* Win32/WinCE DLL gunk */ +#if (defined(_WIN32) || defined(_WIN32_WCE)) && !defined(_WIN32_WP) && !defined(__MINGW32__) && !defined(__CYGWIN__) && !defined(__WINSCW__) && !defined(__SYMBIAN32__) +#if defined(SPHINXBASE_EXPORTS) /* Visual Studio */ +#define SPHINXBASE_EXPORT __declspec(dllexport) +#else +#define SPHINXBASE_EXPORT __declspec(dllimport) +#endif +#else /* !_WIN32 */ +#define SPHINXBASE_EXPORT +#endif + +#endif /* __SPHINXBASE_EXPORT_H__ */ diff --git a/media/sphinxbase/sphinxbase/strfuncs.h b/media/sphinxbase/sphinxbase/strfuncs.h new file mode 100644 index 000000000..392f1ca22 --- /dev/null +++ b/media/sphinxbase/sphinxbase/strfuncs.h @@ -0,0 +1,158 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1995-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/** + * @file strfuncs.h + * @brief Miscellaneous useful string functions + */ + +#ifndef __SB_STRFUNCS_H__ +#define __SB_STRFUNCS_H__ + +#include + +/* Win32/WinCE DLL gunk */ +#include +#include + + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. */ +} +#endif + +/** + * Concatenate a NULL-terminated argument list of strings, returning a + * newly allocated string. + **/ +SPHINXBASE_EXPORT +char *string_join(const char *base, ...); + +/** + * Which end of a string to operate on for string_trim(). + */ +enum string_edge_e { + STRING_START, /**< Beginning of string. */ + STRING_END, /**< End of string. */ + STRING_BOTH /**< Both ends of string. */ +}; + +/** + * Remove whitespace from a string, modifying it in-place. + * + * @param string string to trim, contents will be modified. + * @param which one of STRING_START, STRING_END, or STRING_BOTH. 
+ */ +SPHINXBASE_EXPORT +char *string_trim(char *string, enum string_edge_e which); + +/** + * Locale independent version of atof(). + * + * This function behaves like atof() in the "C" locale. Switching + * locale in a threaded program is extremely uncool, therefore we need + * this since we pass floats as strings in 1000 different places. + */ +SPHINXBASE_EXPORT +double atof_c(char const *str); + +/* FIXME: Both of these string splitting functions basically suck. I + have attempted to fix them as best I can. (dhuggins@cs, 20070808) */ + +/** + * Convert a line to an array of "words", based on whitespace separators. A word + * is a string with no whitespace chars in it. + * Note that the string line is modified as a result: NUL chars are placed after + * every word in the line. + * Return value: No. of words found; -1 if no. of words in line exceeds n_wptr. + */ +SPHINXBASE_EXPORT +int32 str2words (char *line, /**< In/Out: line to be parsed. This + string will be modified! (NUL + characters inserted at word + boundaries) */ + char **wptr, /**< In/Out: Array of pointers to + words found in line. The array + must be allocated by the caller. + It may be NULL in which case the + number of words will be counted. + This allows you to allocate it to + the proper size, e.g.: + + n = str2words(line, NULL, 0); + wptr = ckd_calloc(n, sizeof(*wptr)); + str2words(line, wptr, n); + */ + int32 n_wptr /**< In: Size of wptr array, ignored + if wptr == NULL */ + ); + +/** + * Yet another attempt at a clean "next-word-in-string" function. See arguments below. + * @return Length of word returned, or -1 if nothing found. + * This allows you to scan through a line: + * + * <pre>
+ * while ((n = nextword(line, delim, &word, &delimfound)) >= 0) {
+ *     ... do something with word ..
+ *     word[n] = delimfound;
+ *     line = word + n;
+ * }
+ * </pre>
+ */ +SPHINXBASE_EXPORT +int32 nextword (char *line, /**< Input: String being searched for next word. + Will be modified by this function (NUL characters inserted) */ + const char *delim, /**< Input: A word, if found, must be delimited at either + end by a character from this string (or at the end + by the NULL char) */ + char **word,/**< Output: *word = ptr within line to beginning of first + word, if found. Delimiter at the end of word replaced + with the NULL char. */ + char *delimfound /**< Output: *delimfound = original delimiter found at the end + of the word. (This way, the caller can restore the + delimiter, preserving the original string.) */ + ); + +#ifdef __cplusplus +} +#endif + + +#endif /* __SB_STRFUNCS_H__ */ diff --git a/media/sphinxbase/sphinxbase/yin.h b/media/sphinxbase/sphinxbase/yin.h new file mode 100644 index 000000000..87a9eac55 --- /dev/null +++ b/media/sphinxbase/sphinxbase/yin.h @@ -0,0 +1,136 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* + * Copyright (c) 2008 Beyond Access, Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY BEYOND ACCESS, INC. ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL BEYOND ACCESS, INC. 
NOR + * ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file yin.h + * @brief Implementation of pitch estimation + * @author David Huggins-Daines + * + * This implements part of the YIN algorithm: + * + * "YIN, a fundamental frequency estimator for speech and music". + * Alain de Cheveigné and Hideki Kawahara. Journal of the Acoustical + * Society of America, 111 (4), April 2002. + */ + +#ifndef __YIN_H__ +#define __YIN_H__ + +#ifdef __cplusplus +extern "C" { /* opens the C linkage block; closed by the brace before the final #endif */ +#endif +#if 0 +} /* Fool Emacs. */ +#endif + +/* Win32/WinCE DLL gunk */ +#include +#include + +/** + * Frame-based moving-window pitch estimator. + */ +typedef struct yin_s yin_t; + +/** + * Initialize moving-window pitch estimation. + */ +SPHINXBASE_EXPORT +yin_t *yin_init(int frame_size, float search_threshold, + float search_range, int smooth_window); + +/** + * Free a moving-window pitch estimator. + */ +SPHINXBASE_EXPORT +void yin_free(yin_t *pe); + +/** + * Start processing an utterance. + */ +SPHINXBASE_EXPORT +void yin_start(yin_t *pe); + +/** + * Mark the end of an utterance. + */ +SPHINXBASE_EXPORT +void yin_end(yin_t *pe); + +/** + * Store a frame of data to the pitch estimator. + * + * @param pe Pitch estimator. + * @param frame Frame of frame_size (see + * yin_init()) samples of audio data. + */ +SPHINXBASE_EXPORT +void yin_store(yin_t *pe, int16 const *frame); + +/** + * Feed a frame of data to the pitch estimator. + * + * @param pe Pitch estimator. + * @param frame Frame of frame_size (see + * yin_init()) samples of audio data.
+ */ +SPHINXBASE_EXPORT +void yin_write(yin_t *pe, int16 const *frame); + +/** + * Feed stored frame of data to the pitch estimator. + * (see yin_store()) + * + * @param pe Pitch estimator. + */ +SPHINXBASE_EXPORT +void yin_write_stored(yin_t *pe); + +/** + * Read a raw estimated pitch value from the pitch estimator. + * + * @param pe Pitch estimator. + * @param out_period Output: an estimate of the period (*not* the pitch) + * of the signal in samples. + * @param out_bestdiff Output: the minimum normalized difference value + * associated with *out_period, in Q15 + * format (i.e. scaled by 32768). This can be + * interpreted as one minus the probability of voicing. + * @return Non-zero if enough data was available to return a pitch + * estimate, zero otherwise. + */ +SPHINXBASE_EXPORT +int yin_read(yin_t *pe, uint16 *out_period, float *out_bestdiff); + +#ifdef __cplusplus +} +#endif + +#endif /* __YIN_H__ */ + diff --git a/media/sphinxbase/src/libsphinxbase/fe/fe_interface.c b/media/sphinxbase/src/libsphinxbase/fe/fe_interface.c new file mode 100644 index 000000000..cd2e1e2db --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/fe/fe_interface.c @@ -0,0 +1,776 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1996-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution.
+ * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * ==================================================================== + * + */ +#include +#include +#include +#include +#include +#ifdef _WIN32_WCE +#include +#else +#include +#endif + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include "sphinxbase/prim_type.h" +#include "sphinxbase/byteorder.h" +#include "sphinxbase/fixpoint.h" +#include "sphinxbase/genrand.h" +#include "sphinxbase/err.h" +#include "sphinxbase/cmd_ln.h" +#include "sphinxbase/ckd_alloc.h" + +#include "fe_internal.h" +#include "fe_warp.h" + +static const arg_t fe_args[] = { + waveform_to_cepstral_command_line_macro(), + { NULL, 0, NULL, NULL } +}; + +int +fe_parse_general_params(cmd_ln_t *config, fe_t * fe) +{ + int j, frate; + + fe->config = config; + fe->sampling_rate = cmd_ln_float32_r(config, "-samprate"); + frate = cmd_ln_int32_r(config, "-frate"); + if (frate > MAX_INT16 || frate > fe->sampling_rate || frate < 1) { + E_ERROR + ("Frame rate %d can not be bigger than sample rate %.02f\n", + frate, fe->sampling_rate); + return -1; + } + + fe->frame_rate = (int16)frate; + if (cmd_ln_boolean_r(config, "-dither")) { + fe->dither = 1; + fe->seed = cmd_ln_int32_r(config, "-seed"); + } +#ifdef WORDS_BIGENDIAN + fe->swap = strcmp("big", cmd_ln_str_r(config, "-input_endian")) == 0 ? 0 : 1; +#else + fe->swap = strcmp("little", cmd_ln_str_r(config, "-input_endian")) == 0 ? 0 : 1; +#endif + fe->window_length = cmd_ln_float32_r(config, "-wlen"); + fe->pre_emphasis_alpha = cmd_ln_float32_r(config, "-alpha"); + + fe->num_cepstra = (uint8)cmd_ln_int32_r(config, "-ncep"); + fe->fft_size = (int16)cmd_ln_int32_r(config, "-nfft"); + + /* Check FFT size, compute FFT order (log_2(n)) */ + for (j = fe->fft_size, fe->fft_order = 0; j > 1; j >>= 1, fe->fft_order++) { + if (((j % 2) != 0) || (fe->fft_size <= 0)) { + E_ERROR("fft: number of points must be a power of 2 (is %d)\n", + fe->fft_size); + return -1; + } + } + /* Verify that FFT size is greater or equal to window length. 
*/ + if (fe->fft_size < (int)(fe->window_length * fe->sampling_rate)) { + E_ERROR("FFT: Number of points must be greater or equal to frame size (%d samples)\n", + (int)(fe->window_length * fe->sampling_rate)); + return -1; + } + + fe->prespch_len = (int16)cmd_ln_int32_r(config, "-vad_prespeech"); + fe->postspch_len = (int16)cmd_ln_int32_r(config, "-vad_postspeech"); + fe->vad_threshold = cmd_ln_float32_r(config, "-vad_threshold"); + + fe->remove_dc = cmd_ln_boolean_r(config, "-remove_dc"); + fe->remove_noise = cmd_ln_boolean_r(config, "-remove_noise"); + fe->remove_silence = cmd_ln_boolean_r(config, "-remove_silence"); + + if (0 == strcmp(cmd_ln_str_r(config, "-transform"), "dct")) + fe->transform = DCT_II; + else if (0 == strcmp(cmd_ln_str_r(config, "-transform"), "legacy")) + fe->transform = LEGACY_DCT; + else if (0 == strcmp(cmd_ln_str_r(config, "-transform"), "htk")) + fe->transform = DCT_HTK; + else { + E_ERROR("Invalid transform type (values are 'dct', 'legacy', 'htk')\n"); + return -1; + } + + if (cmd_ln_boolean_r(config, "-logspec")) + fe->log_spec = RAW_LOG_SPEC; + if (cmd_ln_boolean_r(config, "-smoothspec")) + fe->log_spec = SMOOTH_LOG_SPEC; + + return 0; +} + +static int +fe_parse_melfb_params(cmd_ln_t *config, fe_t *fe, melfb_t * mel) +{ + mel->sampling_rate = fe->sampling_rate; + mel->fft_size = fe->fft_size; + mel->num_cepstra = fe->num_cepstra; + mel->num_filters = cmd_ln_int32_r(config, "-nfilt"); + + if (fe->log_spec) + fe->feature_dimension = mel->num_filters; + else + fe->feature_dimension = fe->num_cepstra; + + mel->upper_filt_freq = cmd_ln_float32_r(config, "-upperf"); + mel->lower_filt_freq = cmd_ln_float32_r(config, "-lowerf"); + + mel->doublewide = cmd_ln_boolean_r(config, "-doublebw"); + + mel->warp_type = cmd_ln_str_r(config, "-warp_type"); + mel->warp_params = cmd_ln_str_r(config, "-warp_params"); + mel->lifter_val = cmd_ln_int32_r(config, "-lifter"); + + mel->unit_area = cmd_ln_boolean_r(config, "-unit_area"); + mel->round_filters = 
cmd_ln_boolean_r(config, "-round_filters"); + + if (fe_warp_set(mel, mel->warp_type) != FE_SUCCESS) { + E_ERROR("Failed to initialize the warping function.\n"); + return -1; + } + fe_warp_set_parameters(mel, mel->warp_params, mel->sampling_rate); + return 0; +} + +void +fe_print_current(fe_t const *fe) +{ + E_INFO("Current FE Parameters:\n"); + E_INFO("\tSampling Rate: %f\n", fe->sampling_rate); + E_INFO("\tFrame Size: %d\n", fe->frame_size); + E_INFO("\tFrame Shift: %d\n", fe->frame_shift); + E_INFO("\tFFT Size: %d\n", fe->fft_size); + E_INFO("\tLower Frequency: %g\n", + fe->mel_fb->lower_filt_freq); + E_INFO("\tUpper Frequency: %g\n", + fe->mel_fb->upper_filt_freq); + E_INFO("\tNumber of filters: %d\n", fe->mel_fb->num_filters); + E_INFO("\tNumber of Overflow Samps: %d\n", fe->num_overflow_samps); + E_INFO("\tStart Utt Status: %d\n", fe->start_flag); + E_INFO("Will %sremove DC offset at frame level\n", + fe->remove_dc ? "" : "not "); + if (fe->dither) { + E_INFO("Will add dither to audio\n"); + E_INFO("Dither seeded with %d\n", fe->seed); + } + else { + E_INFO("Will not add dither to audio\n"); + } + if (fe->mel_fb->lifter_val) { + E_INFO("Will apply sine-curve liftering, period %d\n", + fe->mel_fb->lifter_val); + } + E_INFO("Will %snormalize filters to unit area\n", + fe->mel_fb->unit_area ? "" : "not "); + E_INFO("Will %sround filter frequencies to DFT points\n", + fe->mel_fb->round_filters ? "" : "not "); + E_INFO("Will %suse double bandwidth in mel filter\n", + fe->mel_fb->doublewide ? 
"" : "not "); +} + +fe_t * +fe_init_auto() +{ + return fe_init_auto_r(cmd_ln_get()); +} + +fe_t * +fe_init_auto_r(cmd_ln_t *config) +{ + fe_t *fe; + int prespch_frame_len; + + fe = (fe_t*)ckd_calloc(1, sizeof(*fe)); + fe->refcount = 1; + + /* transfer params to front end */ + if (fe_parse_general_params(cmd_ln_retain(config), fe) < 0) { + fe_free(fe); + return NULL; + } + + /* compute remaining fe parameters */ + /* We add 0.5 so approximate the float with the closest + * integer. E.g., 2.3 is truncate to 2, whereas 3.7 becomes 4 + */ + fe->frame_shift = (int32) (fe->sampling_rate / fe->frame_rate + 0.5); + fe->frame_size = (int32) (fe->window_length * fe->sampling_rate + 0.5); + fe->prior = 0; + + fe_start_stream(fe); + + assert (fe->frame_shift > 1); + + if (fe->frame_size > (fe->fft_size)) { + E_ERROR + ("Number of FFT points has to be a power of 2 higher than %d, it is %d\n", + fe->frame_size, fe->fft_size); + fe_free(fe); + return NULL; + } + + if (fe->dither) + fe_init_dither(fe->seed); + + /* establish buffers for overflow samps and hamming window */ + fe->overflow_samps = ckd_calloc(fe->frame_size, sizeof(int16)); + fe->hamming_window = ckd_calloc(fe->frame_size/2, sizeof(window_t)); + + /* create hamming window */ + fe_create_hamming(fe->hamming_window, fe->frame_size); + + /* init and fill appropriate filter structure */ + fe->mel_fb = ckd_calloc(1, sizeof(*fe->mel_fb)); + + /* transfer params to mel fb */ + fe_parse_melfb_params(config, fe, fe->mel_fb); + + if (fe->mel_fb->upper_filt_freq > fe->sampling_rate / 2 + 1.0) { + E_ERROR("Upper frequency %.1f is higher than samprate/2 (%.1f)\n", + fe->mel_fb->upper_filt_freq, fe->sampling_rate / 2); + fe_free(fe); + return NULL; + } + + fe_build_melfilters(fe->mel_fb); + + fe_compute_melcosine(fe->mel_fb); + if (fe->remove_noise || fe->remove_silence) + fe->noise_stats = fe_init_noisestats(fe->mel_fb->num_filters); + + fe->vad_data = (vad_data_t*)ckd_calloc(1, sizeof(*fe->vad_data)); + prespch_frame_len = 
fe->log_spec != RAW_LOG_SPEC ? fe->num_cepstra : fe->mel_fb->num_filters; + fe->vad_data->prespch_buf = fe_prespch_init(fe->prespch_len + 1, prespch_frame_len, fe->frame_shift); + + /* Create temporary FFT, spectrum and mel-spectrum buffers. */ + /* FIXME: Gosh there are a lot of these. */ + fe->spch = ckd_calloc(fe->frame_size, sizeof(*fe->spch)); + fe->frame = ckd_calloc(fe->fft_size, sizeof(*fe->frame)); + fe->spec = ckd_calloc(fe->fft_size, sizeof(*fe->spec)); + fe->mfspec = ckd_calloc(fe->mel_fb->num_filters, sizeof(*fe->mfspec)); + + /* create twiddle factors */ + fe->ccc = ckd_calloc(fe->fft_size / 4, sizeof(*fe->ccc)); + fe->sss = ckd_calloc(fe->fft_size / 4, sizeof(*fe->sss)); + fe_create_twiddle(fe); + + if (cmd_ln_boolean_r(config, "-verbose")) { + fe_print_current(fe); + } + + /*** Initialize the overflow buffers ***/ + fe_start_utt(fe); + return fe; +} + +arg_t const * +fe_get_args(void) +{ + return fe_args; +} + +const cmd_ln_t * +fe_get_config(fe_t *fe) +{ + return fe->config; +} + +void +fe_init_dither(int32 seed) +{ + if (seed < 0) { + E_INFO("You are using the internal mechanism to generate the seed.\n"); +#ifdef _WIN32_WCE + s3_rand_seed(GetTickCount()); +#else + s3_rand_seed((long) time(0)); +#endif + } else { + E_INFO("You are using %d as the seed.\n", seed); + s3_rand_seed(seed); + } +} + +static void +fe_reset_vad_data(vad_data_t * vad_data) +{ + vad_data->global_state = 0; + vad_data->state_changed = 0; + vad_data->prespch_num = 0; + vad_data->postspch_num = 0; + fe_prespch_reset_cep(vad_data->prespch_buf); +} + +int32 +fe_start_utt(fe_t * fe) +{ + fe->num_overflow_samps = 0; + memset(fe->overflow_samps, 0, fe->frame_size * sizeof(int16)); + fe->start_flag = 1; + fe->prior = 0; + fe_reset_vad_data(fe->vad_data); + return 0; +} + +void +fe_start_stream(fe_t *fe) +{ + fe->sample_counter = 0; + fe_reset_noisestats(fe->noise_stats); +} + +int +fe_get_output_size(fe_t *fe) +{ + return (int)fe->feature_dimension; +} + +void +fe_get_input_size(fe_t 
*fe, int *out_frame_shift, + int *out_frame_size) +{ + if (out_frame_shift) + *out_frame_shift = fe->frame_shift; + if (out_frame_size) + *out_frame_size = fe->frame_size; +} + +uint8 +fe_get_vad_state(fe_t *fe) +{ + return fe->vad_data->global_state; +} + +int +fe_process_frames(fe_t *fe, + int16 const **inout_spch, + size_t *inout_nsamps, + mfcc_t **buf_cep, + int32 *inout_nframes, + int32 *out_frameidx) +{ + int outidx, n_overflow, orig_n_overflow; + int16 const *orig_spch; + size_t orig_nsamps; + + /* In the special case where there is no output buffer, return the + * maximum number of frames which would be generated. */ + if (buf_cep == NULL) { + if (*inout_nsamps + fe->num_overflow_samps < (size_t)fe->frame_size) + *inout_nframes = 0; + else + *inout_nframes = 1 + + ((*inout_nsamps + fe->num_overflow_samps - fe->frame_size) + / fe->frame_shift); + if (fe->vad_data->global_state) + *inout_nframes += fe_prespch_ncep(fe->vad_data->prespch_buf); + return *inout_nframes; + } + + if (out_frameidx) + *out_frameidx = 0; + + /* Are there not enough samples to make at least 1 frame? */ + if (*inout_nsamps + fe->num_overflow_samps < (size_t)fe->frame_size) { + if (*inout_nsamps > 0) { + /* Append them to the overflow buffer. */ + memcpy(fe->overflow_samps + fe->num_overflow_samps, + *inout_spch, *inout_nsamps * (sizeof(int16))); + fe->num_overflow_samps += *inout_nsamps; + /* Update input-output pointers and counters. */ + *inout_spch += *inout_nsamps; + *inout_nsamps = 0; + } + /* We produced no frames of output, sorry! */ + *inout_nframes = 0; + return 0; + } + + /* Can't write a frame? Then do nothing! */ + if (*inout_nframes < 1) { + *inout_nframes = 0; + return 0; + } + + /* Index of output frame. 
*/ + outidx = 0; + + /* Try to read from prespeech buffer */ + if (fe->vad_data->global_state) { + while ((*inout_nframes) > 0 && fe_prespch_read_cep(fe->vad_data->prespch_buf, buf_cep[outidx]) > 0) { + outidx++; + (*inout_nframes)--; + } + if ((*inout_nframes) < 1) { + /* mfcc buffer is filled from prespeech buffer */ + *inout_nframes = outidx; + return 0; + } + + /* Sets the start frame for the returned data so that caller can update timings */ + if (out_frameidx && fe->vad_data->state_changed) { + *out_frameidx = fe->sample_counter / fe->frame_shift - fe->prespch_len; + } + } + + /* Keep track of the original start of the buffer. */ + orig_spch = *inout_spch; + orig_nsamps = *inout_nsamps; + orig_n_overflow = fe->num_overflow_samps; + + /* Start processing, taking care of any incoming overflow. */ + if (fe->num_overflow_samps) { + int offset = fe->frame_size - fe->num_overflow_samps; + + /* Append start of spch to overflow samples to make a full frame. */ + memcpy(fe->overflow_samps + fe->num_overflow_samps, + *inout_spch, offset * sizeof(**inout_spch)); + fe_read_frame(fe, fe->overflow_samps, fe->frame_size); + /* Update input-output pointers and counters. */ + *inout_spch += offset; + *inout_nsamps -= offset; + fe->num_overflow_samps -= fe->frame_shift; + } else { + fe_read_frame(fe, *inout_spch, fe->frame_size); + /* Update input-output pointers and counters. 
*/ + *inout_spch += fe->frame_size; + *inout_nsamps -= fe->frame_size; + } + + fe_write_frame(fe, buf_cep[outidx]); + + if (!fe->vad_data->state_changed && fe->vad_data->global_state) { + outidx++; + (*inout_nframes)--; + } + if (fe->vad_data->state_changed && fe->vad_data->global_state) { + /* previous frame triggered vad into speech state + * dumping prespeech buffer */ + while ((*inout_nframes) > 0 && fe_prespch_read_cep(fe->vad_data->prespch_buf, buf_cep[outidx]) > 0) { + outidx++; + (*inout_nframes)--; + } + + /* Sets the start frame for the returned data so that caller can update timings */ + if (out_frameidx) { + *out_frameidx = (fe->sample_counter + orig_nsamps - *inout_nsamps) / fe->frame_shift - fe->prespch_len; + } + } + + /* Process all remaining frames. */ + while (*inout_nframes > 0 && *inout_nsamps >= (size_t)fe->frame_shift) { + fe_shift_frame(fe, *inout_spch, fe->frame_shift); + fe_write_frame(fe, buf_cep[outidx]); + if (!fe->vad_data->state_changed && fe->vad_data->global_state) { + (*inout_nframes)--; + outidx++; + } + /* Update input-output pointers and counters. */ + *inout_spch += fe->frame_shift; + *inout_nsamps -= fe->frame_shift; + /* Amount of data behind the original input which is still needed. */ + if (fe->num_overflow_samps > 0) + fe->num_overflow_samps -= fe->frame_shift; + + if (fe->vad_data->state_changed && fe->vad_data->global_state) { + /* previous frame triggered vad into speech state */ + while (*inout_nframes > 0 && fe_prespch_read_cep(fe->vad_data->prespch_buf, buf_cep[outidx]) != 0) { + (*inout_nframes)--; + outidx++; + } + } + } + + /* How many relevant overflow samples are there left? */ + if (fe->num_overflow_samps <= 0) { + /* Maximum number of overflow samples past *inout_spch to save. */ + n_overflow = *inout_nsamps; + if (n_overflow > fe->frame_shift) + n_overflow = fe->frame_shift; + fe->num_overflow_samps = fe->frame_size - fe->frame_shift; + /* Make sure this isn't an illegal read! 
*/ + if (fe->num_overflow_samps > *inout_spch - orig_spch) + fe->num_overflow_samps = *inout_spch - orig_spch; + fe->num_overflow_samps += n_overflow; + if (fe->num_overflow_samps > 0) { + memcpy(fe->overflow_samps, + *inout_spch - (fe->frame_size - fe->frame_shift), + fe->num_overflow_samps * sizeof(**inout_spch)); + /* Update the input pointer to cover this stuff. */ + *inout_spch += n_overflow; + *inout_nsamps -= n_overflow; + } + } else { + /* There is still some relevant data left in the overflow buffer. */ + /* Shift existing data to the beginning. */ + memmove(fe->overflow_samps, + fe->overflow_samps + orig_n_overflow - fe->num_overflow_samps, + fe->num_overflow_samps * sizeof(*fe->overflow_samps)); + /* Copy in whatever we had in the original speech buffer. */ + n_overflow = *inout_spch - orig_spch + *inout_nsamps; + if (n_overflow > fe->frame_size - fe->num_overflow_samps) + n_overflow = fe->frame_size - fe->num_overflow_samps; + memcpy(fe->overflow_samps + fe->num_overflow_samps, + orig_spch, n_overflow * sizeof(*orig_spch)); + fe->num_overflow_samps += n_overflow; + /* Advance the input pointers. */ + if (n_overflow > *inout_spch - orig_spch) { + n_overflow -= (*inout_spch - orig_spch); + *inout_spch += n_overflow; + *inout_nsamps -= n_overflow; + } + } + + /* Finally update the frame counter with the number of frames + * and global sample counter with number of samples we procesed*/ + *inout_nframes = outidx; /* FIXME: Not sure why I wrote it this way... 
*/ + fe->sample_counter += orig_nsamps - *inout_nsamps; + return 0; +} + +int +fe_process_frames_ext(fe_t *fe, + int16 const **inout_spch, + size_t *inout_nsamps, + mfcc_t **buf_cep, + int32 *inout_nframes, + int16 **voiced_spch, + int32 *voiced_spch_nsamps, + int32 *out_frameidx) +{ + int proc_result; + + fe_prespch_extend_pcm(fe->vad_data->prespch_buf, *inout_nframes); + + fe->vad_data->store_pcm = TRUE; + proc_result = fe_process_frames(fe, inout_spch, inout_nsamps, buf_cep, inout_nframes, out_frameidx); + fe->vad_data->store_pcm = FALSE; + + if (fe->vad_data->global_state) + fe_prespch_read_pcm(fe->vad_data->prespch_buf, voiced_spch, voiced_spch_nsamps); + else + *voiced_spch_nsamps = 0; + + return proc_result; +} + +int +fe_process_utt(fe_t * fe, int16 const * spch, size_t nsamps, + mfcc_t *** cep_block, int32 * nframes) +{ + mfcc_t **cep; + int rv; + + /* Figure out how many frames we will need. */ + fe_process_frames(fe, NULL, &nsamps, NULL, nframes, NULL); + /* Create the output buffer (it has to exist, even if there are no output frames). */ + if (*nframes) + cep = (mfcc_t **)ckd_calloc_2d(*nframes, fe->feature_dimension, sizeof(**cep)); + else + cep = (mfcc_t **)ckd_calloc_2d(1, fe->feature_dimension, sizeof(**cep)); + /* Now just call fe_process_frames() with the allocated buffer. */ + rv = fe_process_frames(fe, &spch, &nsamps, cep, nframes, NULL); + *cep_block = cep; + + return rv; +} + + +int32 +fe_end_utt(fe_t * fe, mfcc_t * cepvector, int32 * nframes) +{ + /* Process any remaining data. */ + *nframes = 0; + if (fe->num_overflow_samps > 0) { + fe_read_frame(fe, fe->overflow_samps, fe->num_overflow_samps); + fe_write_frame(fe, cepvector); + if (!fe->vad_data->state_changed && fe->vad_data->global_state) + (*nframes)++; + } + + /* reset overflow buffers... 
*/ + fe->num_overflow_samps = 0; + fe->start_flag = 0; + + return 0; +} + +fe_t * +fe_retain(fe_t *fe) +{ + ++fe->refcount; + return fe; +} + +int +fe_free(fe_t * fe) +{ + if (fe == NULL) + return 0; + if (--fe->refcount > 0) + return fe->refcount; + + /* kill FE instance - free everything... */ + if (fe->mel_fb) { + if (fe->mel_fb->mel_cosine) + fe_free_2d((void *) fe->mel_fb->mel_cosine); + ckd_free(fe->mel_fb->lifter); + ckd_free(fe->mel_fb->spec_start); + ckd_free(fe->mel_fb->filt_start); + ckd_free(fe->mel_fb->filt_width); + ckd_free(fe->mel_fb->filt_coeffs); + ckd_free(fe->mel_fb); + } + ckd_free(fe->spch); + ckd_free(fe->frame); + ckd_free(fe->ccc); + ckd_free(fe->sss); + ckd_free(fe->spec); + ckd_free(fe->mfspec); + ckd_free(fe->overflow_samps); + ckd_free(fe->hamming_window); + + if (fe->noise_stats) + fe_free_noisestats(fe->noise_stats); + + if (fe->vad_data) { + fe_prespch_free(fe->vad_data->prespch_buf); + ckd_free(fe->vad_data); + } + + cmd_ln_free_r(fe->config); + ckd_free(fe); + + return 0; +} + +/** + * Convert a block of mfcc_t to float32 (can be done in-place) + **/ +int32 +fe_mfcc_to_float(fe_t * fe, + mfcc_t ** input, float32 ** output, int32 nframes) +{ + int32 i; + +#ifndef FIXED_POINT + if ((void *) input == (void *) output) + return nframes * fe->feature_dimension; +#endif + for (i = 0; i < nframes * fe->feature_dimension; ++i) + output[0][i] = MFCC2FLOAT(input[0][i]); + + return i; +} + +/** + * Convert a block of float32 to mfcc_t (can be done in-place) + **/ +int32 +fe_float_to_mfcc(fe_t * fe, + float32 ** input, mfcc_t ** output, int32 nframes) +{ + int32 i; + +#ifndef FIXED_POINT + if ((void *) input == (void *) output) + return nframes * fe->feature_dimension; +#endif + for (i = 0; i < nframes * fe->feature_dimension; ++i) + output[0][i] = FLOAT2MFCC(input[0][i]); + + return i; +} + +int32 +fe_logspec_to_mfcc(fe_t * fe, const mfcc_t * fr_spec, mfcc_t * fr_cep) +{ +#ifdef FIXED_POINT + fe_spec2cep(fe, fr_spec, fr_cep); +#else /* ! 
FIXED_POINT */ + powspec_t *powspec; + int32 i; + + powspec = ckd_malloc(fe->mel_fb->num_filters * sizeof(powspec_t)); + for (i = 0; i < fe->mel_fb->num_filters; ++i) + powspec[i] = (powspec_t) fr_spec[i]; + fe_spec2cep(fe, powspec, fr_cep); + ckd_free(powspec); +#endif /* ! FIXED_POINT */ + return 0; +} + +int32 +fe_logspec_dct2(fe_t * fe, const mfcc_t * fr_spec, mfcc_t * fr_cep) +{ +#ifdef FIXED_POINT + fe_dct2(fe, fr_spec, fr_cep, 0); +#else /* ! FIXED_POINT */ + powspec_t *powspec; + int32 i; + + powspec = ckd_malloc(fe->mel_fb->num_filters * sizeof(powspec_t)); + for (i = 0; i < fe->mel_fb->num_filters; ++i) + powspec[i] = (powspec_t) fr_spec[i]; + fe_dct2(fe, powspec, fr_cep, 0); + ckd_free(powspec); +#endif /* ! FIXED_POINT */ + return 0; +} + +int32 +fe_mfcc_dct3(fe_t * fe, const mfcc_t * fr_cep, mfcc_t * fr_spec) +{ +#ifdef FIXED_POINT + fe_dct3(fe, fr_cep, fr_spec); +#else /* ! FIXED_POINT */ + powspec_t *powspec; + int32 i; + + powspec = ckd_malloc(fe->mel_fb->num_filters * sizeof(powspec_t)); + fe_dct3(fe, fr_cep, powspec); + for (i = 0; i < fe->mel_fb->num_filters; ++i) + fr_spec[i] = (mfcc_t) powspec[i]; + ckd_free(powspec); +#endif /* ! FIXED_POINT */ + return 0; +} diff --git a/media/sphinxbase/src/libsphinxbase/fe/fe_internal.h b/media/sphinxbase/src/libsphinxbase/fe/fe_internal.h new file mode 100644 index 000000000..f6c943c72 --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/fe/fe_internal.h @@ -0,0 +1,216 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1996-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +#ifndef __FE_INTERNAL_H__ +#define __FE_INTERNAL_H__ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include "sphinxbase/fe.h" +#include "sphinxbase/fixpoint.h" + +#include "fe_noise.h" +#include "fe_prespch_buf.h" +#include "fe_type.h" + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. */ +} +#endif + +/* Values for the 'log_spec' field. */ +enum { + RAW_LOG_SPEC = 1, + SMOOTH_LOG_SPEC = 2 +}; + +/* Values for the 'transform' field. */ +enum { + LEGACY_DCT = 0, + DCT_II = 1, + DCT_HTK = 2 +}; + +typedef struct melfb_s melfb_t; +/** Base Struct to hold all structure for MFCC computation.
*/ +struct melfb_s { + float32 sampling_rate; + int32 num_cepstra; + int32 num_filters; + int32 fft_size; + float32 lower_filt_freq; + float32 upper_filt_freq; + /* DCT coefficients. */ + mfcc_t **mel_cosine; + /* Filter coefficients. */ + mfcc_t *filt_coeffs; + int16 *spec_start; + int16 *filt_start; + int16 *filt_width; + /* Luxury mobile home. */ + int32 doublewide; + char const *warp_type; + char const *warp_params; + uint32 warp_id; + /* Precomputed normalization constants for unitary DCT-II/DCT-III */ + mfcc_t sqrt_inv_n, sqrt_inv_2n; + /* Value and coefficients for HTK-style liftering */ + int32 lifter_val; + mfcc_t *lifter; + /* Normalize filters to unit area */ + int32 unit_area; + /* Round filter frequencies to DFT points (hurts accuracy, but is + useful for legacy purposes) */ + int32 round_filters; +}; + +typedef struct ringbuf_s { + powspec_t** bufs; + int16 buf_num; + int32 buf_len; + int16 start; + int16 end; + int32 recs; +} ringbuf_t; + +/* sqrt(1/2), also used for unitary DCT-II/DCT-III */ +#define SQRT_HALF FLOAT2MFCC(0.707106781186548) + +typedef struct vad_data_s { + uint8 global_state; + uint8 state_changed; + uint8 store_pcm; + int16 prespch_num; + int16 postspch_num; + prespch_buf_t* prespch_buf; +} vad_data_t; + +/** Structure for the front-end computation. */ +struct fe_s { + cmd_ln_t *config; + int refcount; + + int16 prespch_len; + int16 postspch_len; + float32 vad_threshold; + + float32 sampling_rate; + int16 frame_rate; + int16 frame_shift; + + float32 window_length; + int16 frame_size; + int16 fft_size; + + uint8 fft_order; + uint8 feature_dimension; + uint8 num_cepstra; + uint8 remove_dc; + uint8 log_spec; + uint8 swap; + uint8 dither; + uint8 transform; + uint8 remove_noise; + uint8 remove_silence; + + float32 pre_emphasis_alpha; + int32 seed; + + size_t sample_counter; + uint8 start_flag; + uint8 reserved; + + /* Twiddle factors for FFT. */ + frame_t *ccc, *sss; + /* Mel filter parameters. 
*/ + melfb_t *mel_fb; + /* Half of a Hamming Window. */ + window_t *hamming_window; + /* Storage for noise removal */ + noise_stats_t *noise_stats; + + /* Storage for VAD variables */ + vad_data_t *vad_data; + + /* Temporary buffers for processing. */ + /* FIXME: too many of these. */ + int16 *spch; + frame_t *frame; + powspec_t *spec, *mfspec; + int16 *overflow_samps; + int16 num_overflow_samps; + int16 prior; +}; + +void fe_init_dither(int32 seed); + +/* Apply 1/2 bit noise to a buffer of audio. */ +int32 fe_dither(int16 *buffer, int32 nsamps); + +/* Load a frame of data into the fe. */ +int fe_read_frame(fe_t *fe, int16 const *in, int32 len); + +/* Shift the input buffer back and read more data. */ +int fe_shift_frame(fe_t *fe, int16 const *in, int32 len); + +/* Process a frame of data into features. */ +void fe_write_frame(fe_t *fe, mfcc_t *fea); + +/* Initialization functions. */ +int32 fe_build_melfilters(melfb_t *MEL_FB); +int32 fe_compute_melcosine(melfb_t *MEL_FB); +void fe_create_hamming(window_t *in, int32 in_len); +void fe_create_twiddle(fe_t *fe); + +fixed32 fe_log_add(fixed32 x, fixed32 y); +fixed32 fe_log_sub(fixed32 x, fixed32 y); + +/* Miscellaneous processing functions. */ +void fe_spec2cep(fe_t * fe, const powspec_t * mflogspec, mfcc_t * mfcep); +void fe_dct2(fe_t *fe, const powspec_t *mflogspec, mfcc_t *mfcep, int htk); +void fe_dct3(fe_t *fe, const mfcc_t *mfcep, powspec_t *mflogspec); + +#ifdef __cplusplus +} +#endif + +#endif /* __FE_INTERNAL_H__ */ diff --git a/media/sphinxbase/src/libsphinxbase/fe/fe_noise.c b/media/sphinxbase/src/libsphinxbase/fe/fe_noise.c new file mode 100644 index 000000000..4fb6d21a9 --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/fe/fe_noise.c @@ -0,0 +1,425 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2013 Carnegie Mellon University. All rights + * reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +/* This noise removal algorithm is inspired by the following papers + * Computationally Efficient Speech Enhancement by Spectral Minima Tracking + * by G. Doblinger + * + * Power-Normalized Cepstral Coefficients (PNCC) for Robust Speech Recognition + * by C. Kim.
+ *
+ * For the recent research and state of art see papers about IMRCA and
+ * A Minimum-Mean-Square-Error Noise Reduction Algorithm On Mel-Frequency
+ * Cepstra For Robust Speech Recognition by Dong Yu and others
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "sphinxbase/prim_type.h"
+#include "sphinxbase/ckd_alloc.h"
+#include "sphinxbase/strfuncs.h"
+#include "sphinxbase/err.h"
+
+#include "fe_noise.h"
+#include "fe_internal.h"
+
+/* Noise suppression constants */
+#define SMOOTH_WINDOW 4
+#define LAMBDA_POWER 0.7
+#define LAMBDA_A 0.995
+#define LAMBDA_B 0.5
+#define LAMBDA_T 0.85
+#define MU_T 0.2
+#define MAX_GAIN 20
+
+/*
+ * Per-stream state of the noise tracker.  The four arrays hold
+ * num_filters entries each.  In a FIXED_POINT build the "precomputed
+ * constants" are stored as fixed-point natural logs of the values
+ * above, otherwise as the plain values.
+ */
+struct noise_stats_s {
+    /* Smoothed power */
+    powspec_t *power;
+    /* Noise estimate */
+    powspec_t *noise;
+    /* Signal floor estimate */
+    powspec_t *floor;
+    /* Peak for temporal masking */
+    powspec_t *peak;
+
+    /* Initialize it next time */
+    uint8 undefined;
+    /* Number of items to process */
+    uint32 num_filters;
+
+    /* Precomputed constants; each comp_* is derived from (1 - value) */
+    powspec_t lambda_power;
+    powspec_t comp_lambda_power;
+    powspec_t lambda_a;
+    powspec_t comp_lambda_a;
+    powspec_t lambda_b;
+    powspec_t comp_lambda_b;
+    powspec_t lambda_t;
+    powspec_t mu_t;
+    powspec_t max_gain;
+    powspec_t inv_max_gain;
+
+    /* Entry [n] is 1/n (float build) or log(n) (fixed-point build);
+     * entry [0] is never written and stays zero from ckd_calloc(). */
+    powspec_t smooth_scaling[2 * SMOOTH_WINDOW + 3];
+};
+
+/*
+ * Update the envelope estimate floor_buf[] in place from buf[].
+ *
+ * Each band is a first-order recursive average whose coefficient
+ * depends on direction: lambda_a while the input is at or above the
+ * current estimate, lambda_b while it is below.  The fixed-point
+ * variant does the same sum with fe_log_add() on log-domain values.
+ */
+static void
+fe_lower_envelope(noise_stats_t *noise_stats, powspec_t * buf, powspec_t * floor_buf, int32 num_filt)
+{
+    int i;
+
+    for (i = 0; i < num_filt; i++) {
+#ifndef FIXED_POINT
+        if (buf[i] >= floor_buf[i]) {
+            floor_buf[i] =
+                noise_stats->lambda_a * floor_buf[i] + noise_stats->comp_lambda_a * buf[i];
+        }
+        else {
+            floor_buf[i] =
+                noise_stats->lambda_b * floor_buf[i] + noise_stats->comp_lambda_b * buf[i];
+        }
+#else
+        if (buf[i] >= floor_buf[i]) {
+            floor_buf[i] = fe_log_add(noise_stats->lambda_a + floor_buf[i],
+                                      noise_stats->comp_lambda_a + buf[i]);
+        }
+        else {
+            floor_buf[i] = fe_log_add(noise_stats->lambda_b + floor_buf[i],
+                                      noise_stats->comp_lambda_b + buf[i]);
+        }
+#endif
+    }
+}
+
+/*
+ * Temporal masking, applied in place to buf[] with state in peak[].
+ *
+ * Per band: the stored peak is first decayed by lambda_t.  If the
+ * input is below lambda_t times that decayed peak it is replaced by
+ * the decayed peak scaled by mu_t.  Finally the peak is raised to the
+ * unmodified input when the input exceeds it.
+ */
+static void
+fe_temp_masking(noise_stats_t *noise_stats, powspec_t * buf, powspec_t * peak, int32 num_filt)
+{
+    powspec_t cur_in;
+    int i;
+
+    for (i = 0; i < num_filt; i++) {
+        /* Remember the input before it is possibly overwritten below. */
+        cur_in = buf[i];
+
+#ifndef FIXED_POINT
+        peak[i] *= noise_stats->lambda_t;
+        if (buf[i] < noise_stats->lambda_t * peak[i])
+            buf[i] = peak[i] * noise_stats->mu_t;
+#else
+        peak[i] += noise_stats->lambda_t;
+        if (buf[i] < noise_stats->lambda_t + peak[i])
+            buf[i] = peak[i] + noise_stats->mu_t;
+#endif
+
+        if (cur_in > peak[i])
+            peak[i] = cur_in;
+    }
+}
+
+/*
+ * Spectral weight smoothing.
+ *
+ * Scales buf[i] by the mean of coefs[] over the bands
+ * [i - SMOOTH_WINDOW, i + SMOOTH_WINDOW], clipped to the valid range.
+ * noise_stats is only read in the fixed-point build, where the
+ * division by the window size becomes a subtraction of
+ * smooth_scaling[window size].
+ */
+static void
+fe_weight_smooth(noise_stats_t *noise_stats, powspec_t * buf, powspec_t * coefs, int32 num_filt)
+{
+    int i, j;
+    int l1, l2;
+    powspec_t coef;
+
+    for (i = 0; i < num_filt; i++) {
+        l1 = ((i - SMOOTH_WINDOW) > 0) ? (i - SMOOTH_WINDOW) : 0;
+        l2 = ((i + SMOOTH_WINDOW) <
+              (num_filt - 1)) ? (i + SMOOTH_WINDOW) : (num_filt - 1);
+
+#ifndef FIXED_POINT
+        coef = 0;
+        for (j = l1; j <= l2; j++) {
+            coef += coefs[j];
+        }
+        buf[i] = buf[i] * (coef / (l2 - l1 + 1));
+#else
+        coef = MIN_FIXLOG;
+        for (j = l1; j <= l2; j++) {
+            coef = fe_log_add(coef, coefs[j]);
+        }
+        buf[i] = buf[i] + coef - noise_stats->smooth_scaling[l2 - l1 + 1];
+#endif
+
+    }
+}
+
+/*
+ * Allocate a noise tracker for num_filters bands.
+ *
+ * All four per-band arrays are zero-filled by ckd_calloc(); the object
+ * is marked "undefined" so fe_track_snr() seeds it from the first
+ * frame it sees.  Release with fe_free_noisestats().
+ */
+noise_stats_t *
+fe_init_noisestats(int num_filters)
+{
+    int i;
+    noise_stats_t *noise_stats;
+
+    noise_stats = (noise_stats_t *) ckd_calloc(1, sizeof(noise_stats_t));
+
+    noise_stats->power =
+        (powspec_t *) ckd_calloc(num_filters, sizeof(powspec_t));
+    noise_stats->noise =
+        (powspec_t *) ckd_calloc(num_filters, sizeof(powspec_t));
+    noise_stats->floor =
+        (powspec_t *) ckd_calloc(num_filters, sizeof(powspec_t));
+    noise_stats->peak =
+        (powspec_t *) ckd_calloc(num_filters, sizeof(powspec_t));
+
+    noise_stats->undefined = TRUE;
+    noise_stats->num_filters = num_filters;
+
+#ifndef FIXED_POINT
+    noise_stats->lambda_power = LAMBDA_POWER;
+    noise_stats->comp_lambda_power = 1 - LAMBDA_POWER;
+    noise_stats->lambda_a = LAMBDA_A;
+    noise_stats->comp_lambda_a = 1 - LAMBDA_A;
+    noise_stats->lambda_b = LAMBDA_B;
+    noise_stats->comp_lambda_b = 1 - LAMBDA_B;
+    noise_stats->lambda_t = LAMBDA_T;
+    noise_stats->mu_t = MU_T;
+    noise_stats->max_gain = MAX_GAIN;
+    noise_stats->inv_max_gain = 1.0 / MAX_GAIN;
+
+    /* Fill every entry, as the fixed-point branch does; the previous
+     * bound stopped before the full-window size 2*SMOOTH_WINDOW+1. */
+    for (i = 1; i < 2 * SMOOTH_WINDOW + 3; i++) {
+        noise_stats->smooth_scaling[i] = 1.0 / i;
+    }
+#else
+    noise_stats->lambda_power = FLOAT2FIX(log(LAMBDA_POWER));
+    noise_stats->comp_lambda_power = FLOAT2FIX(log(1 - LAMBDA_POWER));
+    noise_stats->lambda_a = FLOAT2FIX(log(LAMBDA_A));
+    noise_stats->comp_lambda_a = FLOAT2FIX(log(1 - LAMBDA_A));
+    noise_stats->lambda_b = FLOAT2FIX(log(LAMBDA_B));
+    noise_stats->comp_lambda_b = FLOAT2FIX(log(1 - LAMBDA_B));
+    noise_stats->lambda_t = FLOAT2FIX(log(LAMBDA_T));
+    noise_stats->mu_t = FLOAT2FIX(log(MU_T));
+    noise_stats->max_gain = FLOAT2FIX(log(MAX_GAIN));
+    noise_stats->inv_max_gain = FLOAT2FIX(log(1.0 / MAX_GAIN));
+
+    for (i = 1; i < 2 * SMOOTH_WINDOW + 3; i++) {
+        noise_stats->smooth_scaling[i] = FLOAT2FIX(log(i));
+    }
+#endif
+
+    return noise_stats;
+}
+
+/*
+ * Forget the collected statistics: the next fe_track_snr() call
+ * re-seeds the tracker.  A NULL argument is ignored.
+ */
+void
+fe_reset_noisestats(noise_stats_t * noise_stats)
+{
+    if (noise_stats)
+        noise_stats->undefined = TRUE;
+}
+
+/*
+ * Release the per-band arrays and the tracker itself.  A NULL
+ * argument is ignored, matching fe_reset_noisestats().
+ */
+void
+fe_free_noisestats(noise_stats_t * noise_stats)
+{
+    if (!noise_stats)
+        return;
+    ckd_free(noise_stats->power);
+    ckd_free(noise_stats->noise);
+    ckd_free(noise_stats->floor);
+    ckd_free(noise_stats->peak);
+    ckd_free(noise_stats);
+}
+
+/**
+ * For fixed point we are doing the computation in a fixlog domain,
+ * so we have to add many processing cases.
+ */
+/*
+ * Per-frame entry point of the noise tracker.
+ *
+ * Reads fe->mfspec, updates fe->noise_stats, stores the frame-level
+ * speech/non-speech decision in *in_speech and, when fe->remove_noise
+ * is set, rescales fe->mfspec in place with the smoothed gain.
+ */
+void
+fe_track_snr(fe_t * fe, int32 *in_speech)
+{
+    noise_stats_t *stats;
+    powspec_t *spec;
+    powspec_t *clean;
+    powspec_t *weights;
+    powspec_t best_snr, best_log_sig;
+    powspec_t cur_snr, cur_log_sig;
+    int32 nfilt, k;
+    int below_threshold;
+
+    /* With both features off there is nothing to track. */
+    if (!fe->remove_noise && !fe->remove_silence) {
+        *in_speech = TRUE;
+        return;
+    }
+
+    stats = fe->noise_stats;
+    spec = fe->mfspec;
+    nfilt = stats->num_filters;
+
+    clean = (powspec_t *) ckd_calloc(nfilt, sizeof(powspec_t));
+
+    /* First frame after init/reset: seed every tracker from it. */
+    if (stats->undefined) {
+        for (k = 0; k < nfilt; k++) {
+            stats->power[k] = spec[k];
+            stats->noise[k] = spec[k];
+#ifndef FIXED_POINT
+            stats->floor[k] = spec[k] / stats->max_gain;
+            stats->peak[k] = 0.0;
+#else
+            stats->floor[k] = spec[k] - stats->max_gain;
+            stats->peak[k] = MIN_FIXLOG;
+#endif
+        }
+        stats->undefined = FALSE;
+    }
+
+    /* Recursive smoothing of the band power. */
+    for (k = 0; k < nfilt; k++) {
+#ifndef FIXED_POINT
+        stats->power[k] =
+            stats->lambda_power * stats->power[k] + stats->comp_lambda_power * spec[k];
+#else
+        stats->power[k] = fe_log_add(stats->lambda_power + stats->power[k],
+                                     stats->comp_lambda_power + spec[k]);
+#endif
+    }
+
+    /* Noise estimate follows the envelope of the smoothed power. */
+    fe_lower_envelope(stats, stats->power, stats->noise, nfilt);
+
+    /* Find the largest per-band log SNR; the signal maximum is only
+     * updated on bands that also raise the SNR maximum. */
+    best_snr = FLOAT2FIX(0.0f);
+    best_log_sig = FLOAT2FIX(0.0f);
+    for (k = 0; k < nfilt; k++) {
+#ifndef FIXED_POINT
+        clean[k] = stats->power[k] - stats->noise[k];
+        if (clean[k] < 1.0)
+            clean[k] = 1.0;
+        cur_snr = log(stats->power[k] / stats->noise[k]);
+        cur_log_sig = log(clean[k]);
+#else
+        clean[k] = fe_log_sub(stats->power[k], stats->noise[k]);
+        cur_snr = stats->power[k] - stats->noise[k];
+        cur_log_sig = clean[k];
+#endif
+        /* Written as a negated '>' so that an unordered (NaN) value is
+         * skipped exactly as a plain 'if (snr > max)' would skip it. */
+        if (!(cur_snr > best_snr))
+            continue;
+        best_snr = cur_snr;
+        if (cur_log_sig > best_log_sig)
+            best_log_sig = cur_log_sig;
+    }
+
+    /* Frame-level VAD decision. */
+#ifndef FIXED_POINT
+    below_threshold = (best_snr < fe->vad_threshold
+                       || best_log_sig < fe->vad_threshold);
+#else
+    below_threshold = (best_snr < FLOAT2FIX(fe->vad_threshold)
+                       || best_log_sig < FLOAT2FIX(fe->vad_threshold));
+#endif
+    *in_speech = (fe->remove_silence && below_threshold) ? FALSE : TRUE;
+
+    /* These two keep the floor and peak state current even when only
+     * silence removal is enabled. */
+    fe_lower_envelope(stats, clean, stats->floor, nfilt);
+    fe_temp_masking(stats, clean, stats->peak, nfilt);
+
+    /* The remaining work only matters when noise cancellation is on. */
+    if (fe->remove_noise) {
+        for (k = 0; k < nfilt; k++) {
+            if (clean[k] < stats->floor[k])
+                clean[k] = stats->floor[k];
+        }
+
+        weights = (powspec_t *) ckd_calloc(nfilt, sizeof(powspec_t));
+        for (k = 0; k < nfilt; k++) {
+#ifndef FIXED_POINT
+            if (clean[k] < stats->max_gain * stats->power[k])
+                weights[k] = clean[k] / stats->power[k];
+            else
+                weights[k] = stats->max_gain;
+#else
+            weights[k] = clean[k] - stats->power[k];
+            if (weights[k] > stats->max_gain)
+                weights[k] = stats->max_gain;
+#endif
+            if (weights[k] < stats->inv_max_gain)
+                weights[k] = stats->inv_max_gain;
+        }
+
+        /* Weight smoothing and time frequency normalization */
+        fe_weight_smooth(stats, spec, weights, nfilt);
+
+        ckd_free(weights);
+    }
+
+    ckd_free(clean);
+}
+
+/*
+ * Turn the per-frame decision is_speech into the global VAD state
+ * kept in fe->vad_data, buffering cepstra (and optionally PCM) while
+ * a silence->speech transition is still pending.
+ *
+ * state_changed is set to 1 only on the frame where global_state
+ * flips: after fe->prespch_len consecutive speech frames, or after
+ * fe->postspch_len consecutive non-speech frames.
+ */
+void
+fe_vad_hangover(fe_t * fe, mfcc_t * fea, int32 is_speech)
+{
+    /* No transition unless one of the counters below trips. */
+    fe->vad_data->state_changed = 0;
+
+    if (!is_speech) {
+        /* A non-speech frame discards any pending pre-speech frames. */
+        fe->vad_data->prespch_num = 0;
+        fe_prespch_reset_cep(fe->vad_data->prespch_buf);
+        if (fe->vad_data->global_state) {
+            /* check for transition speech->sil */
+            if (++fe->vad_data->postspch_num >= fe->postspch_len) {
+                fe->vad_data->postspch_num = 0;
+                fe->vad_data->global_state = 0;
+                /* transition speech->silence occurred */
+                fe->vad_data->state_changed = 1;
+            }
+        }
+    }
+    else {
+        fe->vad_data->postspch_num = 0;
+        if (!fe->vad_data->global_state) {
+            fe->vad_data->prespch_num++;
+            fe_prespch_write_cep(fe->vad_data->prespch_buf, fea);
+            /* check for transition sil->speech */
+            if (fe->vad_data->prespch_num >= fe->prespch_len) {
+                fe->vad_data->prespch_num = 0;
+                fe->vad_data->global_state = 1;
+                /* transition silence->speech occurred */
+                fe->vad_data->state_changed = 1;
+            }
+        }
+    }
+
+    if (fe->vad_data->store_pcm) {
+        /* Keep audio while in, or possibly entering, speech; otherwise
+         * drop whatever was buffered. */
+        if (!is_speech && !fe->vad_data->global_state)
+            fe_prespch_reset_pcm(fe->vad_data->prespch_buf);
+        else
+            fe_prespch_write_pcm(fe->vad_data->prespch_buf, fe->spch);
+    }
+}
diff --git a/media/sphinxbase/src/libsphinxbase/fe/fe_noise.h b/media/sphinxbase/src/libsphinxbase/fe/fe_noise.h
new file mode 100644
index 000000000..b633a4cec
--- /dev/null
+++ b/media/sphinxbase/src/libsphinxbase/fe/fe_noise.h
@@ -0,0 +1,66 @@
+/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
+/* ====================================================================
+ * Copyright (c) 2013 Carnegie Mellon University. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * This work was supported in part by funding from the Defense Advanced
+ * Research Projects Agency and the National Science Foundation of the
+ * United States of America, and the CMU Sphinx Speech Consortium.
+ *
+ * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
+ * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
+ * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ====================================================================
+ *
+ */
+#ifndef FE_NOISE_H
+#define FE_NOISE_H
+
+#include "sphinxbase/fe.h"
+#include "sphinxbase/fixpoint.h"
+#include "fe_type.h"
+
+/* Opaque noise-tracker state; the layout is private to fe_noise.c. */
+typedef struct noise_stats_s noise_stats_t;
+
+/*
+ * Creates a noise-statistics object with one entry per filter in each
+ * of its per-filter arrays.  The object starts out "undefined", i.e.
+ * it is seeded from the first frame given to fe_track_snr().
+ * Release it with fe_free_noisestats().
+ */
+noise_stats_t *fe_init_noisestats(int num_filters);
+
+/*
+ * Resets collected noise statistics: marks the object so that the
+ * next fe_track_snr() call re-seeds it.  NULL is accepted and ignored.
+ */
+void fe_reset_noisestats(noise_stats_t * noise_stats);
+
+/* Frees the per-filter arrays and the object itself. */
+void fe_free_noisestats(noise_stats_t * noise_stats);
+
+/**
+ * Process frame, update noise statistics, remove noise components if needed,
+ * and return local vad decision.
+ *
+ * The frame is read from fe->mfspec and, when fe->remove_noise is set,
+ * modified there in place.  The decision is written to *in_speech as
+ * TRUE or FALSE; it is always TRUE when fe->remove_silence is off.
+ */
+void fe_track_snr(fe_t *fe, int32 *in_speech);
+
+/**
+ * Updates global state based on local VAD state smoothing the estimate.
+ *
+ * is_speech is the per-frame decision produced by fe_track_snr(); fea
+ * is the frame's cepstrum, which is buffered while a silence->speech
+ * transition is pending.  Results are left in fe->vad_data.
+ */
+void fe_vad_hangover(fe_t *fe, mfcc_t *fea, int32 is_speech);
+
+#endif /* FE_NOISE_H */
diff --git a/media/sphinxbase/src/libsphinxbase/fe/fe_prespch_buf.c b/media/sphinxbase/src/libsphinxbase/fe/fe_prespch_buf.c
new file mode 100644
index 000000000..028c09ac5
--- /dev/null
+++ b/media/sphinxbase/src/libsphinxbase/fe/fe_prespch_buf.c
@@ -0,0 +1,182 @@
+/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
+/* ====================================================================
+* Copyright (c) 2013 Carnegie Mellon University. All rights
+* reserved.
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions
+* are met:
+*
+* 1. Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* 2. Redistributions in binary form must reproduce the above copyright
+* notice, this list of conditions and the following disclaimer in
+* the documentation and/or other materials provided with the
+* distribution.
+*
+* This work was supported in part by funding from the Defense Advanced
+* Research Projects Agency and the National Science Foundation of the
+* United States of America, and the CMU Sphinx Speech Consortium.
+*
+* THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
+* ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+* PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
+* NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*
+* ====================================================================
+*
+*/
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+/* NOTE(review): <string.h> and <assert.h> are required by memcpy() and
+ * assert() below; <stdio.h> is presumed from upstream - confirm. */
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+#include "sphinxbase/ckd_alloc.h"
+#include "sphinxbase/err.h"
+
+#include "fe_prespch_buf.h"
+
+/*
+ * Pre-speech buffer: a fixed number of cepstral frames plus an
+ * optional, growable block of raw PCM frames.
+ */
+struct prespch_buf_s {
+    /* saved mfcc frames */
+    mfcc_t **cep_buf;
+    /* saved pcm audio; NULL until fe_prespch_extend_pcm() is called */
+    int16 *pcm_buf;
+    /* write pointer for cep buffer */
+    int16 cep_write_ptr;
+    /* read pointer for cep buffer */
+    int16 cep_read_ptr;
+    /* write pointer for pcm buffer */
+    int16 pcm_write_ptr;
+    /* frames amount in cep buffer */
+    int16 num_frames_cep;
+    /* frames amount in pcm buffer */
+    int16 num_frames_pcm;
+    /* filters amount */
+    int16 num_cepstra;
+    /* amount of fresh samples in frame */
+    int16 num_samples;
+};
+
+/*
+ * Create a buffer holding up to num_frames cepstral frames of
+ * num_cepstra values each.  No PCM storage is allocated here; see
+ * fe_prespch_extend_pcm().  Release with fe_prespch_free().
+ */
+prespch_buf_t *
+fe_prespch_init(int num_frames, int num_cepstra, int num_samples)
+{
+    prespch_buf_t *prespch_buf;
+
+    prespch_buf = (prespch_buf_t *) ckd_calloc(1, sizeof(prespch_buf_t));
+
+    prespch_buf->num_cepstra = num_cepstra;
+    prespch_buf->num_frames_cep = num_frames;
+    prespch_buf->num_samples = num_samples;
+    prespch_buf->num_frames_pcm = 0;
+    prespch_buf->cep_write_ptr = 0;
+    prespch_buf->cep_read_ptr = 0;
+    prespch_buf->pcm_write_ptr = 0;
+
+    prespch_buf->cep_buf = (mfcc_t **)
+        ckd_calloc_2d(num_frames, num_cepstra,
+                      sizeof(**prespch_buf->cep_buf));
+
+    return prespch_buf;
+}
+
+/*
+ * Make sure the PCM buffer can hold num_frames_pcm frames on top of
+ * the cepstral capacity plus one.  The buffer only ever grows.
+ */
+void
+fe_prespch_extend_pcm(prespch_buf_t* prespch_buf, int num_frames_pcm)
+{
+    num_frames_pcm += prespch_buf->num_frames_cep + 1;
+    if (num_frames_pcm > prespch_buf->num_frames_pcm) {
+        prespch_buf->num_frames_pcm = num_frames_pcm;
+        prespch_buf->pcm_buf = (int16 *) ckd_realloc(prespch_buf->pcm_buf, prespch_buf->num_frames_pcm * prespch_buf->num_samples * sizeof(int16));
+    }
+}
+
+/*
+ * Copy the next unread cepstral frame into fea.
+ * Returns 1 when a frame was copied, 0 when nothing is left to read.
+ */
+int
+fe_prespch_read_cep(prespch_buf_t * prespch_buf, mfcc_t * fea)
+{
+    if (prespch_buf->cep_read_ptr >= prespch_buf->num_frames_cep)
+        return 0;
+    if (prespch_buf->cep_read_ptr >= prespch_buf->cep_write_ptr)
+        return 0;
+    memcpy(fea, prespch_buf->cep_buf[prespch_buf->cep_read_ptr],
+           sizeof(mfcc_t) * prespch_buf->num_cepstra);
+    prespch_buf->cep_read_ptr++;
+    return 1;
+}
+
+/*
+ * Append one cepstral frame.  The caller must not exceed the capacity
+ * given to fe_prespch_init(); this is only checked by assert().
+ */
+void
+fe_prespch_write_cep(prespch_buf_t * prespch_buf, mfcc_t * fea)
+{
+    assert(prespch_buf->cep_write_ptr < prespch_buf->num_frames_cep);
+    memcpy(prespch_buf->cep_buf[prespch_buf->cep_write_ptr], fea,
+           sizeof(mfcc_t) * prespch_buf->num_cepstra);
+    prespch_buf->cep_write_ptr++;
+}
+
+/*
+ * Hand out everything written to the PCM buffer so far and rewind the
+ * write pointer.  *samples points into the buffer's own storage (no
+ * copy is made); *samples_num is the number of int16 samples.
+ * When no PCM storage exists, *samples is NULL and *samples_num is 0.
+ */
+void
+fe_prespch_read_pcm(prespch_buf_t * prespch_buf, int16 ** samples,
+                    int32 * samples_num)
+{
+    if (!prespch_buf->pcm_buf) {
+        /* pcm prespch buffer isn't initialized yet */
+        /* Write through the out-parameter; assigning to 'samples'
+         * itself would leave the caller's pointer untouched. */
+        *samples = NULL;
+        *samples_num = 0;
+        return;
+    }
+    *samples = prespch_buf->pcm_buf;
+    *samples_num = prespch_buf->pcm_write_ptr * prespch_buf->num_samples;
+    prespch_buf->pcm_write_ptr = 0;
+}
+
+/*
+ * Append one frame (num_samples values) of PCM audio.  Capacity must
+ * have been reserved with fe_prespch_extend_pcm(); this is only
+ * checked by assert().
+ */
+void
+fe_prespch_write_pcm(prespch_buf_t * prespch_buf, int16 * samples)
+{
+    int32 sample_ptr;
+
+    assert(prespch_buf->pcm_write_ptr < prespch_buf->num_frames_pcm);
+    sample_ptr = prespch_buf->pcm_write_ptr * prespch_buf->num_samples;
+    memcpy(&prespch_buf->pcm_buf[sample_ptr], samples,
+           prespch_buf->num_samples * sizeof(int16));
+    prespch_buf->pcm_write_ptr++;
+}
+
+/* Discard all buffered cepstral frames. */
+void
+fe_prespch_reset_cep(prespch_buf_t * prespch_buf)
+{
+    prespch_buf->cep_read_ptr = 0;
+    prespch_buf->cep_write_ptr = 0;
+}
+
+/* Discard all buffered PCM frames (storage is kept). */
+void
+fe_prespch_reset_pcm(prespch_buf_t * prespch_buf)
+{
+    prespch_buf->pcm_write_ptr = 0;
+}
+
+/* Release the buffer and both of its stores.  NULL is ignored. */
+void
+fe_prespch_free(prespch_buf_t * prespch_buf)
+{
+    if (!prespch_buf)
+        return;
+    if (prespch_buf->cep_buf)
+        ckd_free_2d((void **) prespch_buf->cep_buf);
+    if (prespch_buf->pcm_buf)
+        ckd_free(prespch_buf->pcm_buf);
+    ckd_free(prespch_buf);
+}
+
+/* Number of cepstral frames written but not yet read. */
+int32
+fe_prespch_ncep(prespch_buf_t * prespch_buf)
+{
+    return prespch_buf->cep_write_ptr - prespch_buf->cep_read_ptr;
+}
diff --git a/media/sphinxbase/src/libsphinxbase/fe/fe_prespch_buf.h b/media/sphinxbase/src/libsphinxbase/fe/fe_prespch_buf.h
new file mode 100644
index 000000000..d349ddfdb
--- /dev/null
+++ b/media/sphinxbase/src/libsphinxbase/fe/fe_prespch_buf.h
@@ -0,0 +1,79 @@
+/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
+/* ====================================================================
+ * Copyright (c) 2013 Carnegie Mellon University. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * This work was supported in part by funding from the Defense Advanced
+ * Research Projects Agency and the National Science Foundation of the
+ * United States of America, and the CMU Sphinx Speech Consortium.
+ *
+ * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
+ * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
+ * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ====================================================================
+ *
+ */
+
+/* Buffer that maintains both features and raw audio for the VAD implementation */
+
+/* The guard is named after this header; it previously reused
+ * FE_INTERNAL_H, which names a different header. */
+#ifndef FE_PRESPCH_BUF_H
+#define FE_PRESPCH_BUF_H
+
+#include "sphinxbase/fe.h"
+
+/* Opaque buffer handle; the layout is private to fe_prespch_buf.c. */
+typedef struct prespch_buf_s prespch_buf_t;
+
+/*
+ * Creates prespeech buffer with room for num_frames cepstral frames
+ * of num_cepstra values.  num_samples is the number of PCM samples
+ * per frame; PCM storage itself is reserved by fe_prespch_extend_pcm().
+ */
+prespch_buf_t *fe_prespch_init(int num_frames, int num_cepstra,
+                               int num_samples);
+
+/* Extends pcm prespeech buffer with specified amount of frames */
+void fe_prespch_extend_pcm(prespch_buf_t* prespch_buf, int num_frames_pcm);
+
+/*
+ * Reads mfcc frame from prespeech buffer into fea.
+ * Returns 1 if a frame was copied, 0 if no unread frame is available.
+ */
+int fe_prespch_read_cep(prespch_buf_t * prespch_buf, mfcc_t * fea);
+
+/* Writes mfcc frame to prespeech buffer */
+void fe_prespch_write_cep(prespch_buf_t * prespch_buf, mfcc_t * fea);
+
+/*
+ * Reads all buffered pcm audio from prespeech buffer and rewinds its
+ * write pointer.  *samples refers to the buffer's own storage.
+ * *samples_num is set to 0 when no pcm storage has been allocated;
+ * check the count before using *samples.
+ */
+void fe_prespch_read_pcm(prespch_buf_t * prespch_buf, int16 ** samples,
+                         int32 * samples_num);
+
+/* Writes pcm frame to prespeech buffer */
+void fe_prespch_write_pcm(prespch_buf_t * prespch_buf, int16 * samples);
+
+/* Resets read/write pointers for cepstrum buffer */
+void fe_prespch_reset_cep(prespch_buf_t * prespch_buf);
+
+/* Resets read/write pointer for pcm audio buffer */
+void fe_prespch_reset_pcm(prespch_buf_t * prespch_buf);
+
+/* Releases prespeech buffer; NULL is ignored */
+void fe_prespch_free(prespch_buf_t * prespch_buf);
+
+/* Returns number of accumulated (written but not yet read) mfcc frames */
+int32 fe_prespch_ncep(prespch_buf_t * prespch_buf);
+
+#endif /* FE_PRESPCH_BUF_H */
diff --git a/media/sphinxbase/src/libsphinxbase/fe/fe_sigproc.c b/media/sphinxbase/src/libsphinxbase/fe/fe_sigproc.c
new file mode 100644
index 000000000..577640f62
--- /dev/null
+++ b/media/sphinxbase/src/libsphinxbase/fe/fe_sigproc.c
@@ -0,0 +1,1377 @@
+/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
+/* ====================================================================
+ * Copyright (c) 1996-2004 Carnegie Mellon University. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * This work was supported in part by funding from the Defense Advanced
+ * Research Projects Agency and the National Science Foundation of the
+ * United States of America, and the CMU Sphinx Speech Consortium.
+ *
+ * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
+ * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
+ * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ====================================================================
+ *
+ */
+
+/* NOTE(review): the operands of the system includes below were lost
+ * in extraction and have been restored from the upstream sphinxbase
+ * file - confirm the header names and their order. */
+#include <stdio.h>
+#include <math.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#ifdef _MSC_VER
+#pragma warning (disable: 4244)
+#endif
+
+/**
+ * Windows math.h does not contain M_PI
+ */
+#ifndef M_PI
+#define M_PI 3.14159265358979323846
+#endif
+
+#include "sphinxbase/prim_type.h"
+#include "sphinxbase/ckd_alloc.h"
+#include "sphinxbase/byteorder.h"
+#include "sphinxbase/fixpoint.h"
+#include "sphinxbase/fe.h"
+#include "sphinxbase/genrand.h"
+#include "sphinxbase/err.h"
+
+#include "fe_internal.h"
+#include "fe_warp.h"
+
+/* Use extra precision for cosines, Hamming window, pre-emphasis
+ * coefficient, twiddle factors.
+ *
+ * FLOAT2COS(x) converts a floating-point constant to the working
+ * representation and COSMUL(x,y) multiplies by such a value.  In a
+ * FIXED_POINT build both forward to the *_ANY fixed-point helpers
+ * with 30 as their last argument; otherwise they are the identity
+ * and a plain product. */
+#ifdef FIXED_POINT
+#define FLOAT2COS(x) FLOAT2FIX_ANY(x,30)
+#define COSMUL(x,y) FIXMUL_ANY(x,y,30)
+#else
+#define FLOAT2COS(x) (x)
+#define COSMUL(x,y) ((x)*(y))
+#endif
+
+#ifdef FIXED_POINT
+
+/* Internal log-addition table for natural log with radix point at 8
+ * bits. Each entry is 256 * log(1 + e^{-n/256}).
This is used in the + * log-add computation: + * + * e^z = e^x + e^y + * e^z = e^x(1 + e^{y-x}) = e^y(1 + e^{x-y}) + * z = x + log(1 + e^{y-x}) = y + log(1 + e^{x-y}) + * + * So when y > x, z = y + logadd_table[-(x-y)] + * when x > y, z = x + logadd_table[-(y-x)] + */ +static const unsigned char fe_logadd_table[] = { + 177, 177, 176, 176, 175, 175, 174, 174, 173, 173, + 172, 172, 172, 171, 171, 170, 170, 169, 169, 168, + 168, 167, 167, 166, 166, 165, 165, 164, 164, 163, + 163, 162, 162, 161, 161, 161, 160, 160, 159, 159, + 158, 158, 157, 157, 156, 156, 155, 155, 155, 154, + 154, 153, 153, 152, 152, 151, 151, 151, 150, 150, + 149, 149, 148, 148, 147, 147, 147, 146, 146, 145, + 145, 144, 144, 144, 143, 143, 142, 142, 141, 141, + 141, 140, 140, 139, 139, 138, 138, 138, 137, 137, + 136, 136, 136, 135, 135, 134, 134, 134, 133, 133, + 132, 132, 131, 131, 131, 130, 130, 129, 129, 129, + 128, 128, 128, 127, 127, 126, 126, 126, 125, 125, + 124, 124, 124, 123, 123, 123, 122, 122, 121, 121, + 121, 120, 120, 119, 119, 119, 118, 118, 118, 117, + 117, 117, 116, 116, 115, 115, 115, 114, 114, 114, + 113, 113, 113, 112, 112, 112, 111, 111, 110, 110, + 110, 109, 109, 109, 108, 108, 108, 107, 107, 107, + 106, 106, 106, 105, 105, 105, 104, 104, 104, 103, + 103, 103, 102, 102, 102, 101, 101, 101, 100, 100, + 100, 99, 99, 99, 98, 98, 98, 97, 97, 97, + 96, 96, 96, 96, 95, 95, 95, 94, 94, 94, + 93, 93, 93, 92, 92, 92, 92, 91, 91, 91, + 90, 90, 90, 89, 89, 89, 89, 88, 88, 88, + 87, 87, 87, 87, 86, 86, 86, 85, 85, 85, + 85, 84, 84, 84, 83, 83, 83, 83, 82, 82, + 82, 82, 81, 81, 81, 80, 80, 80, 80, 79, + 79, 79, 79, 78, 78, 78, 78, 77, 77, 77, + 77, 76, 76, 76, 75, 75, 75, 75, 74, 74, + 74, 74, 73, 73, 73, 73, 72, 72, 72, 72, + 71, 71, 71, 71, 71, 70, 70, 70, 70, 69, + 69, 69, 69, 68, 68, 68, 68, 67, 67, 67, + 67, 67, 66, 66, 66, 66, 65, 65, 65, 65, + 64, 64, 64, 64, 64, 63, 63, 63, 63, 63, + 62, 62, 62, 62, 61, 61, 61, 61, 61, 60, + 60, 60, 60, 60, 59, 59, 59, 59, 59, 58, + 58, 58, 58, 58, 
57, 57, 57, 57, 57, 56, + 56, 56, 56, 56, 55, 55, 55, 55, 55, 54, + 54, 54, 54, 54, 53, 53, 53, 53, 53, 52, + 52, 52, 52, 52, 52, 51, 51, 51, 51, 51, + 50, 50, 50, 50, 50, 50, 49, 49, 49, 49, + 49, 49, 48, 48, 48, 48, 48, 48, 47, 47, + 47, 47, 47, 47, 46, 46, 46, 46, 46, 46, + 45, 45, 45, 45, 45, 45, 44, 44, 44, 44, + 44, 44, 43, 43, 43, 43, 43, 43, 43, 42, + 42, 42, 42, 42, 42, 41, 41, 41, 41, 41, + 41, 41, 40, 40, 40, 40, 40, 40, 40, 39, + 39, 39, 39, 39, 39, 39, 38, 38, 38, 38, + 38, 38, 38, 37, 37, 37, 37, 37, 37, 37, + 37, 36, 36, 36, 36, 36, 36, 36, 35, 35, + 35, 35, 35, 35, 35, 35, 34, 34, 34, 34, + 34, 34, 34, 34, 33, 33, 33, 33, 33, 33, + 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, + 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, + 30, 30, 30, 30, 30, 30, 30, 30, 30, 29, + 29, 29, 29, 29, 29, 29, 29, 29, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 
10, 10, 10, 10, 10, 10, 10, 10, 10, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 0 +}; + +static const int fe_logadd_table_size = + sizeof(fe_logadd_table) / sizeof(fe_logadd_table[0]); + +fixed32 +fe_log_add(fixed32 x, fixed32 y) +{ + fixed32 d, r; + + if (x > y) { + d = (x - y) >> (DEFAULT_RADIX - 8); + r = x; + } + else { + d = (y - x) >> (DEFAULT_RADIX - 8); + r = y; + } + + if (r <= MIN_FIXLOG) + return MIN_FIXLOG; + else if (d > fe_logadd_table_size - 1) + return r; + else { + r += ((fixed32) fe_logadd_table[d] << (DEFAULT_RADIX - 8)); +/* printf("%d - %d = %d | %f - %f = %f | %f - %f = %f\n", + x, y, r, FIX2FLOAT(x), FIX2FLOAT(y), FIX2FLOAT(r), + exp(FIX2FLOAT(x)), exp(FIX2FLOAT(y)), exp(FIX2FLOAT(r))); +*/ + return r; + } +} + +/* + * log_sub for spectral subtraction, similar to logadd but we had + * to smooth function around zero with fixlog in order to improve + * table interpolation properties + * + * The table is created with the file included into distribution + * + * e^z = e^x - e^y + * e^z = e^x (1 - e^(-(x - y))) + * z = x + log(1 - e^(-(x - y))) + * z = x + fixlog(a) + (log(1 - e^(- a)) - log(a)) + * + * Input radix is 8 output radix is 10 + */ +static const uint16 fe_logsub_table[] = { +1, 3, 5, 7, 9, 11, 13, 15, 17, 19, +21, 23, 25, 27, 29, 31, 33, 35, 37, 39, +41, 43, 45, 47, 49, 51, 53, 55, 56, 58, +60, 62, 64, 66, 68, 70, 72, 74, 76, 78, +80, 82, 84, 86, 88, 90, 92, 94, 95, 97, +99, 101, 103, 105, 107, 109, 111, 113, 115, 117, +119, 121, 122, 124, 126, 128, 130, 132, 134, 136, +138, 140, 142, 143, 145, 147, 149, 151, 153, 155, +157, 159, 161, 162, 164, 166, 168, 170, 
172, 174, +176, 178, 179, 181, 183, 185, 187, 189, 191, 193, +194, 196, 198, 200, 202, 204, 206, 207, 209, 211, +213, 215, 217, 219, 220, 222, 224, 226, 228, 230, +232, 233, 235, 237, 239, 241, 243, 244, 246, 248, +250, 252, 254, 255, 257, 259, 261, 263, 265, 266, +268, 270, 272, 274, 275, 277, 279, 281, 283, 284, +286, 288, 290, 292, 294, 295, 297, 299, 301, 302, +304, 306, 308, 310, 311, 313, 315, 317, 319, 320, +322, 324, 326, 327, 329, 331, 333, 335, 336, 338, +340, 342, 343, 345, 347, 349, 350, 352, 354, 356, +357, 359, 361, 363, 364, 366, 368, 370, 371, 373, +375, 377, 378, 380, 382, 384, 385, 387, 389, 391, +392, 394, 396, 397, 399, 401, 403, 404, 406, 408, +410, 411, 413, 415, 416, 418, 420, 422, 423, 425, +427, 428, 430, 432, 433, 435, 437, 439, 440, 442, +444, 445, 447, 449, 450, 452, 454, 455, 457, 459, +460, 462, 464, 465, 467, 469, 471, 472, 474, 476, +477, 479, 481, 482, 484, 486, 487, 489, 490, 492, +494, 495, 497, 499, 500, 502, 504, 505, 507, 509, +510, 512, 514, 515, 517, 518, 520, 522, 523, 525, +527, 528, 530, 532, 533, 535, 536, 538, 540, 541, +543, 544, 546, 548, 549, 551, 553, 554, 556, 557, +559, 561, 562, 564, 565, 567, 569, 570, 572, 573, +575, 577, 578, 580, 581, 583, 585, 586, 588, 589, +591, 592, 594, 596, 597, 599, 600, 602, 603, 605, +607, 608, 610, 611, 613, 614, 616, 618, 619, 621, +622, 624, 625, 627, 628, 630, 632, 633, 635, 636, +638, 639, 641, 642, 644, 645, 647, 649, 650, 652, +653, 655, 656, 658, 659, 661, 662, 664, 665, 667, +668, 670, 671, 673, 674, 676, 678, 679, 681, 682, +684, 685, 687, 688, 690, 691, 693, 694, 696, 697, +699, 700, 702, 703, 705, 706, 708, 709, 711, 712, +714, 715, 717, 718, 719, 721, 722, 724, 725, 727, +728, 730, 731, 733, 734, 736, 737, 739, 740, 742, +743, 745, 746, 747, 749, 750, 752, 753, 755, 756, +758, 759, 761, 762, 763, 765, 766, 768, 769, 771, +772, 774, 775, 776, 778, 779, 781, 782, 784, 785, +786, 788, 789, 791, 792, 794, 795, 796, 798, 799, +801, 802, 804, 805, 806, 808, 809, 811, 812, 813, 
+815, 816, 818, 819, 820, 822, 823, 825, 826, 827, +829, 830, 832, 833, 834, 836, 837, 839, 840, 841, +843, 844, 846, 847, 848, 850, 851, 852, 854, 855, +857, 858, 859, 861, 862, 863, 865, 866, 868, 869, +870, 872, 873, 874, 876, 877, 878, 880, 881, 883, +884, 885, 887, 888, 889, 891, 892, 893, 895, 896, +897, 899, 900, 901, 903, 904, 905, 907, 908, 909, +911, 912, 913, 915, 916, 917, 919, 920, 921, 923, +924, 925, 927, 928, 929, 931, 932, 933, 935, 936, +937, 939, 940, 941, 942, 944, 945, 946, 948, 949, +950, 952, 953, 954, 956, 957, 958, 959, 961, 962, +963, 965, 966, 967, 968, 970, 971, 972, 974, 975, +976, 977, 979, 980, 981, 983, 984, 985, 986, 988, +989, 990, 991, 993, 994, 995, 997, 998, 999, 1000, +1002, 1003, 1004, 1005, 1007, 1008, 1009, 1010, 1012, 1013, +1014, 1015, 1017, 1018, 1019, 1020, 1022, 1023, 1024, 1025, +1027, 1028, 1029, 1030, 1032, 1033, 1034, 1035, 1037, 1038, +1039, 1040, 1041, 1043, 1044, 1045, 1046, 1048, 1049, 1050, +1051, 1052, 1054, 1055, 1056, 1057, 1059, 1060, 1061, 1062, +1063, 1065, 1066, 1067, 1068, 1069, 1071, 1072, 1073, 1074, +1076, 1077, 1078, 1079, 1080, 1082, 1083, 1084, 1085, 1086, +1087, 1089, 1090, 1091, 1092, 1093, 1095, 1096, 1097, 1098, +1099, 1101, 1102, 1103, 1104, 1105, 1106, 1108, 1109, 1110, +1111, 1112, 1114, 1115, 1116, 1117, 1118, 1119, 1121, 1122, +1123, 1124, 1125, 1126, 1128, 1129, 1130, 1131, 1132, 1133, +1135, 1136, 1137, 1138, 1139, 1140, 1141, 1143, 1144, 1145, +1146, 1147, 1148, 1149, 1151, 1152, 1153, 1154, 1155, 1156, +1157, 1159, 1160, 1161, 1162, 1163, 1164, 1165, 1167, 1168, +1169, 1170, 1171, 1172, 1173, 1174, 1176, 1177, 1178, 1179, +1180, 1181, 1182, 1183, 1185, 1186, 1187, 1188, 1189, 1190, +1191, 1192, 1193, 1195, 1196, 1197, 1198, 1199, 1200, 1201, +1202, 1203, 1205, 1206, 1207, 1208, 1209, 1210, 1211, 1212, +1213, 1214, 1216, 1217, 1218, 1219, 1220, 1221, 1222, 1223, +1224, 1225, 1226, 1228, 1229, 1230, 1231, 1232, 1233, 1234, +1235, 1236, 1237, 1238, 1239, 1240, 1242, 1243, 1244, 1245, 
+1246, 1247, 1248, 1249, 1250, 1251, 1252, 1253, 1254, 1255, +1256, 1258, 1259, 1260, 1261, 1262, 1263, 1264, 1265, 1266, +1267, 1268, 1269, 1270, 1271, 1272, 1273, 1274, 1275, 1277, +1278, 1279, 1280, 1281, 1282, 1283, 1284, 1285, 1286, 1287, +1288, 1289, 1290, 1291, 1292, 1293, 1294, 1295, 1296, 1297, +1298, 1299, 1300, 1301, 1302, 1303, 1305, 1306, 1307, 1308, +1309, 1310, 1311, 1312, 1313, 1314, 1315, 1316, 1317, 1318, +1319, 1320, 1321, 1322, 1323, 1324, 1325, 1326, 1327, 1328, +1329, 1330, 1331, 1332, 1333, 1334, 1335, 1336, 1337, 1338, +1339, 1340, 1341, 1342, 1343, 1344, 1345, 1346, 1347, 1348, +1349, 1350, 1351, 1352, 1353, 1354, 1355, 1356, 1357, 1358, +1359, 1360, 1361, 1362, 1363, 1364, 1365, 1366, 1367, 1368, +1369, 1370, 1371, 1372, 1372, 1373, 1374, 1375, 1376, 1377, +1378, 1379, 1380, 1381, 1382, 1383, 1384, 1385, 1386, 1387, +1388, 1389, 1390, 1391, 1392, 1393, 1394, 1395, 1396, 1397, +1398, 1399, 1399, 1400, 1401, 1402, 1403, 1404, 1405, 1406, +1407, 1408, 1409, 1410, 1411, 1412, 1413, 1414, 1415, 1416, +1417, 1418, 1418, 1419, 1420, 1421, 1422, 1423, 1424, 1425, +1426, 1427, 1428, 1429, 1430, 1431, 1432, 1432, 1433, 1434, +1435, 1436, 1437, 1438, 1439, 1440, 1441, 1442, 1443, 1444, +1444, 1445, 1446, 1447, 1448, 1449, 1450, 1451, 1452, 1453, +1454, 1455, 1455, 1456, 1457, 1458, 1459, 1460, 1461, 1462, +1463, 1464, 1465, 1466, 1466, 1467, 1468, 1469, 1470, 1471, +1472, 1473, 1474, 1475, 1475, 1476, 1477, 1478, 1479, 1480, +1481, 1482, 1483, 1483, 1484, 1485, 1486, 1487, 1488, 1489, +1490, 1491, 1491, 1492, 1493, 1494, 1495, 1496, 1497, 1498, +1499, 1499, 1500, 1501, 1502, 1503, 1504, 1505, 1506, 1506, +1507, 1508, 1509, 1510, 1511, 1512, 1513, 1513, 1514, 1515, +1516, 1517, 1518, 1519, 1520, 1520, 1521, 1522, 1523, 1524, +1525, 1526, 1526, 1527, 1528, 1529, 1530, 1531, 1532, 1532, +1533, 1534, 1535, 1536, 1537, 1538, 1538, 1539, 1540, 1541, +1542, 1543, 1544, 1544, 1545, 1546, 1547, 1548, 1549, 1550, +1550, 1551, 1552, 1553, 1554, 1555, 1555, 
1556, 1557, 1558, +1559, 1560, 1560, 1561, 1562, 1563, 1564, 1565, 1565, 1566, +1567, 1568, 1569, 1570, 1570, 1571, 1572, 1573, 1574, 1575, +1575, 1576, 1577, 1578, 1579, 1580, 1580, 1581, 1582, 1583, +1584, 1584, 1585, 1586, 1587, 1588, 1589, 1589, 1590, 1591, +1592, 1593, 1593, 1594, 1595, 1596, 1597, 1598, 1598, 1599, +1600, 1601, 1602, 1602, 1603, 1604, 1605, 1606, 1606, 1607, +1608, 1609, 1610, 1610, 1611, 1612, 1613, 1614, 1614, 1615, +1616, 1617, 1618, 1618, 1619, 1620, 1621, 1622, 1622, 1623, +1624, 1625, 1626, 1626, 1627, 1628, 1629, 1630, 1630, 1631, +1632, 1633, 1634, 1634, 1635, 1636, 1637, 1637, 1638, 1639, +1640, 1641, 1641, 1642, 1643, 1644, 1645, 1645, 1646, 1647, +1648, 1648, 1649, 1650, 1651, 1652, 1652, 1653, 1654, 1655, +1655, 1656, 1657, 1658, 1658, 1659, 1660, 1661, 1662, 1662, +1663, 1664, 1665, 1665, 1666, 1667, 1668, 1668, 1669, 1670, +1671, 1671, 1672, 1673, 1674, 1675, 1675, 1676, 1677, 1678, +1678, 1679, 1680, 1681, 1681, 1682, 1683, 1684, 1684, 1685, +1686, 1687, 1687, 1688, 1689, 1690, 1690, 1691, 1692, 1693, +1693, 1694, 1695, 1696, 1696, 1697, 1698, 1699, 1699, 1700, +1701, 1702, 1702, 1703, 1704, 1705, 1705, 1706, 1707, 1707, +1708, 1709, 1710, 1710, 1711, 1712, 1713, 1713, 1714, 1715, +1716, 1716, 1717, 1718, 1718, 1719, 1720, 1721, 1721, 1722, +1723, 1724, 1724, 1725, 1726, 1727, 1727, 1728, 1729, 1729, +1730, 1731, 1732, 1732, 1733, 1734, 1734, 1735, 1736, 1737, +1737, 1738, 1739, 1740, 1740, 1741, 1742, 1742, 1743, 1744, +1745, 1745, 1746, 1747, 1747, 1748, 1749, 1749, 1750, 1751, +1752, 1752, 1753, 1754, 1754, 1755, 1756, 1757, 1757, 1758, +1759, 1759, 1760, 1761, 1762, 1762, 1763, 1764, 1764, 1765, +1766, 1766, 1767, 1768, 1769, 1769, 1770, 1771, 1771, 1772, +1773, 1773, 1774, 1775, 1776, 1776, 1777, 1778, 1778, 1779, +1780, 1780, 1781, 1782, 1782, 1783, 1784, 1784, 1785, 1786, +1787, 1787, 1788, 1789, 1789, 1790, 1791, 1791, 1792, 1793, +1793, 1794, 1795, 1795, 1796, 1797, 1798, 1798, 1799, 1800, +1800, 1801, 1802, 1802, 
1803, 1804, 1804, 1805, 1806, 1806, +1807, 1808, 1808, 1809, 1810, 1810, 1811, 1812, 1812, 1813, +1814, 1814, 1815, 1816, 1816, 1817, 1818, 1818, 1819, 1820, +1820, 1821, 1822, 1822, 1823, 1824, 1824, 1825, 1826, 1826, +1827, 1828, 1828, 1829, 1830, 1830, 1831, 1832, 1832, 1833, +1834, 1834, 1835, 1836, 1836, 1837, 1838, 1838, 1839, 1840, +1840, 1841, 1842, 1842, 1843, 1844, 1844, 1845, 1845, 1846, +1847, 1847, 1848, 1849, 1849, 1850, 1851, 1851, 1852, 1853, +1853, 1854, 1855, 1855, 1856, 1857, 1857, 1858, 1858, 1859, +1860, 1860, 1861, 1862, 1862, 1863, 1864, 1864, 1865, 1866, +1866, 1867, 1867, 1868, 1869, 1869, 1870, 1871, 1871, 1872, +1873, 1873, 1874, 1874, 1875, 1876, 1876, 1877, 1878, 1878, +1879, 1879, 1880, 1881, 1881, 1882, 1883, 1883, 1884, 1885, +1885, 1886, 1886, 1887, 1888, 1888, 1889, 1890, 1890, 1891, +1891, 1892, 1893, 1893, 1894, 1895, 1895, 1896, 1896, 1897, +1898, 1898, 1899, 1900, 1900, 1901, 1901, 1902, 1903, 1903, +1904, 1904, 1905, 1906, 1906, 1907, 1908, 1908, 1909, 1909, +1910, 1911, 1911, 1912, 1912, 1913, 1914, 1914, 1915, 1916, +1916, 1917, 1917, 1918, 1919, 1919, 1920, 1920, 1921, 1922, +1922, 1923, 1923, 1924, 1925, 1925, 1926, 1926, 1927, 1928, +1928, 1929, 1929, 1930, 1931, 1931, 1932, 1932, 1933, 1934, +1934, 1935, 1935, 1936, 1937, 1937, 1938, 1938, 1939, 1940, +1940, 1941, 1941, 1942, 1943, 1943, 1944, 1944, 1945, 1946, +1946, 1947, 1947, 1948, 1949, 1949, 1950, 1950, 1951, 1952, +1952, 1953, 1953, 1954, 1955, 1955, 1956, 1956, 1957, 1957, +1958, 1959, 1959, 1960, 1960, 1961, 1962, 1962, 1963, 1963, +1964, 1964, 1965, 1966, 1966, 1967, 1967, 1968, 1969, 1969, +1970, 1970, 1971, 1971, 1972, 1973, 1973, 1974, 1974, 1975, +1976, 1976, 1977, 1977, 1978, 1978, 1979, 1980, 1980, 1981, +1981, 1982, 1982, 1983, 1984, 1984, 1985, 1985, 1986, 1986, +1987, 1988, 1988, 1989, 1989, 1990, 1990, 1991, 1992, 1992, +1993, 1993, 1994, 1994, 1995, 1996, 1996, 1997, 1997, 1998, +1998, 1999, 1999, 2000, 2001, 2001, 2002, 2002, 2003, 2003, +2004, 
2005, 2005, 2006, 2006, 2007, 2007, 2008, 2008, 2009, +2010, 2010, 2011, 2011, 2012, 2012, 2013, 2014, 2014, 2015, +2015, 2016, 2016, 2017, 2017, 2018, 2019, 2019, 2020, 2020, +2021, 2021, 2022, 2022, 2023, 2023, 2024, 2025, 2025, 2026, +2026, 2027, 2027, 2028, 2028, 2029, 2030, 2030, 2031, 2031, +2032, 2032, 2033, 2033, 2034, 2034, 2035, 2036, 2036, 2037, +2037, 2038, 2038, 2039, 2039, 2040, 2040, 2041, 2042, 2042, +2043, 2043, 2044, 2044, 2045, 2045, 2046, 2046, 2047, 2048, +2048, 2049, 2049, 2050, 2050, 2051, 2051, 2052, 2052, 2053, +2053, 2054, 2054, 2055, 2056, 2056, 2057, 2057, 2058, 2058, +2059, 2059, 2060, 2060, 2061, 2061, 2062, 2062, 2063, 2064, +2064, 2065, 2065, 2066, 2066, 2067, 2067, 2068, 2068, 2069, +2069, 2070, 2070, 2071, 2072, 2072, 2073, 2073, 2074, 2074, +2075, 2075, 2076, 2076, 2077, 2077, 2078, 2078, 2079, 2079, +2080, 2080, 2081 +}; + +/* Number of entries in fe_logsub_table. */ static const int fe_logsub_table_size = + sizeof(fe_logsub_table) / sizeof(fe_logsub_table[0]); + +/* Fixed-point log-domain subtraction of y from x. Returns MIN_FIXLOG when x < MIN_FIXLOG or y >= x, and x unchanged when the table index (x - y) >> (DEFAULT_RADIX - 8) is past the end of fe_logsub_table; otherwise returns x + FIXLN(x - y) minus the table entry shifted left by DEFAULT_RADIX - 10. NOTE(review): judging by the commented-out check below, the result approximates x + log(1 - exp(y - x)) -- confirm. */ fixed32 +fe_log_sub(fixed32 x, fixed32 y) +{ + fixed32 d, r; + + if (x < MIN_FIXLOG || y >= x) + return MIN_FIXLOG; + + d = (x - y) >> (DEFAULT_RADIX - 8); + + if (d > fe_logsub_table_size - 1) + return x; + + r = fe_logsub_table[d] << (DEFAULT_RADIX - 10); +/* + printf("diff=%d\n", + x + FIXLN(x-y) - r - + (x + FLOAT2FIX(logf(-expm1f(FIX2FLOAT(y - x)))))); +*/ + return x + FIXLN(x-y) - r; +} + +/* Natural log of x converted with FLOAT2FIX; any x <= 0 maps to MIN_FIXLOG. */ static fixed32 +fe_log(float32 x) +{ + if (x <= 0) { + return MIN_FIXLOG; + } + else { + return FLOAT2FIX(log(x)); + } +} +#endif + +/* Convert frequency x to the mel scale: x is first passed through fe_warp_unwarped_to_warped(), then mapped with 2595 * log10(1 + f / 700). */ static float32 +fe_mel(melfb_t * mel, float32 x) +{ + float32 warped = fe_warp_unwarped_to_warped(mel, x); + + return (float32) (2595.0 * log10(1.0 + warped / 700.0)); +} + +/* Inverse of fe_mel(): map mel value x back with 700 * (10^(x / 2595) - 1), then undo the warping with fe_warp_warped_to_unwarped(). */ static float32 +fe_melinv(melfb_t * mel, float32 x) +{ + float32 warped = (float32) (700.0 * (pow(10.0, x / 2595.0) - 1.0)); + return fe_warp_warped_to_unwarped(mel, warped); +} + +/* Build the mel filterbank described by mel_fb. Allocates spec_start, filt_start and filt_width (one entry per filter) plus the flattened filt_coeffs array and fills them in. Returns FE_SUCCESS, or FE_INVALID_PARAM_ERROR when doublewide filters would reach below 0 or above sampling_rate / 2. */ int32 +fe_build_melfilters(melfb_t * mel_fb) +{ + float32 melmin, melmax, melbw, fftfreq; + int n_coeffs, i, j; + + + /* Filter coefficient 
matrix, in flattened form. */ + mel_fb->spec_start = + ckd_calloc(mel_fb->num_filters, sizeof(*mel_fb->spec_start)); + mel_fb->filt_start = + ckd_calloc(mel_fb->num_filters, sizeof(*mel_fb->filt_start)); + mel_fb->filt_width = + ckd_calloc(mel_fb->num_filters, sizeof(*mel_fb->filt_width)); + + /* First calculate the widths of each filter. */ + /* Minimum and maximum frequencies in mel scale. */ + melmin = fe_mel(mel_fb, mel_fb->lower_filt_freq); + melmax = fe_mel(mel_fb, mel_fb->upper_filt_freq); + + /* Width of filters in mel scale */ + melbw = (melmax - melmin) / (mel_fb->num_filters + 1); + /* Double-wide filters extend the range by one filter width on each side, which may push the edges out of the valid band. */ if (mel_fb->doublewide) { + melmin -= melbw; + melmax += melbw; + if ((fe_melinv(mel_fb, melmin) < 0) || + (fe_melinv(mel_fb, melmax) > mel_fb->sampling_rate / 2)) { + E_WARN + ("Out of Range: low filter edge = %f (%f)\n", + fe_melinv(mel_fb, melmin), 0.0); + E_WARN + (" high filter edge = %f (%f)\n", + fe_melinv(mel_fb, melmax), mel_fb->sampling_rate / 2); + /* NOTE(review): the three arrays allocated above are not released on this path; presumably the owner of mel_fb frees them -- confirm. */ return FE_INVALID_PARAM_ERROR; + } + } + + /* DFT point spacing */ + fftfreq = mel_fb->sampling_rate / (float32) mel_fb->fft_size; + + /* Count and place filter coefficients. */ + n_coeffs = 0; + for (i = 0; i < mel_fb->num_filters; ++i) { + float32 freqs[3]; + + /* Left, center, right frequencies in Hertz */ + for (j = 0; j < 3; ++j) { + if (mel_fb->doublewide) + freqs[j] = fe_melinv(mel_fb, (i + j * 2) * melbw + melmin); + else + freqs[j] = fe_melinv(mel_fb, (i + j) * melbw + melmin); + /* Round them to DFT points if requested */ + if (mel_fb->round_filters) + freqs[j] = ((int) (freqs[j] / fftfreq + 0.5)) * fftfreq; + } + + /* spec_start is the start of this filter in the power spectrum. */ + mel_fb->spec_start[i] = -1; + /* There must be a better way... */ + /* Linear scan over DFT bins 0 .. fft_size / 2: skip bins below the left edge, remember the first bin inside the filter, and stop at the first bin past the right edge or at the last bin. */ for (j = 0; j < mel_fb->fft_size / 2 + 1; ++j) { + float32 hz = j * fftfreq; + if (hz < freqs[0]) + continue; + else if (hz > freqs[2] || j == mel_fb->fft_size / 2) { + /* filt_width is the width in DFT points of this filter. 
*/ + /* NOTE(review): spec_start[i] is still -1 at this point if no earlier bin fell inside the filter, which would make the width j + 1 -- confirm this cannot happen for valid configurations. */ mel_fb->filt_width[i] = j - mel_fb->spec_start[i]; + /* filt_start is the start of this filter in the filt_coeffs array. */ + mel_fb->filt_start[i] = n_coeffs; + n_coeffs += mel_fb->filt_width[i]; + break; + } + if (mel_fb->spec_start[i] == -1) + mel_fb->spec_start[i] = j; + } + } + + /* Now go back and allocate the coefficient array. */ + mel_fb->filt_coeffs = + ckd_malloc(n_coeffs * sizeof(*mel_fb->filt_coeffs)); + + /* And now generate the coefficients. */ + n_coeffs = 0; + for (i = 0; i < mel_fb->num_filters; ++i) { + float32 freqs[3]; + + /* Left, center, right frequencies in Hertz */ + for (j = 0; j < 3; ++j) { + if (mel_fb->doublewide) + freqs[j] = fe_melinv(mel_fb, (i + j * 2) * melbw + melmin); + else + freqs[j] = fe_melinv(mel_fb, (i + j) * melbw + melmin); + /* Round them to DFT points if requested */ + if (mel_fb->round_filters) + freqs[j] = ((int) (freqs[j] / fftfreq + 0.5)) * fftfreq; + } + + /* One coefficient per DFT bin covered by filter i. */ for (j = 0; j < mel_fb->filt_width[i]; ++j) { + float32 hz, loslope, hislope; + + hz = (mel_fb->spec_start[i] + j) * fftfreq; + if (hz < freqs[0] || hz > freqs[2]) { + E_FATAL + ("Failed to create filterbank, frequency range does not match. 
" + "Sample rate %f, FFT size %d, lowerf %f < freq %f > upperf %f.\n", + mel_fb->sampling_rate, mel_fb->fft_size, freqs[0], hz, + freqs[2]); + } + /* Rising edge (left to center) and falling edge (center to right) evaluated at hz; the smaller of the two is stored below. */ loslope = (hz - freqs[0]) / (freqs[1] - freqs[0]); + hislope = (freqs[2] - hz) / (freqs[2] - freqs[1]); + /* With unit_area set, both slopes are scaled by 2 / (right - left). */ if (mel_fb->unit_area) { + loslope *= 2 / (freqs[2] - freqs[0]); + hislope *= 2 / (freqs[2] - freqs[0]); + } + if (loslope < hislope) { +#ifdef FIXED_POINT + mel_fb->filt_coeffs[n_coeffs] = fe_log(loslope); +#else + mel_fb->filt_coeffs[n_coeffs] = loslope; +#endif + } + else { +#ifdef FIXED_POINT + mel_fb->filt_coeffs[n_coeffs] = fe_log(hislope); +#else + mel_fb->filt_coeffs[n_coeffs] = hislope; +#endif + } + ++n_coeffs; + } + } + + return FE_SUCCESS; +} + +/* Precompute the DCT tables stored in mel_fb: mel_cosine[i][j] = cos(pi / num_filters * i * (j + 0.5)) for num_cepstra x num_filters entries, followed by the normalization constants and, when lifter_val is non-zero, the liftering weights. Always returns 0. */ int32 +fe_compute_melcosine(melfb_t * mel_fb) +{ + + float64 freqstep; + int32 i, j; + + mel_fb->mel_cosine = + (mfcc_t **) ckd_calloc_2d(mel_fb->num_cepstra, + mel_fb->num_filters, sizeof(mfcc_t)); + + freqstep = M_PI / mel_fb->num_filters; + /* NOTE: The first row vector is actually unnecessary but we leave + * it in to avoid confusion. */ + for (i = 0; i < mel_fb->num_cepstra; i++) { + for (j = 0; j < mel_fb->num_filters; j++) { + float64 cosine; + + cosine = cos(freqstep * i * (j + 0.5)); + mel_fb->mel_cosine[i][j] = FLOAT2COS(cosine); + } + } + + /* Also precompute normalization constants for unitary DCT. 
*/ + mel_fb->sqrt_inv_n = FLOAT2COS(sqrt(1.0 / mel_fb->num_filters)); + mel_fb->sqrt_inv_2n = FLOAT2COS(sqrt(2.0 / mel_fb->num_filters)); + + /* And liftering weights */ + if (mel_fb->lifter_val) { + /* NOTE(review): plain calloc() whose result is not checked before the loop below writes through it, unlike the ckd_* allocators used for the other tables -- confirm this is intentional (for example because it is released with free()). */ mel_fb->lifter = + calloc(mel_fb->num_cepstra, sizeof(*mel_fb->lifter)); + for (i = 0; i < mel_fb->num_cepstra; ++i) { + mel_fb->lifter[i] = FLOAT2MFCC(1 + mel_fb->lifter_val / 2 + * sin(i * M_PI / + mel_fb->lifter_val)); + } + } + + return (0); +} + +/* Pre-emphasis filter: out[i] = in[i] - factor * in[i - 1] for len samples, with prior standing in for in[-1]. The FIXED16 branch uses a 15-bit fractional factor and the FIXED_POINT branch uses DEFAULT_RADIX-shifted samples. in[0] and out[0] are accessed even when len is 0. */ static void +fe_pre_emphasis(int16 const *in, frame_t * out, int32 len, + float32 factor, int16 prior) +{ + int i; + +#if defined(FIXED16) + int16 fxd_alpha = (int16) (factor * 0x8000); + int32 tmp1, tmp2; + + tmp1 = (int32) in[0] << 15; + tmp2 = (int32) prior *fxd_alpha; + out[0] = (int16) ((tmp1 - tmp2) >> 15); + for (i = 1; i < len; ++i) { + tmp1 = (int32) in[i] << 15; + tmp2 = (int32) in[i - 1] * fxd_alpha; + out[i] = (int16) ((tmp1 - tmp2) >> 15); + } +#elif defined(FIXED_POINT) + fixed32 fxd_alpha = FLOAT2FIX(factor); + out[0] = ((fixed32) in[0] << DEFAULT_RADIX) - (prior * fxd_alpha); + for (i = 1; i < len; ++i) + out[i] = ((fixed32) in[i] << DEFAULT_RADIX) + - (fixed32) in[i - 1] * fxd_alpha; +#else + out[0] = (frame_t) in[0] - (frame_t) prior *factor; + for (i = 1; i < len; i++) + out[i] = (frame_t) in[i] - (frame_t) in[i - 1] * factor; +#endif +} + +/* Copy len 16-bit samples from in to out without filtering: a raw memcpy for FIXED16, a left shift by DEFAULT_RADIX for FIXED_POINT, and a plain cast to frame_t otherwise. */ static void +fe_short_to_frame(int16 const *in, frame_t * out, int32 len) +{ + int i; + +#if defined(FIXED16) + memcpy(out, in, len * sizeof(*out)); +#elif defined(FIXED_POINT) + for (i = 0; i < len; i++) + out[i] = (int32) in[i] << DEFAULT_RADIX; +#else /* FIXED_POINT */ + for (i = 0; i < len; i++) + out[i] = (frame_t) in[i]; +#endif /* FIXED_POINT */ +} + +/* Fill in[0 .. in_len / 2 - 1] with the Hamming window 0.54 - 0.46 * cos(2 * pi * i / (in_len - 1)); only the first half is stored. */ void +fe_create_hamming(window_t * in, int32 in_len) +{ + int i; + + /* Symmetric, so we only create the first half of it. 
*/ + for (i = 0; i < in_len / 2; i++) { + float64 hamm; + hamm = (0.54 - 0.46 * cos(2 * M_PI * i / + ((float64) in_len - 1.0))); +#ifdef FIXED16 + in[i] = (int16) (hamm * 0x8000); +#else + in[i] = FLOAT2COS(hamm); +#endif + } +} + +/* Window the in_len samples of in in place. When remove_dc is set the mean of the samples is subtracted first. Each sample i in the first half and its mirror in_len - 1 - i are multiplied by window[i]; for odd in_len the middle sample is left unscaled. */ static void +fe_hamming_window(frame_t * in, window_t * window, int32 in_len, + int32 remove_dc) +{ + int i; + + if (remove_dc) { +#ifdef FIXED16 + int32 mean = 0; /* Use int32 to avoid possibility of overflow */ +#else + frame_t mean = 0; +#endif + + for (i = 0; i < in_len; i++) + mean += in[i]; + mean /= in_len; + for (i = 0; i < in_len; i++) + in[i] -= (frame_t) mean; + } + +#ifdef FIXED16 + for (i = 0; i < in_len / 2; i++) { + int32 tmp1, tmp2; + + tmp1 = (int32) in[i] * window[i]; + tmp2 = (int32) in[in_len - 1 - i] * window[i]; + in[i] = (int16) (tmp1 >> 15); + in[in_len - 1 - i] = (int16) (tmp2 >> 15); + } +#else + for (i = 0; i < in_len / 2; i++) { + in[i] = COSMUL(in[i], window[i]); + in[in_len - 1 - i] = COSMUL(in[in_len - 1 - i], window[i]); + } +#endif +} + +/* Turn the first len samples of fe->spch into a windowed frame in fe->frame: pre-emphasize them (updating fe->prior) or copy them as-is, zero frame[len .. fft_size - 1], then apply the Hamming window over frame_size samples. Returns len. */ static int +fe_spch_to_frame(fe_t * fe, int len) +{ + /* Copy to the frame buffer. */ + if (fe->pre_emphasis_alpha != 0.0) { + fe_pre_emphasis(fe->spch, fe->frame, len, + fe->pre_emphasis_alpha, fe->prior); + /* Remember the sample that will sit just before the start of the next frame. */ if (len >= fe->frame_shift) + fe->prior = fe->spch[fe->frame_shift - 1]; + else + /* NOTE(review): reads spch[-1] if len is 0 -- confirm callers never pass an empty frame. */ fe->prior = fe->spch[len - 1]; + } + else + fe_short_to_frame(fe->spch, fe->frame, len); + + /* Zero pad up to FFT size. */ + /* NOTE(review): assumes len <= fe->fft_size; a larger len would make the byte count negative -- confirm. */ memset(fe->frame + len, 0, (fe->fft_size - len) * sizeof(*fe->frame)); + + /* Window. */ + fe_hamming_window(fe->frame, fe->hamming_window, fe->frame_size, + fe->remove_dc); + + return len; +} + +/* Load a first frame: copy at most frame_size samples from in into fe->spch, apply SWAP_INT16 and dithering when configured, then build the frame with fe_spch_to_frame(). Returns the number of samples used. */ int +fe_read_frame(fe_t * fe, int16 const *in, int32 len) +{ + int i; + + if (len > fe->frame_size) + len = fe->frame_size; + + /* Read it into the raw speech buffer. */ + memcpy(fe->spch, in, len * sizeof(*in)); + /* Swap and dither if necessary. 
*/ + if (fe->swap) + for (i = 0; i < len; ++i) + SWAP_INT16(&fe->spch[i]); + /* Dither: add 1 to a sample whenever s3_rand_int31() % 4 is 0. */ if (fe->dither) + for (i = 0; i < len; ++i) + fe->spch[i] += (int16) ((!(s3_rand_int31() % 4)) ? 1 : 0); + + return fe_spch_to_frame(fe, len); +} + +/* Advance by one frame shift: move the last frame_size - frame_shift samples of fe->spch to the front, append at most frame_shift new samples from in (with SWAP_INT16 and dithering when configured), and rebuild the frame from the offset + len samples now in the buffer. Returns that sample count. */ int +fe_shift_frame(fe_t * fe, int16 const *in, int32 len) +{ + int offset, i; + + if (len > fe->frame_shift) + len = fe->frame_shift; + offset = fe->frame_size - fe->frame_shift; + + /* Shift data into the raw speech buffer. */ + memmove(fe->spch, fe->spch + fe->frame_shift, + offset * sizeof(*fe->spch)); + memcpy(fe->spch + offset, in, len * sizeof(*fe->spch)); + /* Swap and dither if necessary. */ + if (fe->swap) + for (i = 0; i < len; ++i) + SWAP_INT16(&fe->spch[offset + i]); + if (fe->dither) + for (i = 0; i < len; ++i) + fe->spch[offset + i] + += (int16) ((!(s3_rand_int31() % 4)) ? 1 : 0); + + return fe_spch_to_frame(fe, offset + len); +} + +/** + * Create arrays of twiddle factors. + */ +void +fe_create_twiddle(fe_t * fe) +{ + int i; + + /* ccc[i] = cos(2 * pi * i / fft_size) and sss[i] = sin(2 * pi * i / fft_size) for i < fft_size / 4, stored in the numeric format of the build. */ for (i = 0; i < fe->fft_size / 4; ++i) { + float64 a = 2 * M_PI * i / fe->fft_size; +#ifdef FIXED16 + fe->ccc[i] = (int16) (cos(a) * 0x8000); + fe->sss[i] = (int16) (sin(a) * 0x8000); +#elif defined(FIXED_POINT) + fe->ccc[i] = FLOAT2COS(cos(a)); + fe->sss[i] = FLOAT2COS(sin(a)); +#else + fe->ccc[i] = cos(a); + fe->sss[i] = sin(a); +#endif + } +} + + +/* Translated from the FORTRAN (obviously) from "Real-Valued Fast + * Fourier Transform Algorithms" by Henrik V. Sorensen et al., IEEE + * Transactions on Acoustics, Speech, and Signal Processing, vol. 35, + * no.6. The 16-bit version does a version of "block floating + * point" in order to avoid rounding errors. + */ +#if defined(FIXED16) +static int +fe_fft_real(fe_t * fe) +{ + int i, j, k, m, n, lz; + frame_t *x, xt, max; + + x = fe->frame; + m = fe->fft_order; + n = fe->fft_size; + + /* Bit-reverse the input. 
*/ + j = 0; + for (i = 0; i < n - 1; ++i) { + if (i < j) { + xt = x[j]; + x[j] = x[i]; + x[i] = xt; + } + k = n / 2; + while (k <= j) { + j -= k; + k /= 2; + } + j += k; + } + /* Determine how many bits of dynamic range are in the input. */ + max = 0; + for (i = 0; i < n; ++i) + if (abs(x[i]) > max) + max = abs(x[i]); + /* The FFT has a gain of M bits, so we need to attenuate the input + * by M bits minus the number of leading zeroes in the input's + * range in order to avoid overflows. */ + for (lz = 0; lz < m; ++lz) + if (max & (1 << (15 - lz))) + break; + + /* Basic butterflies (2-point FFT, real twiddle factors): + * x[i] = x[i] + 1 * x[i+1] + * x[i+1] = x[i] + -1 * x[i+1] + */ + /* The quantization error introduced by attenuating the input at + * any given stage of the FFT has a cascading effect, so we hold + * off on it until it's absolutely necessary. */ + for (i = 0; i < n; i += 2) { + int atten = (lz == 0); + xt = x[i] >> atten; + x[i] = xt + (x[i + 1] >> atten); + x[i + 1] = xt - (x[i + 1] >> atten); + } + + /* The rest of the butterflies, in stages from 1..m */ + for (k = 1; k < m; ++k) { + int n1, n2, n4; + /* Start attenuating once we hit the number of leading zeros. */ + int atten = (k >= lz); + + n4 = k - 1; + n2 = k; + n1 = k + 1; + /* Stride over each (1 << (k+1)) points */ + for (i = 0; i < n; i += (1 << n1)) { + /* Basic butterfly with real twiddle factors: + * x[i] = x[i] + 1 * x[i + (1<> atten; + x[i] = xt + (x[i + (1 << n2)] >> atten); + x[i + (1 << n2)] = xt - (x[i + (1 << n2)] >> atten); + + /* The other ones with real twiddle factors: + * x[i + (1<> atten; + x[i + (1 << n4)] = x[i + (1 << n4)] >> atten; + + /* Butterflies with complex twiddle factors. + * There are (1<ccc[j << (m - n1)]; + ss = fe->sss[j << (m - n1)]; + + /* There are some symmetry properties which allow us + * to get away with only four multiplications here. 
*/ + { + int32 tmp1, tmp2; + tmp1 = (int32) x[i3] * cc + (int32) x[i4] * ss; + tmp2 = (int32) x[i3] * ss - (int32) x[i4] * cc; + t1 = (int16) (tmp1 >> 15) >> atten; + t2 = (int16) (tmp2 >> 15) >> atten; + } + + x[i4] = (x[i2] >> atten) - t2; + x[i3] = (-x[i2] >> atten) - t2; + x[i2] = (x[i1] >> atten) - t1; + x[i1] = (x[i1] >> atten) + t1; + } + } + } + + /* Return the residual scaling factor. */ + return lz; +} +#else /* !FIXED16 */ +/* In-place real-input FFT of the fft_size samples in fe->frame: bit-reversal, a first pass of 2-point butterflies, then the remaining stages using the ccc/sss twiddle tables. Returns fft_order, which makes the rescaling shift computed in fe_spec_magnitude() zero. */ static int +fe_fft_real(fe_t * fe) +{ + int i, j, k, m, n; + frame_t *x, xt; + + x = fe->frame; + m = fe->fft_order; + n = fe->fft_size; + + /* Bit-reverse the input. */ + j = 0; + for (i = 0; i < n - 1; ++i) { + if (i < j) { + xt = x[j]; + x[j] = x[i]; + x[i] = xt; + } + k = n / 2; + while (k <= j) { + j -= k; + k /= 2; + } + j += k; + } + + /* Basic butterflies (2-point FFT, real twiddle factors): + * x[i] = x[i] + 1 * x[i+1] + * x[i+1] = x[i] + -1 * x[i+1] + */ + for (i = 0; i < n; i += 2) { + xt = x[i]; + x[i] = (xt + x[i + 1]); + x[i + 1] = (xt - x[i + 1]); + } + + /* The rest of the butterflies, in stages from 1..m */ + for (k = 1; k < m; ++k) { + int n1, n2, n4; + + n4 = k - 1; + n2 = k; + n1 = k + 1; + /* Stride over each (1 << (k+1)) points */ + for (i = 0; i < n; i += (1 << n1)) { + /* Basic butterfly with real twiddle factors: + * x[i] = x[i] + 1 * x[i + (1<ccc[j << (m - n1)]; + ss = fe->sss[j << (m - n1)]; + + /* There are some symmetry properties which allow us + * to get away with only four multiplications here. */ + t1 = COSMUL(x[i3], cc) + COSMUL(x[i4], ss); + t2 = COSMUL(x[i3], ss) - COSMUL(x[i4], cc); + + x[i4] = (x[i2] - t2); + x[i3] = (-x[i2] - t2); + x[i2] = (x[i1] - t1); + x[i1] = (x[i1] + t1); + } + } + } + + /* This isn't used, but return it for completeness. */ + return m; +} +#endif /* !FIXED16 */ + +/* Run fe_fft_real() on fe->frame and write the power spectrum for bins 0 .. fft_size / 2 into fe->spec: squared magnitudes in floating-point builds, doubled logs combined with fe_log_add() in the fixed-point builds. */ static void +fe_spec_magnitude(fe_t * fe) +{ + frame_t *fft; + powspec_t *spec; + int32 j, scale, fftsize; + + /* Do FFT and get the scaling factor back (only actually used in + * fixed-point). 
Note the scaling factor is expressed in bits. */ + scale = fe_fft_real(fe); + + /* Convenience pointers to make things less awkward below. */ + fft = fe->frame; + spec = fe->spec; + fftsize = fe->fft_size; + + /* We need to scale things up the rest of the way to N. */ + scale = fe->fft_order - scale; + + /* The first point (DC coefficient) has no imaginary part */ + { +#ifdef FIXED16 + spec[0] = fixlog(abs(fft[0]) << scale) * 2; +#elif defined(FIXED_POINT) + spec[0] = FIXLN(abs(fft[0]) << scale) * 2; +#else + spec[0] = fft[0] * fft[0]; +#endif + } + + /* Bin j combines fft[j] and fft[fftsize - j] (named rr and ii below). NOTE(review): at j == fftsize / 2 both indices are the same element, so that bin is added to itself -- confirm this is intended. */ for (j = 1; j <= fftsize / 2; j++) { +#ifdef FIXED16 + int32 rr = fixlog(abs(fft[j]) << scale) * 2; + int32 ii = fixlog(abs(fft[fftsize - j]) << scale) * 2; + spec[j] = fe_log_add(rr, ii); +#elif defined(FIXED_POINT) + int32 rr = FIXLN(abs(fft[j]) << scale) * 2; + int32 ii = FIXLN(abs(fft[fftsize - j]) << scale) * 2; + spec[j] = fe_log_add(rr, ii); +#else + spec[j] = fft[j] * fft[j] + fft[fftsize - j] * fft[fftsize - j]; +#endif + } +} + +/* Apply the mel filterbank to the power spectrum: for each filter f, fe->mfspec[f] accumulates spec[spec_start[f] + i] weighted by filt_coeffs[filt_start[f] + i] over filt_width[f] bins. FIXED_POINT builds add the log-domain values and accumulate with fe_log_add(). */ static void +fe_mel_spec(fe_t * fe) +{ + int whichfilt; + powspec_t *spec, *mfspec; + + /* Convenience pointers. 
*/ + spec = fe->spec; + mfspec = fe->mfspec; + for (whichfilt = 0; whichfilt < fe->mel_fb->num_filters; whichfilt++) { + int spec_start, filt_start, i; + + spec_start = fe->mel_fb->spec_start[whichfilt]; + filt_start = fe->mel_fb->filt_start[whichfilt]; + +#ifdef FIXED_POINT + /* Log domain: adding a coefficient is a multiplication, and fe_log_add() does the summation. The first bin seeds the accumulator. */ mfspec[whichfilt] = + spec[spec_start] + fe->mel_fb->filt_coeffs[filt_start]; + for (i = 1; i < fe->mel_fb->filt_width[whichfilt]; i++) { + mfspec[whichfilt] = fe_log_add(mfspec[whichfilt], + spec[spec_start + i] + + fe->mel_fb-> + filt_coeffs[filt_start + i]); + } +#else /* !FIXED_POINT */ + mfspec[whichfilt] = 0; + for (i = 0; i < fe->mel_fb->filt_width[whichfilt]; i++) + mfspec[whichfilt] += + spec[spec_start + i] * fe->mel_fb->filt_coeffs[filt_start + + i]; +#endif /* !FIXED_POINT */ + } + +} + +#define LOG_FLOOR 1e-4 /* Added to each filter output before log() in fe_mel_cep() in non-FIXED_POINT builds. */ + +/* Convert fe->mfspec into the output vector mfcep. Floating-point builds first replace each filter output by log(value + LOG_FLOOR). Then the log spectrum is copied out (RAW_LOG_SPEC), smoothed with fe_dct2() followed by fe_dct3() (SMOOTH_LOG_SPEC), or transformed according to fe->transform. */ static void +fe_mel_cep(fe_t * fe, mfcc_t * mfcep) +{ + int32 i; + powspec_t *mfspec; + + /* Convenience pointer. */ + mfspec = fe->mfspec; + + for (i = 0; i < fe->mel_fb->num_filters; ++i) { +#ifndef FIXED_POINT /* It's already in log domain for fixed point */ + mfspec[i] = log(mfspec[i] + LOG_FLOOR); +#endif /* !FIXED_POINT */ + } + + /* If we are doing LOG_SPEC, then do nothing. */ + if (fe->log_spec == RAW_LOG_SPEC) { + for (i = 0; i < fe->feature_dimension; i++) { + mfcep[i] = (mfcc_t) mfspec[i]; + } + } + /* For smoothed spectrum, do DCT-II followed by (its inverse) DCT-III */ + else if (fe->log_spec == SMOOTH_LOG_SPEC) { + /* FIXME: This is probably broken for fixed-point. 
*/ + fe_dct2(fe, mfspec, mfcep, 0); + fe_dct3(fe, mfcep, mfspec); + for (i = 0; i < fe->feature_dimension; i++) { + mfcep[i] = (mfcc_t) mfspec[i]; + } + } + /* Otherwise pick the cepstral transform: DCT_II and DCT_HTK both use fe_dct2() (htk flag off or on), anything else uses fe_spec2cep(). */ else if (fe->transform == DCT_II) + fe_dct2(fe, mfspec, mfcep, FALSE); + else if (fe->transform == DCT_HTK) + fe_dct2(fe, mfspec, mfcep, TRUE); + else + fe_spec2cep(fe, mfspec, mfcep); + + return; +} + +/* Cepstral transform that normalizes by num_filters: mfcep[i] is the mel_cosine-weighted sum of mflogspec over all filters, with the j == 0 term given half the weight of the others, divided by num_filters. Writes num_cepstra outputs. */ void +fe_spec2cep(fe_t * fe, const powspec_t * mflogspec, mfcc_t * mfcep) +{ + int32 i, j, beta; + + /* Compute C0 separately (its basis vector is 1) to avoid + * costly multiplications. */ + mfcep[0] = mflogspec[0] / 2; /* beta = 0.5 */ + for (j = 1; j < fe->mel_fb->num_filters; j++) + mfcep[0] += mflogspec[j]; /* beta = 1.0 */ + mfcep[0] /= (frame_t) fe->mel_fb->num_filters; + + for (i = 1; i < fe->num_cepstra; ++i) { + mfcep[i] = 0; + for (j = 0; j < fe->mel_fb->num_filters; j++) { + if (j == 0) + beta = 1; /* 0.5 */ + else + beta = 2; /* 1.0 */ + mfcep[i] += COSMUL(mflogspec[j], + fe->mel_fb->mel_cosine[i][j]) * beta; + } + /* Note that this actually normalizes by num_filters, like the + * original Sphinx front-end, due to the doubled 'beta' factor + * above. */ + mfcep[i] /= (frame_t) fe->mel_fb->num_filters * 2; + } +} + +/* DCT-II of the num_filters values in mflogspec into num_cepstra coefficients in mfcep. Coefficients 1 and up are scaled by sqrt_inv_2n; C0 is scaled by sqrt_inv_n, or by sqrt_inv_2n when htk is non-zero. */ void +fe_dct2(fe_t * fe, const powspec_t * mflogspec, mfcc_t * mfcep, int htk) +{ + int32 i, j; + + /* Compute C0 separately (its basis vector is 1) to avoid + * costly multiplications. 
*/ + mfcep[0] = mflogspec[0]; + for (j = 1; j < fe->mel_fb->num_filters; j++) + mfcep[0] += mflogspec[j]; + if (htk) + mfcep[0] = COSMUL(mfcep[0], fe->mel_fb->sqrt_inv_2n); + else /* sqrt(1/N) = sqrt(2/N) * 1/sqrt(2) */ + mfcep[0] = COSMUL(mfcep[0], fe->mel_fb->sqrt_inv_n); + + for (i = 1; i < fe->num_cepstra; ++i) { + mfcep[i] = 0; + for (j = 0; j < fe->mel_fb->num_filters; j++) { + mfcep[i] += COSMUL(mflogspec[j], fe->mel_fb->mel_cosine[i][j]); + } + mfcep[i] = COSMUL(mfcep[i], fe->mel_fb->sqrt_inv_2n); + } +} + +/* Multiply each of the num_cepstra coefficients in mfcep by its precomputed liftering weight; does nothing when lifter_val is 0. */ void +fe_lifter(fe_t * fe, mfcc_t * mfcep) +{ + int32 i; + + if (fe->mel_fb->lifter_val == 0) + return; + + for (i = 0; i < fe->num_cepstra; ++i) { + mfcep[i] = MFCCMUL(mfcep[i], fe->mel_fb->lifter[i]); + } +} + +/* DCT-III, used by fe_mel_cep() as the inverse of fe_dct2(): rebuild num_filters values in mflogspec from the num_cepstra coefficients in mfcep, weighting C0 by SQRT_HALF and scaling each result by sqrt_inv_2n. */ void +fe_dct3(fe_t * fe, const mfcc_t * mfcep, powspec_t * mflogspec) +{ + int32 i, j; + + for (i = 0; i < fe->mel_fb->num_filters; ++i) { + mflogspec[i] = COSMUL(mfcep[0], SQRT_HALF); + for (j = 1; j < fe->num_cepstra; j++) { + mflogspec[i] += COSMUL(mfcep[j], fe->mel_fb->mel_cosine[j][i]); + } + mflogspec[i] = COSMUL(mflogspec[i], fe->mel_fb->sqrt_inv_2n); + } +} + +/* Produce one feature vector in fea from the frame currently held in fe by calling, in order: fe_spec_magnitude, fe_mel_spec, fe_track_snr (which sets is_speech), fe_mel_cep, fe_lifter and fe_vad_hangover. fe_track_snr and fe_vad_hangover are defined outside this section. */ void +fe_write_frame(fe_t * fe, mfcc_t * fea) +{ + int32 is_speech; + + fe_spec_magnitude(fe); + fe_mel_spec(fe); + fe_track_snr(fe, &is_speech); + fe_mel_cep(fe, fea); + fe_lifter(fe, fea); + fe_vad_hangover(fe, fea, is_speech); +} + + +/* Thin wrapper: allocate a d1 x d2 array of elem_size-byte elements with ckd_calloc_2d(). */ void * +fe_create_2d(int32 d1, int32 d2, int32 elem_size) +{ + return (void *) ckd_calloc_2d(d1, d2, elem_size); +} + +/* Thin wrapper: release an array obtained from fe_create_2d() with ckd_free_2d(). */ void +fe_free_2d(void *arr) +{ + ckd_free_2d((void **) arr); +} diff --git a/media/sphinxbase/src/libsphinxbase/fe/fe_type.h b/media/sphinxbase/src/libsphinxbase/fe/fe_type.h new file mode 100644 index 000000000..160ed8ff8 --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/fe/fe_type.h @@ -0,0 +1,65 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2013 Carnegie Mellon University. All rights + * reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * ==================================================================== + * + */ +#ifndef FE_TYPE_H +#define FE_TYPE_H + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include "sphinxbase/fe.h" +#include "sphinxbase/fixpoint.h" + +#ifdef FIXED16 +/* Q15 format */ +typedef int16 frame_t; +typedef int16 window_t; +typedef int32 powspec_t; +typedef struct { int16 r, i; } complex; +#elif defined(FIXED_POINT) +typedef fixed32 frame_t; +typedef int32 powspec_t; +typedef fixed32 window_t; +typedef struct { fixed32 r, i; } complex; +#else /* FIXED_POINT */ +typedef float64 frame_t; +typedef float64 powspec_t; +typedef float64 window_t; +typedef struct { float64 r, i; } complex; +#endif /* FIXED_POINT */ + +#endif /* FE_TYPE_H */ diff --git a/media/sphinxbase/src/libsphinxbase/fe/fe_warp.c b/media/sphinxbase/src/libsphinxbase/fe/fe_warp.c new file mode 100644 index 000000000..e409bea76 --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/fe/fe_warp.c @@ -0,0 +1,252 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2006 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. 
+ * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/********************************************************************* + * + * File: fe_warp.c + * + * Description: + * Allows a caller to choose a warping function. + *********************************************************************/ + +/* static char rcsid[] = "@(#)$Id: fe_warp.c,v 1.2 2006/02/17 00:31:34 egouvea Exp $";*/ + +#include "fe_warp_inverse_linear.h" +#include "fe_warp_affine.h" +#include "fe_warp_piecewise_linear.h" +#include "fe_warp.h" + +#include "sphinxbase/err.h" + +#include +#include +#include +#include + +/* This is for aliases for each of the entries below. Currently not + used. 
+*/ +static char *__name2id[] = { + "inverse", + "linear", + "piecewise", + NULL +}; + +static char *name2id[] = { + "inverse_linear", + "affine", + "piecewise_linear", + NULL +}; + +static fe_warp_conf_t fe_warp_conf[FE_WARP_ID_MAX + 1] = { + {fe_warp_inverse_linear_set_parameters, + fe_warp_inverse_linear_doc, + fe_warp_inverse_linear_id, + fe_warp_inverse_linear_n_param, + fe_warp_inverse_linear_warped_to_unwarped, + fe_warp_inverse_linear_unwarped_to_warped, + fe_warp_inverse_linear_print}, /* Inverse linear warping */ + {fe_warp_affine_set_parameters, + fe_warp_affine_doc, + fe_warp_affine_id, + fe_warp_affine_n_param, + fe_warp_affine_warped_to_unwarped, + fe_warp_affine_unwarped_to_warped, + fe_warp_affine_print}, /* Affine warping */ + {fe_warp_piecewise_linear_set_parameters, + fe_warp_piecewise_linear_doc, + fe_warp_piecewise_linear_id, + fe_warp_piecewise_linear_n_param, + fe_warp_piecewise_linear_warped_to_unwarped, + fe_warp_piecewise_linear_unwarped_to_warped, + fe_warp_piecewise_linear_print}, /* Piecewise_Linear warping */ +}; + +int +fe_warp_set(melfb_t *mel, const char *id_name) +{ + uint32 i; + + for (i = 0; name2id[i]; i++) { + if (strcmp(id_name, name2id[i]) == 0) { + mel->warp_id = i; + break; + } + } + + if (name2id[i] == NULL) { + for (i = 0; __name2id[i]; i++) { + if (strcmp(id_name, __name2id[i]) == 0) { + mel->warp_id = i; + break; + } + } + if (__name2id[i] == NULL) { + E_ERROR("Unimplemented warping function %s\n", id_name); + E_ERROR("Implemented functions are:\n"); + for (i = 0; name2id[i]; i++) { + fprintf(stderr, "\t%s\n", name2id[i]); + } + mel->warp_id = FE_WARP_ID_NONE; + + return FE_START_ERROR; + } + } + + return FE_SUCCESS; +} + +void +fe_warp_set_parameters(melfb_t *mel, char const *param_str, float sampling_rate) +{ + if (mel->warp_id <= FE_WARP_ID_MAX) { + fe_warp_conf[mel->warp_id].set_parameters(param_str, sampling_rate); + } + else if (mel->warp_id == FE_WARP_ID_NONE) { + E_FATAL("feat module must be configured w/ a 
valid ID\n"); + } + else { + E_FATAL + ("fe_warp module misconfigured with invalid fe_warp_id %u\n", + mel->warp_id); + } +} + +const char * +fe_warp_doc(melfb_t *mel) +{ + if (mel->warp_id <= FE_WARP_ID_MAX) { + return fe_warp_conf[mel->warp_id].doc(); + } + else if (mel->warp_id == FE_WARP_ID_NONE) { + E_FATAL("fe_warp module must be configured w/ a valid ID\n"); + } + else { + E_FATAL + ("fe_warp module misconfigured with invalid fe_warp_id %u\n", + mel->warp_id); + } + + return NULL; +} + +uint32 +fe_warp_id(melfb_t *mel) +{ + if (mel->warp_id <= FE_WARP_ID_MAX) { + assert(mel->warp_id == fe_warp_conf[mel->warp_id].id()); + return mel->warp_id; + } + else if (mel->warp_id != FE_WARP_ID_NONE) { + E_FATAL + ("fe_warp module misconfigured with invalid fe_warp_id %u\n", + mel->warp_id); + } + + return FE_WARP_ID_NONE; +} + +uint32 +fe_warp_n_param(melfb_t *mel) +{ + if (mel->warp_id <= FE_WARP_ID_MAX) { + return fe_warp_conf[mel->warp_id].n_param(); + } + else if (mel->warp_id == FE_WARP_ID_NONE) { + E_FATAL("fe_warp module must be configured w/ a valid ID\n"); + } + else { + E_FATAL + ("fe_warp module misconfigured with invalid fe_warp_id %u\n", + mel->warp_id); + } + + return 0; +} + +float +fe_warp_warped_to_unwarped(melfb_t *mel, float nonlinear) +{ + if (mel->warp_id <= FE_WARP_ID_MAX) { + return fe_warp_conf[mel->warp_id].warped_to_unwarped(nonlinear); + } + else if (mel->warp_id == FE_WARP_ID_NONE) { + E_FATAL("fe_warp module must be configured w/ a valid ID\n"); + } + else { + E_FATAL + ("fe_warp module misconfigured with invalid fe_warp_id %u\n", + mel->warp_id); + } + + return 0; +} + +float +fe_warp_unwarped_to_warped(melfb_t *mel,float linear) +{ + if (mel->warp_id <= FE_WARP_ID_MAX) { + return fe_warp_conf[mel->warp_id].unwarped_to_warped(linear); + } + else if (mel->warp_id == FE_WARP_ID_NONE) { + E_FATAL("fe_warp module must be configured w/ a valid ID\n"); + } + else { + E_FATAL + ("fe_warp module misconfigured with invalid fe_warp_id %u\n", + 
mel->warp_id); + } + + return 0; +} + +void +fe_warp_print(melfb_t *mel, const char *label) +{ + if (mel->warp_id <= FE_WARP_ID_MAX) { + fe_warp_conf[mel->warp_id].print(label); + } + else if (mel->warp_id == FE_WARP_ID_NONE) { + E_FATAL("fe_warp module must be configured w/ a valid ID\n"); + } + else { + E_FATAL + ("fe_warp module misconfigured with invalid fe_warp_id %u\n", + mel->warp_id); + } +} diff --git a/media/sphinxbase/src/libsphinxbase/fe/fe_warp.h b/media/sphinxbase/src/libsphinxbase/fe/fe_warp.h new file mode 100644 index 000000000..f2fd14550 --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/fe/fe_warp.h @@ -0,0 +1,90 @@ +/* ==================================================================== + * Copyright (c) 2006 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +#ifndef FE_WARP_H +#define FE_WARP_H + +#include "fe_internal.h" + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. */ +} +#endif + +#define FE_WARP_ID_INVERSE_LINEAR 0 +#define FE_WARP_ID_AFFINE 1 +#define FE_WARP_ID_PIECEWISE_LINEAR 2 +#define FE_WARP_ID_EIDE_GISH 3 +#define FE_WARP_ID_MAX 2 +#define FE_WARP_ID_NONE 0xffffffff + +typedef struct { + void (*set_parameters)(char const *param_str, float sampling_rate); + const char * (*doc)(void); + uint32 (*id)(void); + uint32 (*n_param)(void); + float (*warped_to_unwarped)(float nonlinear); + float (*unwarped_to_warped)(float linear); + void (*print)(const char *label); +} fe_warp_conf_t; + +int fe_warp_set(melfb_t *mel, const char *id_name); + +uint32 fe_warp_id(melfb_t *mel); + +const char * fe_warp_doc(melfb_t *mel); + +void fe_warp_set_parameters(melfb_t *mel, char const *param_str, float sampling_rate); + +uint32 fe_warp_n_param(melfb_t *mel); + +float fe_warp_warped_to_unwarped(melfb_t *mel, float nonlinear); + +float fe_warp_unwarped_to_warped(melfb_t *mel, float linear); + +void fe_warp_print(melfb_t *mel, const char *label); + +#define FE_WARP_NO_SIZE 0xffffffff + +#ifdef __cplusplus +} +#endif + + +#endif /* FE_WARP_H */ diff --git a/media/sphinxbase/src/libsphinxbase/fe/fe_warp_affine.c b/media/sphinxbase/src/libsphinxbase/fe/fe_warp_affine.c new file mode 
100644 index 000000000..398611917 --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/fe/fe_warp_affine.c @@ -0,0 +1,181 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2006 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * ==================================================================== + * + */ +/********************************************************************* + * + * File: fe_warp_affine.c + * + * Description: + * Warp the frequency axis according to an affine function, i.e.: + * + * w' = a * w + b + * + *********************************************************************/ + +/* static char rcsid[] = "@(#)$Id: fe_warp_affine.c,v 1.2 2006/02/17 00:31:34 egouvea Exp $"; */ + +#include +#include +#include +#include + +#ifdef _MSC_VER +#pragma warning (disable: 4996) +#endif + +#include "sphinxbase/strfuncs.h" +#include "sphinxbase/err.h" + +#include "fe_warp.h" +#include "fe_warp_affine.h" + +#define N_PARAM 2 +#define YES 1 +#define NO 0 + +/* + * params[0] : a + * params[1] : b + */ +static float params[N_PARAM] = { 1.0f, 0.0f }; +static int32 is_neutral = YES; +static char p_str[256] = ""; +static float nyquist_frequency = 0.0f; + + +const char * +fe_warp_affine_doc() +{ + return "affine :== < w' = a * x + b >"; +} + +uint32 +fe_warp_affine_id() +{ + return FE_WARP_ID_AFFINE; +} + +uint32 +fe_warp_affine_n_param() +{ + return N_PARAM; +} + +void +fe_warp_affine_set_parameters(char const *param_str, float sampling_rate) +{ + char *tok; + char *seps = " \t"; + char temp_param_str[256]; + int param_index = 0; + + nyquist_frequency = sampling_rate / 2; + if (param_str == NULL) { + is_neutral = YES; + return; + } + /* The new parameters are the same as the current ones, so do nothing. */ + if (strcmp(param_str, p_str) == 0) { + return; + } + is_neutral = NO; + strcpy(temp_param_str, param_str); + memset(params, 0, N_PARAM * sizeof(float)); + strcpy(p_str, param_str); + /* FIXME: strtok() is not re-entrant... 
*/ + tok = strtok(temp_param_str, seps); + while (tok != NULL) { + params[param_index++] = (float) atof_c(tok); + tok = strtok(NULL, seps); + if (param_index >= N_PARAM) { + break; + } + } + if (tok != NULL) { + E_INFO + ("Affine warping takes up to two arguments, %s ignored.\n", + tok); + } + if (params[0] == 0) { + is_neutral = YES; + E_INFO + ("Affine warping cannot have slope zero, warping not applied.\n"); + } +} + +float +fe_warp_affine_warped_to_unwarped(float nonlinear) +{ + if (is_neutral) { + return nonlinear; + } + else { + /* linear = (nonlinear - b) / a */ + float temp = nonlinear - params[1]; + temp /= params[0]; + if (temp > nyquist_frequency) { + E_WARN + ("Warp factor %g results in frequency (%.1f) higher than Nyquist (%.1f)\n", + params[0], temp, nyquist_frequency); + } + return temp; + } +} + +float +fe_warp_affine_unwarped_to_warped(float linear) +{ + if (is_neutral) { + return linear; + } + else { + /* nonlinear = a * linear - b */ + float temp = linear * params[0]; + temp += params[1]; + return temp; + } +} + +void +fe_warp_affine_print(const char *label) +{ + uint32 i; + + for (i = 0; i < N_PARAM; i++) { + printf("%s[%04u]: %6.3f ", label, i, params[i]); + } + printf("\n"); +} diff --git a/media/sphinxbase/src/libsphinxbase/fe/fe_warp_affine.h b/media/sphinxbase/src/libsphinxbase/fe/fe_warp_affine.h new file mode 100644 index 000000000..44027d97a --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/fe/fe_warp_affine.h @@ -0,0 +1,76 @@ +/* ==================================================================== + * Copyright (c) 2006 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +#ifndef FE_WARP_AFFINE_H +#define FE_WARP_AFFINE_H + +#include "sphinxbase/fe.h" + + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. 
*/ +} +#endif + +const char * +fe_warp_affine_doc(void); + +uint32 +fe_warp_affine_id(void); + +uint32 +fe_warp_affine_n_param(void); + +void +fe_warp_affine_set_parameters(char const *param_str, float sampling_rate); + +float +fe_warp_affine_warped_to_unwarped(float nonlinear); + +float +fe_warp_affine_unwarped_to_warped(float linear); + +void +fe_warp_affine_print(const char *label); + +#ifdef __cplusplus +} +#endif + +#endif /* FE_WARP_AFFINE_H */ diff --git a/media/sphinxbase/src/libsphinxbase/fe/fe_warp_inverse_linear.c b/media/sphinxbase/src/libsphinxbase/fe/fe_warp_inverse_linear.c new file mode 100644 index 000000000..85e42986b --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/fe/fe_warp_inverse_linear.c @@ -0,0 +1,178 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2006 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/********************************************************************* + * + * File: fe_warp_inverse_linear.c + * + * Description: + * Warp the frequency axis according to an inverse_linear function, i.e.: + * + * w' = w / a + * + *********************************************************************/ + +/* static char rcsid[] = "@(#)$Id: fe_warp_inverse_linear.c,v 1.3 2006/02/23 19:40:11 eht Exp $"; */ + +#include +#include +#include +#include + +#ifdef _MSC_VER +#pragma warning (disable: 4996) +#endif + +#include "sphinxbase/strfuncs.h" +#include "sphinxbase/err.h" + +#include "fe_warp.h" +#include "fe_warp_inverse_linear.h" + +#define N_PARAM 1 +#define YES 1 +#define NO 0 + +/* + * params[0] : a + */ +static float params[N_PARAM] = { 1.0f }; +static int32 is_neutral = YES; +static char p_str[256] = ""; +static float nyquist_frequency = 0.0f; + + +const char * +fe_warp_inverse_linear_doc() +{ + return "inverse_linear :== < w' = x / a >"; +} + +uint32 +fe_warp_inverse_linear_id() +{ + return FE_WARP_ID_INVERSE_LINEAR; +} + +uint32 +fe_warp_inverse_linear_n_param() +{ + return N_PARAM; +} + +void +fe_warp_inverse_linear_set_parameters(char const *param_str, float sampling_rate) +{ + char *tok; + char *seps = " \t"; + char temp_param_str[256]; + int param_index = 0; + + nyquist_frequency = sampling_rate / 2; + if (param_str == NULL) { + 
is_neutral = YES; + return; + } + /* The new parameters are the same as the current ones, so do nothing. */ + if (strcmp(param_str, p_str) == 0) { + return; + } + is_neutral = NO; + strcpy(temp_param_str, param_str); + memset(params, 0, N_PARAM * sizeof(float)); + strcpy(p_str, param_str); + /* FIXME: strtok() is not re-entrant... */ + tok = strtok(temp_param_str, seps); + while (tok != NULL) { + params[param_index++] = (float) atof_c(tok); + tok = strtok(NULL, seps); + if (param_index >= N_PARAM) { + break; + } + } + if (tok != NULL) { + E_INFO + ("Inverse linear warping takes only one argument, %s ignored.\n", + tok); + } + if (params[0] == 0) { + is_neutral = YES; + E_INFO + ("Inverse linear warping cannot have slope zero, warping not applied.\n"); + } +} + +float +fe_warp_inverse_linear_warped_to_unwarped(float nonlinear) +{ + if (is_neutral) { + return nonlinear; + } + else { + /* linear = nonlinear * a */ + float temp = nonlinear * params[0]; + if (temp > nyquist_frequency) { + E_WARN + ("Warp factor %g results in frequency (%.1f) higher than Nyquist (%.1f)\n", + params[0], temp, nyquist_frequency); + } + return temp; + } +} + +float +fe_warp_inverse_linear_unwarped_to_warped(float linear) +{ + if (is_neutral) { + return linear; + } + else { + /* nonlinear = a / linear */ + float temp = linear / params[0]; + return temp; + } +} + +void +fe_warp_inverse_linear_print(const char *label) +{ + uint32 i; + + for (i = 0; i < N_PARAM; i++) { + printf("%s[%04u]: %6.3f ", label, i, params[i]); + } + printf("\n"); +} diff --git a/media/sphinxbase/src/libsphinxbase/fe/fe_warp_inverse_linear.h b/media/sphinxbase/src/libsphinxbase/fe/fe_warp_inverse_linear.h new file mode 100644 index 000000000..8d4a76725 --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/fe/fe_warp_inverse_linear.h @@ -0,0 +1,77 @@ +/* ==================================================================== + * Copyright (c) 2006 Carnegie Mellon University. All rights + * reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +#ifndef FE_WARP_inverse_linear_H +#define FE_WARP_inverse_linear_H + +#include "sphinxbase/fe.h" + + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. 
*/ +} +#endif + +const char * +fe_warp_inverse_linear_doc(void); + +uint32 +fe_warp_inverse_linear_id(void); + +uint32 +fe_warp_inverse_linear_n_param(void); + +void +fe_warp_inverse_linear_set_parameters(char const *param_str, float sampling_rate); + +float +fe_warp_inverse_linear_warped_to_unwarped(float nonlinear); + +float +fe_warp_inverse_linear_unwarped_to_warped(float linear); + +void +fe_warp_inverse_linear_print(const char *label); + +#ifdef __cplusplus +} +#endif + + +#endif /* FE_WARP_inverse_linear_H */ diff --git a/media/sphinxbase/src/libsphinxbase/fe/fe_warp_piecewise_linear.c b/media/sphinxbase/src/libsphinxbase/fe/fe_warp_piecewise_linear.c new file mode 100644 index 000000000..34570120f --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/fe/fe_warp_piecewise_linear.c @@ -0,0 +1,223 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2006 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. 
+ * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/********************************************************************* + * + * File: fe_warp_piecewise_linear.c + * + * Description: + * + * Warp the frequency axis according to an piecewise linear + * function. The function is linear up to a frequency F, where + * the slope changes so that the Nyquist frequency in the warped + * axis maps to the Nyquist frequency in the unwarped. 
+ * + * w' = a * w, w < F + * w' = a' * w + b, W > F + * w'(0) = 0 + * w'(F) = F + * w'(Nyq) = Nyq + * + *********************************************************************/ + +/* static char rcsid[] = "@(#)$Id: fe_warp_piecewise_linear.c,v 1.2 2006/02/17 00:31:34 egouvea Exp $"; */ + +#include +#include +#include +#include + +#ifdef _MSC_VER +#pragma warning (disable: 4996) +#endif + +#include "sphinxbase/strfuncs.h" +#include "sphinxbase/err.h" + +#include "fe_warp.h" +#include "fe_warp_piecewise_linear.h" + +#define N_PARAM 2 +#define YES 1 +#define NO 0 + +/* + * params[0] : a + * params[1] : F (the non-differentiable point) + */ +static float params[N_PARAM] = { 1.0f, 6800.0f }; +static float final_piece[2]; +static int32 is_neutral = YES; +static char p_str[256] = ""; +static float nyquist_frequency = 0.0f; + + +const char * +fe_warp_piecewise_linear_doc() +{ + return "piecewise_linear :== < w' = a * w, w < F >"; +} + +uint32 +fe_warp_piecewise_linear_id() +{ + return FE_WARP_ID_PIECEWISE_LINEAR; +} + +uint32 +fe_warp_piecewise_linear_n_param() +{ + return N_PARAM; +} + +void +fe_warp_piecewise_linear_set_parameters(char const *param_str, + float sampling_rate) +{ + char *tok; + char *seps = " \t"; + char temp_param_str[256]; + int param_index = 0; + + nyquist_frequency = sampling_rate / 2; + if (param_str == NULL) { + is_neutral = YES; + return; + } + /* The new parameters are the same as the current ones, so do nothing. */ + if (strcmp(param_str, p_str) == 0) { + return; + } + is_neutral = NO; + strcpy(temp_param_str, param_str); + memset(params, 0, N_PARAM * sizeof(float)); + memset(final_piece, 0, 2 * sizeof(float)); + strcpy(p_str, param_str); + /* FIXME: strtok() is not re-entrant... 
*/ + tok = strtok(temp_param_str, seps); + while (tok != NULL) { + params[param_index++] = (float) atof_c(tok); + tok = strtok(NULL, seps); + if (param_index >= N_PARAM) { + break; + } + } + if (tok != NULL) { + E_INFO + ("Piecewise linear warping takes up to two arguments, %s ignored.\n", + tok); + } + if (params[1] < sampling_rate) { + /* Precompute these. These are the coefficients of a + * straight line that contains the points (F, aF) and (N, + * N), where a = params[0], F = params[1], N = Nyquist + * frequency. + */ + if (params[1] == 0) { + params[1] = sampling_rate * 0.85f; + } + final_piece[0] = + (nyquist_frequency - + params[0] * params[1]) / (nyquist_frequency - params[1]); + final_piece[1] = + nyquist_frequency * params[1] * (params[0] - + 1.0f) / (nyquist_frequency - + params[1]); + } + else { + memset(final_piece, 0, 2 * sizeof(float)); + } + if (params[0] == 0) { + is_neutral = YES; + E_INFO + ("Piecewise linear warping cannot have slope zero, warping not applied.\n"); + } +} + +float +fe_warp_piecewise_linear_warped_to_unwarped(float nonlinear) +{ + if (is_neutral) { + return nonlinear; + } + else { + /* linear = (nonlinear - b) / a */ + float temp; + if (nonlinear < params[0] * params[1]) { + temp = nonlinear / params[0]; + } + else { + temp = nonlinear - final_piece[1]; + temp /= final_piece[0]; + } + if (temp > nyquist_frequency) { + E_WARN + ("Warp factor %g results in frequency (%.1f) higher than Nyquist (%.1f)\n", + params[0], temp, nyquist_frequency); + } + return temp; + } +} + +float +fe_warp_piecewise_linear_unwarped_to_warped(float linear) +{ + if (is_neutral) { + return linear; + } + else { + float temp; + /* nonlinear = a * linear - b */ + if (linear < params[1]) { + temp = linear * params[0]; + } + else { + temp = final_piece[0] * linear + final_piece[1]; + } + return temp; + } +} + +void +fe_warp_piecewise_linear_print(const char *label) +{ + uint32 i; + + for (i = 0; i < N_PARAM; i++) { + printf("%s[%04u]: %6.3f ", label, i, 
params[i]); + } + printf("\n"); +} diff --git a/media/sphinxbase/src/libsphinxbase/fe/fe_warp_piecewise_linear.h b/media/sphinxbase/src/libsphinxbase/fe/fe_warp_piecewise_linear.h new file mode 100644 index 000000000..f15cb251e --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/fe/fe_warp_piecewise_linear.h @@ -0,0 +1,77 @@ +/* ==================================================================== + * Copyright (c) 2006 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +#ifndef FE_WARP_PIECEWIDE_LINEAR_H +#define FE_WARP_PIECEWIDE_LINEAR_H + +#include "sphinxbase/fe.h" + + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. */ +} +#endif + +const char * +fe_warp_piecewise_linear_doc(void); + +uint32 +fe_warp_piecewise_linear_id(void); + +uint32 +fe_warp_piecewise_linear_n_param(void); + +void +fe_warp_piecewise_linear_set_parameters(char const *param_str, float sampling_rate); + +float +fe_warp_piecewise_linear_warped_to_unwarped(float nonlinear); + +float +fe_warp_piecewise_linear_unwarped_to_warped(float linear); + +void +fe_warp_piecewise_linear_print(const char *label); + +#ifdef __cplusplus +} +#endif + + +#endif /* FE_WARP_PIECEWIDE_LINEAR_H */ diff --git a/media/sphinxbase/src/libsphinxbase/fe/fixlog.c b/media/sphinxbase/src/libsphinxbase/fe/fixlog.c new file mode 100644 index 000000000..459c9ffd6 --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/fe/fixlog.c @@ -0,0 +1,229 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2005 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + * File: fixlog.c + * + * Description: Fast approximate fixed-point logarithms + * + * Author: David Huggins-Daines + * + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include "sphinxbase/prim_type.h" +#include "sphinxbase/fixpoint.h" + +#include "fe_internal.h" + +/* Table of log2(x/128)*(1<= 4 + y = __builtin_clz(x); + x <<= y; + y = (31 - y); +#else + for (y = 31; y > 0; --y) { + if (x & 0x80000000) + break; + x <<= 1; + } +#endif + y <<= DEFAULT_RADIX; + /* Do a table lookup for the MSB of the mantissa. 
*/ + x = (x >> 24) & 0x7f; + return y + logtable[x]; +} + +int +fixlog(uint32 x) +{ + int32 y; + y = fixlog2(x); + return FIXMUL(y, FIXLN_2); +} diff --git a/media/sphinxbase/src/libsphinxbase/fe/yin.c b/media/sphinxbase/src/libsphinxbase/fe/yin.c new file mode 100644 index 000000000..a63fb30a9 --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/fe/yin.c @@ -0,0 +1,412 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* + * Copyright (c) 2008 Beyond Access, Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY BEYOND ACCESS, INC. ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL BEYOND ACCESS, INC. NOR + * ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file yin.c Implementation of pitch extraction. 
+ * @author David Huggins-Daines
+ */
+
+/* This implements part of the YIN algorithm:
+ *
+ * "YIN, a fundamental frequency estimator for speech and music".
+ * Alain de Cheveigné and Hideki Kawahara. Journal of the Acoustical
+ * Society of America, 111 (4), April 2002.
+ */
+
+#include "sphinxbase/prim_type.h"
+#include "sphinxbase/ckd_alloc.h"
+#include "sphinxbase/fixpoint.h"
+
+#include "sphinxbase/yin.h"
+
+/* NOTE(review): the operands of these two includes were lost when the
+ * patch text was extracted.  <string.h> is needed for the memcpy() call
+ * in this file; <stdio.h> is presumed (the file keeps printf() debug
+ * statements in comments) -- confirm against the original yin.c. */
+#include <stdio.h>
+#include <string.h>
+
+/**
+ * Pitch estimator state: configuration, circular smoothing window of
+ * per-frame difference functions and period estimates, and one stored
+ * input frame.
+ */
+struct yin_s {
+    uint16 frame_size;       /**< Size of analysis frame. */
+#ifndef FIXED_POINT
+    float search_threshold;  /**< Threshold for finding period */
+    float search_range;      /**< Range around best local estimate to search */
+#else
+    uint16 search_threshold; /**< Threshold for finding period, in Q15 */
+    uint16 search_range;     /**< Range around best local estimate to search, in Q15 */
+#endif
+    uint16 nfr;              /**< Number of frames read so far. */
+
+    unsigned char wsize;     /**< Size of smoothing window. */
+    unsigned char wstart;    /**< First frame in window. */
+    unsigned char wcur;      /**< Current frame of analysis. */
+    unsigned char endut;     /**< Hoch Hech! Are we at the utterance end? */
+
+#ifndef FIXED_POINT
+    float **diff_window;     /**< Window of difference function outputs. */
+#else
+    fixed32 **diff_window;   /**< Window of difference function outputs. */
+#endif
+    uint16 *period_window;   /**< Window of best period estimates. */
+    int16 *frame;            /**< Storage for frame */
+};
+
+/**
+ * The core of YIN: cumulative mean normalized difference function.
+ *
+ * Writes ndiff values to out_diff and reads signal[0 .. 2*ndiff-2].
+ */
+#ifndef FIXED_POINT
+static void
+cmn_diff(int16 const *signal, float *out_diff, int ndiff)
+{
+    double cum;
+    int t, j;
+
+    cum = 0.0f;
+    out_diff[0] = 1.0f;
+
+    for (t = 1; t < ndiff; ++t) {
+        float dd;
+        dd = 0.0f;
+        for (j = 0; j < ndiff; ++j) {
+            int diff = signal[j] - signal[t + j];
+            /* Multiply in floating point: diff can reach +/-65535, so
+             * the int product diff * diff may exceed INT_MAX. */
+            dd += ((float)diff * (float)diff);
+        }
+        cum += dd;
+        /* NOTE(review): cum is still 0 for an all-zero frame, which
+         * makes this 0/0; the FIXED_POINT variant guards that case. */
+        out_diff[t] = (float)(dd * t / cum);
+    }
+}
+#else
+/* NOTE(review): the shift expressions "(1UL<<tscale)", "dd >>= 1" and
+ * "cum >>= 1" below were damaged in extraction and are reconstructed
+ * from the surviving fragments -- confirm against the original yin.c. */
+static void
+cmn_diff(int16 const *signal, int32 *out_diff, int ndiff)
+{
+    uint32 cum, cshift;
+    int32 t, tscale;
+
+    out_diff[0] = 32768;
+    cum = 0;
+    cshift = 0;
+
+    /* Determine how many bits we can scale t up by below. */
+    for (tscale = 0; tscale < 32; ++tscale)
+        if (ndiff & (1U<<(31-tscale))) /* unsigned: 1<<31 overflows int */
+            break;
+    --tscale; /* Avoid teh overflowz. */
+    /* printf("tscale is %d (ndiff - 1) << tscale is %d\n",
+           tscale, (ndiff-1) << tscale); */
+
+    /* Somewhat elaborate block floating point implementation.
+     * The fp implementation of this is really a lot simpler. */
+    for (t = 1; t < ndiff; ++t) {
+        uint32 dd, dshift, norm;
+        int j;
+
+        dd = 0;
+        dshift = 0;
+        for (j = 0; j < ndiff; ++j) {
+            int diff = signal[j] - signal[t + j];
+            /* Guard against overflows. */
+            if (dd > (1UL<<tscale)) {
+                dd >>= 1;
+                ++dshift;
+            }
+            /* Square as unsigned: |diff| <= 65535 so the result fits
+             * in 32 bits, whereas the int product could overflow. */
+            dd += ((uint32)diff * (uint32)diff) >> dshift;
+        }
+        /* Make sure the diffs and cum are shifted to the same
+         * scaling factor (usually dshift will be zero) */
+        if (dshift > cshift) {
+            cum += dd << (dshift-cshift);
+        }
+        else {
+            cum += dd >> (cshift-dshift);
+        }
+
+        /* Guard against overflows and also ensure that (t<<tscale) > cum. */
+        while (cum > (1UL<<tscale)) {
+            cum >>= 1;
+            ++cshift;
+        }
+        /* Avoid divide-by-zero! */
+        if (cum == 0) cum = 1;
+        /* Calculate the normalizer in high precision. */
+        norm = (t << tscale) / cum;
+        /* Do a long multiply and shift down to Q15. */
+        out_diff[t] = (int32)(((long long)dd * norm)
+                              >> (tscale - 15 + cshift - dshift));
+        /* printf("dd %d cshift %d dshift %d scaledt %d cum %d norm %d cmn %d\n",
+           dd, cshift, dshift, (t<<tscale), cum, norm, out_diff[t]); */
+    }
+}
+#endif
+
+/*
+ * Allocate a pitch estimator.  The smoothing window holds
+ * 2 * smooth_window + 1 frames; each difference function has
+ * frame_size / 2 entries.
+ *
+ * NOTE(review): the lines from the return type through
+ * "pe = ckd_calloc(...)" were lost in extraction and are reconstructed
+ * from the names the surviving body uses -- confirm parameter order
+ * and types against sphinxbase/yin.h.
+ */
+yin_t *
+yin_init(int frame_size, float search_threshold,
+         float search_range, int smooth_window)
+{
+    yin_t *pe;
+
+    pe = ckd_calloc(1, sizeof(*pe));
+    pe->frame_size = frame_size;
+#ifndef FIXED_POINT
+    pe->search_threshold = search_threshold;
+    pe->search_range = search_range;
+#else
+    pe->search_threshold = (uint16)(search_threshold * 32768);
+    pe->search_range = (uint16)(search_range * 32768);
+#endif
+    pe->wsize = smooth_window * 2 + 1;
+    pe->diff_window = ckd_calloc_2d(pe->wsize,
+                                    pe->frame_size / 2,
+                                    sizeof(**pe->diff_window));
+    pe->period_window = ckd_calloc(pe->wsize,
+                                   sizeof(*pe->period_window));
+    pe->frame = ckd_calloc(pe->frame_size, sizeof(*pe->frame));
+    return pe;
+}
+
+/* Release everything yin_init() allocated, including the frame buffer. */
+void
+yin_free(yin_t *pe)
+{
+    if (pe == NULL)
+        return;
+    ckd_free_2d(pe->diff_window);
+    ckd_free(pe->period_window);
+    ckd_free(pe->frame); /* was never freed before: leaked per estimator */
+    ckd_free(pe);
+}
+
+/* Begin a new utterance. */
+void
+yin_start(yin_t *pe)
+{
+    /* Reset the circular window pointers.  wcur must go back to 0 too:
+     * yin_write() stores the first frame of the utterance at index 0 and
+     * yin_read() treats period_window[wcur] as the current frame. */
+    pe->wstart = pe->endut = 0;
+    pe->wcur = 0;
+    pe->nfr = 0;
+}
+
+/* Mark the end of the utterance so yin_read() can drain the window. */
+void
+yin_end(yin_t *pe)
+{
+    pe->endut = 1;
+}
+
+/*
+ * Return the index in [start, end) of the first value below threshold,
+ * or, if there is none, the index of the minimum value in that range.
+ */
+int
+#ifndef FIXED_POINT
+thresholded_search(float *diff_window, float threshold, int start, int end)
+#else
+thresholded_search(int32 *diff_window, fixed32 threshold, int start, int end)
+#endif
+{
+    int i, argmin;
+#ifndef FIXED_POINT
+    float min;
+#else
+    int min;
+#endif
+
+    min = diff_window[start];
+    argmin = start;
+    for (i = start + 1; i < end; ++i) {
+#ifndef FIXED_POINT
+        float diff = diff_window[i];
+#else
+        int diff = diff_window[i];
+#endif
+
+        if (diff < threshold) {
+            min = diff;
+            argmin = i;
+            break;
+        }
+        if (diff < min) {
+            min = diff;
+            argmin = i;
+        }
+    }
+    return argmin;
+}
+
+/* Copy frame_size samples into the estimator's own frame buffer. */
+void
+yin_store(yin_t *pe, int16 const *frame)
+{
+    memcpy(pe->frame, frame, pe->frame_size * sizeof(*pe->frame));
+}
+
+/* Analyse one frame and append its result to the smoothing window. */
+void
+yin_write(yin_t *pe, int16 const *frame)
+{
+    int outptr, difflen;
+
+    /* Rotate the window one frame forward. */
+    ++pe->wstart;
+    /* Fill in the frame before wstart. */
+    outptr = pe->wstart - 1;
+    /* Wrap around the window pointer. */
+    if (pe->wstart == pe->wsize)
+        pe->wstart = 0;
+
+    /* Now calculate normalized difference function. */
+    difflen = pe->frame_size / 2;
+    cmn_diff(frame, pe->diff_window[outptr], difflen);
+
+    /* Find the first point under threshold.  If not found, then
+     * use the absolute minimum. */
+    pe->period_window[outptr]
+        = thresholded_search(pe->diff_window[outptr],
+                             pe->search_threshold, 0, difflen);
+
+    /* Increment total number of frames. */
+    ++pe->nfr;
+}
+
+/* Analyse the frame previously saved with yin_store(). */
+void
+yin_write_stored(yin_t *pe)
+{
+    yin_write(pe, pe->frame);
+}
+
+/*
+ * Fetch the next smoothed estimate.  Returns 1 and fills the outputs
+ * when an estimate is available, 0 otherwise.  Either output pointer
+ * may be NULL; this is now honoured on every return path (previously
+ * only the last one checked).
+ */
+int
+yin_read(yin_t *pe, uint16 *out_period, float *out_bestdiff)
+{
+    int wstart, wlen, half_wsize, i;
+    int best, search_width, low_period, high_period;
+#ifndef FIXED_POINT
+    float best_diff;
+#else
+    int best_diff;
+#endif
+
+    half_wsize = (pe->wsize-1)/2;
+    /* Without any smoothing, just return the current value (don't
+     * need to do anything to the current pointer either). */
+    if (half_wsize == 0) {
+        if (pe->endut)
+            return 0;
+        if (out_period)
+            *out_period = pe->period_window[0];
+        if (out_bestdiff) {
+#ifndef FIXED_POINT
+            *out_bestdiff = pe->diff_window[0][pe->period_window[0]];
+#else
+            *out_bestdiff = pe->diff_window[0][pe->period_window[0]] / 32768.0f;
+#endif
+        }
+        return 1;
+    }
+
+    /* We can't do anything unless we have at least (wsize-1)/2 + 1
+     * frames, unless we're at the end of the utterance. */
+    if (pe->endut == 0 && pe->nfr < half_wsize + 1) {
+        /* Don't increment the current pointer either. */
+        return 0;
+    }
+
+    /* Establish the smoothing window. */
+    /* End of utterance. */
+    if (pe->endut) {
+        /* We are done (no more data) when pe->wcur = pe->wstart. */
+        if (pe->wcur == pe->wstart)
+            return 0;
+        /* I.e. pe->wcur (circular minus) half_wsize. */
+        wstart = (pe->wcur + pe->wsize - half_wsize) % pe->wsize;
+        /* Number of frames from wstart up to pe->wstart. */
+        wlen = pe->wstart - wstart;
+        if (wlen < 0) wlen += pe->wsize;
+        /*printf("ENDUT! ");*/
+    }
+    /* Beginning of utterance. */
+    else if (pe->nfr < pe->wsize) {
+        wstart = 0;
+        wlen = pe->nfr;
+    }
+    /* Normal case, it is what it is. */
+    else {
+        wstart = pe->wstart;
+        wlen = pe->wsize;
+    }
+
+    /* Now (finally) look for the best local estimate. */
+    /* printf("Searching for local estimate in %d frames around %d\n",
+       wlen, pe->nfr + 1 - wlen); */
+    best = pe->period_window[pe->wcur];
+    best_diff = pe->diff_window[pe->wcur][best];
+    for (i = 0; i < wlen; ++i) {
+        int j = wstart + i;
+#ifndef FIXED_POINT
+        float diff;
+#else
+        int diff;
+#endif
+
+        j %= pe->wsize;
+        diff = pe->diff_window[j][pe->period_window[j]];
+        /* printf("%.2f,%.2f ", 1.0 - (double)diff/32768,
+           pe->period_window[j] ? 8000.0/pe->period_window[j] : 8000.0); */
+        if (diff < best_diff) {
+            best_diff = diff;
+            best = pe->period_window[j];
+        }
+    }
+    /* printf("best: %.2f, %.2f\n", 1.0 - (double)best_diff/32768,
+       best ? 8000.0/best : 8000.0); */
+    /* If it's the same as the current one then return it. */
+    if (best == pe->period_window[pe->wcur]) {
+        /* Increment the current pointer. */
+        if (++pe->wcur == pe->wsize)
+            pe->wcur = 0;
+        if (out_period)
+            *out_period = best;
+        if (out_bestdiff) {
+#ifndef FIXED_POINT
+            *out_bestdiff = best_diff;
+#else
+            *out_bestdiff = best_diff / 32768.0f;
+#endif
+        }
+        return 1;
+    }
+    /* Otherwise, redo the search inside a narrower range. */
+#ifndef FIXED_POINT
+    search_width = (int)(best * pe->search_range);
+#else
+    search_width = best * pe->search_range / 32768;
+#endif
+    /* printf("Search width = %d * %.2f = %d\n",
+       best, (double)pe->search_range/32768, search_width); */
+    if (search_width == 0) search_width = 1;
+    low_period = best - search_width;
+    high_period = best + search_width;
+    if (low_period < 0) low_period = 0;
+    if (high_period > pe->frame_size / 2) high_period = pe->frame_size / 2;
+    /* printf("Searching from %d to %d\n", low_period, high_period); */
+    best = thresholded_search(pe->diff_window[pe->wcur],
+                              pe->search_threshold,
+                              low_period, high_period);
+    best_diff = pe->diff_window[pe->wcur][best];
+
+    if (out_period)
+        *out_period = (best > 65535) ? 65535 : best;
+    if (out_bestdiff) {
+#ifndef FIXED_POINT
+        *out_bestdiff = (best_diff > 1.0f) ? 1.0f : best_diff;
+#else
+        *out_bestdiff = (best_diff > 32768) ? 1.0f : best_diff / 32768.0f;
+#endif
+    }
+
+    /* Increment the current pointer. */
+    if (++pe->wcur == pe->wsize)
+        pe->wcur = 0;
+    return 1;
+}
diff --git a/media/sphinxbase/src/libsphinxbase/feat/agc.c b/media/sphinxbase/src/libsphinxbase/feat/agc.c
new file mode 100644
index 000000000..271baf49d
--- /dev/null
+++ b/media/sphinxbase/src/libsphinxbase/feat/agc.c
@@ -0,0 +1,227 @@
+/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
+/* ====================================================================
+ * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2.
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * agc.c -- Various forms of automatic gain control (AGC) + * + * ********************************************** + * CMU ARPA Speech Project + * + * Copyright (c) 1996 Carnegie Mellon University. + * ALL RIGHTS RESERVED. + * ********************************************** + * + * HISTORY + * $Log$ + * Revision 1.5 2005/06/21 19:25:41 arthchan2003 + * 1, Fixed doxygen documentation. 2, Added $ keyword. + * + * Revision 1.3 2005/03/30 01:22:46 archan + * Fixed mistakes in last updates. Add + * + * + * 04-Nov-95 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Created. 
+ */
+
+/* NOTE(review): the operands of the two angle-bracket includes were
+ * lost in extraction.  <string.h> is required by strcmp() below;
+ * <config.h> is the conventional HAVE_CONFIG_H header -- confirm. */
+#include <string.h>
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include "sphinxbase/err.h"
+#include "sphinxbase/ckd_alloc.h"
+#include "sphinxbase/agc.h"
+
+/* NOTE! These must match the enum in agc.h */
+const char *agc_type_str[] = {
+    "none",
+    "max",
+    "emax",
+    "noise"
+};
+static const int n_agc_type_str = sizeof(agc_type_str)/sizeof(agc_type_str[0]);
+
+/* Map a name from agc_type_str[] to its index; unknown names are fatal. */
+agc_type_t
+agc_type_from_str(const char *str)
+{
+    int i;
+
+    for (i = 0; i < n_agc_type_str; ++i) {
+        if (0 == strcmp(str, agc_type_str[i]))
+            return (agc_type_t)i;
+    }
+    E_FATAL("Unknown AGC type '%s'\n", str);
+    return AGC_NONE;
+}
+
+/* Allocate a zeroed AGC state with a noise threshold of 2.0. */
+agc_t *agc_init(void)
+{
+    agc_t *agc;
+    agc = ckd_calloc(1, sizeof(*agc));
+    agc->noise_thresh = FLOAT2MFCC(2.0);
+
+    return agc;
+}
+
+void agc_free(agc_t *agc)
+{
+    ckd_free(agc);
+}
+
+/**
+ * Normalize c0 for all frames such that max(c0) = 0.
+ */
+void
+agc_max(agc_t *agc, mfcc_t **mfc, int32 n_frame)
+{
+    int32 i;
+
+    if (n_frame <= 0)
+        return;
+    agc->obs_max = mfc[0][0];
+    for (i = 1; i < n_frame; i++) {
+        if (mfc[i][0] > agc->obs_max) {
+            agc->obs_max = mfc[i][0];
+            agc->obs_frame = 1;
+        }
+    }
+
+    /* Convert before printing: mfcc_t is not a floating type in
+     * fixed-point builds, so passing it raw to %.2f is invalid. */
+    E_INFO("AGCMax: obs=max= %.2f\n", MFCC2FLOAT(agc->obs_max));
+    for (i = 0; i < n_frame; i++)
+        mfc[i][0] -= agc->obs_max;
+}
+
+/* Set the estimated maximum used by agc_emax(). */
+void
+agc_emax_set(agc_t *agc, float32 m)
+{
+    agc->max = FLOAT2MFCC(m);
+    E_INFO("AGCEMax: max= %.2f\n", m);
+}
+
+/* Return the estimated maximum used by agc_emax(). */
+float32
+agc_emax_get(agc_t *agc)
+{
+    return MFCC2FLOAT(agc->max);
+}
+
+/* Subtract the estimated maximum from c0 of every frame while tracking
+ * the largest c0 observed, for use by agc_emax_update(). */
+void
+agc_emax(agc_t *agc, mfcc_t **mfc, int32 n_frame)
+{
+    int i;
+
+    if (n_frame <= 0)
+        return;
+    for (i = 0; i < n_frame; ++i) {
+        if (mfc[i][0] > agc->obs_max) {
+            agc->obs_max = mfc[i][0];
+            agc->obs_frame = 1;
+        }
+        mfc[i][0] -= agc->max;
+    }
+}
+
+/* Update estimated max for next utterance */
+void
+agc_emax_update(agc_t *agc)
+{
+    if (agc->obs_frame) {       /* Update only if some data observed */
+        agc->obs_max_sum += agc->obs_max;
+        agc->obs_utt++;
+
+        /* Re-estimate max over past history; decay the history */
+        agc->max = agc->obs_max_sum / agc->obs_utt;
+        if (agc->obs_utt == 16) {
+            agc->obs_max_sum /= 2;
+            agc->obs_utt = 8;
+        }
+    }
+    /* Convert before printing, as in agc_max(). */
+    E_INFO("AGCEMax: obs= %.2f, new= %.2f\n",
+           MFCC2FLOAT(agc->obs_max), MFCC2FLOAT(agc->max));
+
+    /* Reset the accumulators for the next utterance. */
+    agc->obs_frame = 0;
+    agc->obs_max = FLOAT2MFCC(-1000.0); /* Less than any real C0 value (hopefully!!) */
+}
+
+/* Estimate the noise level as the mean c0 of all frames within
+ * noise_thresh of the minimum c0, and subtract it from every frame. */
+void
+agc_noise(agc_t *agc,
+          mfcc_t **cep,
+          int32 nfr)
+{
+    mfcc_t min_energy; /* Minimum log-energy */
+    mfcc_t noise_level;        /* Average noise_level */
+    int32 i;           /* frame index */
+    int32 noise_frames;        /* Number of noise frames */
+
+    /* Nothing to do, and cep[0] must not be read, without frames. */
+    if (nfr <= 0)
+        return;
+
+    /* Determine minimum log-energy in utterance */
+    min_energy = cep[0][0];
+    for (i = 0; i < nfr; ++i) {
+        if (cep[i][0] < min_energy)
+            min_energy = cep[i][0];
+    }
+
+    /* Average all frames between min_energy and min_energy + agc->noise_thresh */
+    noise_frames = 0;
+    noise_level = 0;
+    min_energy += agc->noise_thresh;
+    for (i = 0; i < nfr; ++i) {
+        if (cep[i][0] < min_energy) {
+            noise_level += cep[i][0];
+            noise_frames++;
+        }
+    }
+
+    if (noise_frames > 0) {
+        noise_level /= noise_frames;
+        E_INFO("AGC NOISE: max= %6.3f\n", MFCC2FLOAT(noise_level));
+        /* Subtract noise_level from all log_energy values */
+        for (i = 0; i < nfr; i++) {
+            cep[i][0] -= noise_level;
+        }
+    }
+}
+
+/* Set the threshold used by agc_noise(). */
+void
+agc_set_threshold(agc_t *agc, float32 threshold)
+{
+    agc->noise_thresh = FLOAT2MFCC(threshold);
+}
+
+/* Return the threshold used by agc_noise(). */
+float32
+agc_get_threshold(agc_t *agc)
+{
+    /* noise_thresh is stored as mfcc_t (see agc_set_threshold), so the
+     * getter must convert back with MFCC2FLOAT, not FLOAT2MFCC. */
+    return MFCC2FLOAT(agc->noise_thresh);
+}
diff --git a/media/sphinxbase/src/libsphinxbase/feat/cmn.c b/media/sphinxbase/src/libsphinxbase/feat/cmn.c
new file mode 100644
index 000000000..c133c19a3
--- /dev/null
+++ b/media/sphinxbase/src/libsphinxbase/feat/cmn.c
@@ -0,0 +1,238 @@
+/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
+/* ====================================================================
+ * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
+ * reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * cmn.c -- Various forms of cepstral mean normalization + * + * ********************************************** + * CMU ARPA Speech Project + * + * Copyright (c) 1996 Carnegie Mellon University. + * ALL RIGHTS RESERVED. 
+ * ********************************************** + * + * HISTORY + * $Log$ + * Revision 1.14 2006/02/24 15:57:47 egouvea + * Removed cmn = NULL from the cmn_free(), since it's pointless (my bad!). + * + * Removed cmn_prior, which was surrounded by #if 0/#endif, since the + * function is already in cmn_prior.c + * + * Revision 1.13 2006/02/23 03:47:49 arthchan2003 + * Used Evandro's changes. Resolved conflicts. + * + * + * Revision 1.12 2006/02/23 00:48:23 egouvea + * Replaced loops resetting vectors with the more efficient memset() + * + * Revision 1.11 2006/02/22 23:43:55 arthchan2003 + * Merged from the branch SPHINX3_5_2_RCI_IRII_BRANCH: Put data structure into the cmn_t structure. + * + * Revision 1.10.4.2 2005/10/17 04:45:57 arthchan2003 + * Free stuffs in cmn and feat corectly. + * + * Revision 1.10.4.1 2005/07/05 06:25:08 arthchan2003 + * Fixed dox-doc. + * + * Revision 1.10 2005/06/21 19:28:00 arthchan2003 + * 1, Fixed doxygen documentation. 2, Added $ keyword. + * + * Revision 1.3 2005/03/30 01:22:46 archan + * Fixed mistakes in last updates. Add + * + * + * 20.Apr.2001 RAH (rhoughton@mediasite.com, ricky.houghton@cs.cmu.edu) + * Added cmn_free() and moved *mean and *var out global space and named them cmn_mean and cmn_var + * + * 28-Apr-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Changed the name norm_mean() to cmn(). + * + * 19-Jun-1996 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Changed to compute CMN over ALL dimensions of cep instead of 1..12. + * + * 04-Nov-1995 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Created. + */ + + +#include +#include +#include +#include +#include +#ifdef HAVE_CONFIG_H +#include +#endif + +#ifdef _MSC_VER +#pragma warning (disable: 4244) +#endif + +#include "sphinxbase/ckd_alloc.h" +#include "sphinxbase/err.h" +#include "sphinxbase/cmn.h" + +/* NOTE! 
These must match the enum in cmn.h */
+const char *cmn_type_str[] = {
+    "none",
+    "current",
+    "prior"
+};
+static const int n_cmn_type_str = sizeof(cmn_type_str)/sizeof(cmn_type_str[0]);
+
+/* Map a name from cmn_type_str[] to its index; unknown names are fatal. */
+cmn_type_t
+cmn_type_from_str(const char *str)
+{
+    int i;
+
+    for (i = 0; i < n_cmn_type_str; ++i) {
+        if (0 == strcmp(str, cmn_type_str[i]))
+            return (cmn_type_t)i;
+    }
+    E_FATAL("Unknown CMN type '%s'\n", str);
+    return CMN_NONE;
+}
+
+/* Allocate CMN state for vectors of veclen coefficients.  All buffers
+ * start zeroed except cmn_mean[0], which is set to 12.0. */
+cmn_t *
+cmn_init(int32 veclen)
+{
+    cmn_t *cmn;
+    cmn = (cmn_t *) ckd_calloc(1, sizeof(cmn_t));
+    cmn->veclen = veclen;
+    cmn->cmn_mean = (mfcc_t *) ckd_calloc(veclen, sizeof(mfcc_t));
+    cmn->cmn_var = (mfcc_t *) ckd_calloc(veclen, sizeof(mfcc_t));
+    cmn->sum = (mfcc_t *) ckd_calloc(veclen, sizeof(mfcc_t));
+    /* A front-end dependent magic number */
+    cmn->cmn_mean[0] = FLOAT2MFCC(12.0);
+    cmn->nframe = 0;
+    E_INFO("mean[0]= %.2f, mean[1..%d]= 0.0\n",
+           MFCC2FLOAT(cmn->cmn_mean[0]), veclen - 1);
+
+    return cmn;
+}
+
+
+/*
+ * Batch cepstral mean normalization over n_frame vectors, in place.
+ * The mean is taken over frames whose c0 is non-negative and then
+ * subtracted from every frame; with varnorm set, every dimension is
+ * additionally scaled by sqrt(n_frame / sum of squared deviations).
+ */
+void
+cmn(cmn_t *cmn, mfcc_t ** mfc, int32 varnorm, int32 n_frame)
+{
+    mfcc_t *mfcp;
+    mfcc_t t;
+    int32 i, f;
+    int32 n_pos_frame;
+
+    assert(mfc != NULL);
+
+    if (n_frame <= 0)
+        return;
+
+    /* If cmn->cmn_mean wasn't NULL, we need to zero the contents */
+    memset(cmn->cmn_mean, 0, cmn->veclen * sizeof(mfcc_t));
+
+    /* Find mean cep vector for this utterance */
+    for (f = 0, n_pos_frame = 0; f < n_frame; f++) {
+        mfcp = mfc[f];
+
+        /* Skip zero energy frames */
+        if (mfcp[0] < 0)
+            continue;
+
+        for (i = 0; i < cmn->veclen; i++) {
+            cmn->cmn_mean[i] += mfcp[i];
+        }
+
+        n_pos_frame++;
+    }
+
+    /* Every frame was skipped: there is no mean to estimate, and
+     * dividing by n_pos_frame below would be a division by zero.
+     * Leave the features as they are (the mean stays all-zero). */
+    if (n_pos_frame == 0) {
+        E_INFO("CMN: no frames with non-negative c0, features left unnormalized\n");
+        return;
+    }
+
+    for (i = 0; i < cmn->veclen; i++)
+        cmn->cmn_mean[i] /= n_pos_frame;
+
+    E_INFO("CMN: ");
+    for (i = 0; i < cmn->veclen; i++)
+        E_INFOCONT("%5.2f ", MFCC2FLOAT(cmn->cmn_mean[i]));
+    E_INFOCONT("\n");
+    if (!varnorm) {
+        /* Subtract mean from each cep vector */
+        for (f = 0; f < n_frame; f++) {
+            mfcp = mfc[f];
+            for (i = 0; i < cmn->veclen; i++)
+                mfcp[i] -= cmn->cmn_mean[i];
+        }
+    }
+    else {
+        /* Scale cep vectors to have unit variance along each dimension, and subtract means */
+        /* If cmn->cmn_var wasn't NULL, we need to zero the contents */
+        memset(cmn->cmn_var, 0, cmn->veclen * sizeof(mfcc_t));
+
+        for (f = 0; f < n_frame; f++) {
+            mfcp = mfc[f];
+
+            for (i = 0; i < cmn->veclen; i++) {
+                t = mfcp[i] - cmn->cmn_mean[i];
+                cmn->cmn_var[i] += MFCCMUL(t, t);
+            }
+        }
+        for (i = 0; i < cmn->veclen; i++)
+            /* Inverse Std. Dev, RAH added type case from sqrt */
+            cmn->cmn_var[i] = FLOAT2MFCC(sqrt((float64)n_frame / MFCC2FLOAT(cmn->cmn_var[i])));
+
+        for (f = 0; f < n_frame; f++) {
+            mfcp = mfc[f];
+            for (i = 0; i < cmn->veclen; i++)
+                mfcp[i] = MFCCMUL((mfcp[i] - cmn->cmn_mean[i]), cmn->cmn_var[i]);
+        }
+    }
+}
+
+/*
+ * RAH, free previously allocated memory
+ */
+void
+cmn_free(cmn_t * cmn)
+{
+    if (cmn != NULL) {
+        if (cmn->cmn_var)
+            ckd_free((void *) cmn->cmn_var);
+
+        if (cmn->cmn_mean)
+            ckd_free((void *) cmn->cmn_mean);
+
+        if (cmn->sum)
+            ckd_free((void *) cmn->sum);
+
+        ckd_free((void *) cmn);
+    }
+}
diff --git a/media/sphinxbase/src/libsphinxbase/feat/cmn_prior.c b/media/sphinxbase/src/libsphinxbase/feat/cmn_prior.c
new file mode 100644
index 000000000..9d1801aa7
--- /dev/null
+++ b/media/sphinxbase/src/libsphinxbase/feat/cmn_prior.c
@@ -0,0 +1,184 @@
+/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
+/* ====================================================================
+ * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/************************************************* + * CMU ARPA Speech Project + * + * Copyright (c) 2000 Carnegie Mellon University. + * ALL RIGHTS RESERVED. 
+ * **********************************************
+ *
+ * 30-Dec-2000 Rita Singh (rsingh@cs.cmu.edu) at Carnegie Mellon University
+ * Created
+ */
+
+
+/* NOTE(review): the include operand was lost in extraction; <config.h>
+ * is the conventional HAVE_CONFIG_H header -- confirm. */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#ifdef _MSC_VER
+#pragma warning (disable: 4244)
+#endif
+
+#include "sphinxbase/ckd_alloc.h"
+#include "sphinxbase/err.h"
+#include "sphinxbase/cmn.h"
+
+/* Install vec as the current mean, seeding the running sum as if
+ * CMN_WIN frames with that mean had already been accumulated. */
+void
+cmn_prior_set(cmn_t *cmn, mfcc_t const * vec)
+{
+    int32 i;
+
+    E_INFO("cmn_prior_set: from < ");
+    for (i = 0; i < cmn->veclen; i++)
+        E_INFOCONT("%5.2f ", MFCC2FLOAT(cmn->cmn_mean[i]));
+    E_INFOCONT(">\n");
+
+    for (i = 0; i < cmn->veclen; i++) {
+        cmn->cmn_mean[i] = vec[i];
+        cmn->sum[i] = vec[i] * CMN_WIN;
+    }
+    cmn->nframe = CMN_WIN;
+
+    E_INFO("cmn_prior_set: to < ");
+    for (i = 0; i < cmn->veclen; i++)
+        E_INFOCONT("%5.2f ", MFCC2FLOAT(cmn->cmn_mean[i]));
+    E_INFOCONT(">\n");
+}
+
+/* Copy the current mean into vec (veclen entries). */
+void
+cmn_prior_get(cmn_t *cmn, mfcc_t * vec)
+{
+    int32 i;
+
+    for (i = 0; i < cmn->veclen; i++)
+        vec[i] = cmn->cmn_mean[i];
+
+}
+
+/*
+ * Shared body of cmn_prior_shiftwin() and cmn_prior_update(), which
+ * previously duplicated it: recompute the mean as sum / nframe, and
+ * once nframe has reached decay_at, scale the sum down so that it
+ * represents CMN_WIN frames.  The caller guarantees nframe > 0.
+ */
+static void
+cmn_prior_refresh(cmn_t *cmn, int32 decay_at)
+{
+    mfcc_t sf;
+    int32 i;
+
+    E_INFO("cmn_prior_update: from < ");
+    for (i = 0; i < cmn->veclen; i++)
+        E_INFOCONT("%5.2f ", MFCC2FLOAT(cmn->cmn_mean[i]));
+    E_INFOCONT(">\n");
+
+    /* Update mean buffer */
+    sf = FLOAT2MFCC(1.0) / cmn->nframe;
+    for (i = 0; i < cmn->veclen; i++)
+        cmn->cmn_mean[i] = cmn->sum[i] / cmn->nframe; /* sum[i] * sf */
+
+    /* Make the accumulation decay exponentially */
+    if (cmn->nframe >= decay_at) {
+        sf = CMN_WIN * sf;
+        for (i = 0; i < cmn->veclen; i++)
+            cmn->sum[i] = MFCCMUL(cmn->sum[i], sf);
+        cmn->nframe = CMN_WIN;
+    }
+
+    E_INFO("cmn_prior_update: to < ");
+    for (i = 0; i < cmn->veclen; i++)
+        E_INFOCONT("%5.2f ", MFCC2FLOAT(cmn->cmn_mean[i]));
+    E_INFOCONT(">\n");
+}
+
+/* Refresh the mean; decays the sum when nframe >= CMN_WIN_HWM. */
+static void
+cmn_prior_shiftwin(cmn_t *cmn)
+{
+    cmn_prior_refresh(cmn, CMN_WIN_HWM);
+}
+
+/* Refresh the mean from the frames accumulated so far; does nothing if
+ * none were accumulated.  Decays the sum when nframe > CMN_WIN_HWM. */
+void
+cmn_prior_update(cmn_t *cmn)
+{
+    if (cmn->nframe <= 0)
+        return;
+
+    /* nframe > CMN_WIN_HWM, expressed as a ">=" bound for the helper. */
+    cmn_prior_refresh(cmn, CMN_WIN_HWM + 1);
+}
+
+/* Subtract the current mean from each frame with non-negative c0 while
+ * adding that frame to the running sum; frames with negative c0 are
+ * left untouched.  varnorm is not supported and is fatal. */
+void
+cmn_prior(cmn_t *cmn, mfcc_t **incep, int32 varnorm, int32 nfr)
+{
+    int32 i, j;
+
+    if (nfr <= 0)
+        return;
+
+    if (varnorm)
+        E_FATAL
+            ("Variance normalization not implemented in live mode decode\n");
+
+    for (i = 0; i < nfr; i++) {
+
+        /* Skip zero energy frames */
+        if (incep[i][0] < 0)
+            continue;
+
+        for (j = 0; j < cmn->veclen; j++) {
+            cmn->sum[j] += incep[i][j];
+            incep[i][j] -= cmn->cmn_mean[j];
+        }
+
+        ++cmn->nframe;
+    }
+
+    /* Shift buffer down if we have more than CMN_WIN_HWM frames */
+    if (cmn->nframe > CMN_WIN_HWM)
+        cmn_prior_shiftwin(cmn);
+}
diff --git a/media/sphinxbase/src/libsphinxbase/feat/feat.c b/media/sphinxbase/src/libsphinxbase/feat/feat.c
new file mode 100644
index 000000000..d2252fd85
--- /dev/null
+++ b/media/sphinxbase/src/libsphinxbase/feat/feat.c
@@ -0,0 +1,1497 @@
+/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
+/* ====================================================================
+ * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2.
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * feat.c -- Feature vector description and cepstra->feature computation. + * + * ********************************************** + * CMU ARPA Speech Project + * + * Copyright (c) 1996 Carnegie Mellon University. + * ALL RIGHTS RESERVED. + * ********************************************** + * + * HISTORY + * $Log$ + * Revision 1.22 2006/02/23 03:59:40 arthchan2003 + * Merged from branch SPHINX3_5_2_RCI_IRII_BRANCH: a, Free buffers correctly. b, Fixed dox-doc. + * + * Revision 1.21.4.3 2005/10/17 04:45:57 arthchan2003 + * Free stuffs in cmn and feat corectly. 
+ * + * Revision 1.21.4.2 2005/09/26 02:19:57 arthchan2003 + * Add message to show the directory which the feature is searched for. + * + * Revision 1.21.4.1 2005/07/03 22:55:50 arthchan2003 + * More correct deallocation in feat.c. The cmn deallocation is still not correct at this point. + * + * Revision 1.21 2005/06/22 03:29:35 arthchan2003 + * Makefile.am s for all subdirectory of libs3decoder/ + * + * Revision 1.4 2005/04/21 23:50:26 archan + * Some more refactoring on the how reporting of structures inside kbcore_t is done, it is now 50% nice. Also added class-based LM test case into test-decode.sh.in. At this moment, everything in search mode 5 is already done. It is time to test the idea whether the search can really be used. + * + * Revision 1.3 2005/03/30 01:22:46 archan + * Fixed mistakes in last updates. Add + * + * + * 20.Apr.2001 RAH (rhoughton@mediasite.com, ricky.houghton@cs.cmu.edu) + * Adding feat_free() to free allocated memory + * + * 02-Jan-2001 Rita Singh (rsingh@cs.cmu.edu) at Carnegie Mellon University + * Modified feat_s2mfc2feat_block() to handle empty buffers at + * the end of an utterance + * + * 30-Dec-2000 Rita Singh (rsingh@cs.cmu.edu) at Carnegie Mellon University + * Added feat_s2mfc2feat_block() to allow feature computation + * from sequences of blocks of cepstral vectors + * + * 12-Jun-98 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Major changes to accommodate arbitrary feature input types. Added + * feat_read(), moved various cep2feat functions from other files into + * this one. Also, made this module object-oriented with the feat_t type. + * Changed definition of s2mfc_read to let the caller manage MFC buffers. + * + * 03-Oct-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Added unistd.h include. + * + * 02-Oct-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Added check for sf argument to s2mfc_read being within file size. 
+ * + * 18-Sep-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Added sf, ef parameters to s2mfc_read(). + * + * 10-Jan-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Added feat_cepsize(). + * Added different feature-handling (s2_4x, s3_1x39 at this point). + * Moved feature-dependent functions to feature-dependent files. + * + * 09-Jan-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Moved constant declarations from feat.h into here. + * + * 04-Nov-95 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Created. + */ + + +/* + * This module encapsulates different feature streams used by the Sphinx group. New + * stream types can be added by augmenting feat_init() and providing an accompanying + * compute_feat function. It also provides a "generic" feature vector definition for + * handling "arbitrary" speech input feature types (see the last section in feat_init()). + * In this case the speech input data should already be feature vectors; no computation, + * such as MFC->feature conversion, is available or needed. 
+ */ + +#include +#include +#ifdef HAVE_CONFIG_H +#include +#endif + +#ifdef _MSC_VER +#pragma warning (disable: 4244 4996) +#endif + +#include "sphinxbase/fe.h" +#include "sphinxbase/feat.h" +#include "sphinxbase/bio.h" +#include "sphinxbase/pio.h" +#include "sphinxbase/cmn.h" +#include "sphinxbase/agc.h" +#include "sphinxbase/err.h" +#include "sphinxbase/ckd_alloc.h" +#include "sphinxbase/prim_type.h" +#include "sphinxbase/glist.h" + +#define FEAT_VERSION "1.0" +#define FEAT_DCEP_WIN 2 + +#ifdef DUMP_FEATURES +static void +cep_dump_dbg(feat_t *fcb, mfcc_t **mfc, int32 nfr, const char *text) +{ + int32 i, j; + + E_INFO("%s\n", text); + for (i = 0; i < nfr; i++) { + for (j = 0; j < fcb->cepsize; j++) { + fprintf(stderr, "%f ", MFCC2FLOAT(mfc[i][j])); + } + fprintf(stderr, "\n"); + } +} +static void +feat_print_dbg(feat_t *fcb, mfcc_t ***feat, int32 nfr, const char *text) +{ + E_INFO("%s\n", text); + feat_print(fcb, feat, nfr, stderr); +} +#else /* !DUMP_FEATURES */ +#define cep_dump_dbg(fcb,mfc,nfr,text) +#define feat_print_dbg(fcb,mfc,nfr,text) +#endif + +int32 ** +parse_subvecs(char const *str) +{ + char const *strp; + int32 n, n2, l; + glist_t dimlist; /* List of dimensions in one subvector */ + glist_t veclist; /* List of dimlists (subvectors) */ + int32 **subvec; + gnode_t *gn, *gn2; + + veclist = NULL; + + strp = str; + for (;;) { + dimlist = NULL; + + for (;;) { + if (sscanf(strp, "%d%n", &n, &l) != 1) + E_FATAL("'%s': Couldn't read int32 @pos %d\n", str, + strp - str); + strp += l; + + if (*strp == '-') { + strp++; + + if (sscanf(strp, "%d%n", &n2, &l) != 1) + E_FATAL("'%s': Couldn't read int32 @pos %d\n", str, + strp - str); + strp += l; + } + else + n2 = n; + + if ((n < 0) || (n > n2)) + E_FATAL("'%s': Bad subrange spec ending @pos %d\n", str, + strp - str); + + for (; n <= n2; n++) { + gnode_t *gn; + for (gn = dimlist; gn; gn = gnode_next(gn)) + if (gnode_int32(gn) == n) + break; + if (gn != NULL) + E_FATAL("'%s': Duplicate dimension ending @pos %d\n", + 
str, strp - str); + + dimlist = glist_add_int32(dimlist, n); + } + + if ((*strp == '\0') || (*strp == '/')) + break; + + if (*strp != ',') + E_FATAL("'%s': Bad delimiter @pos %d\n", str, strp - str); + + strp++; + } + + veclist = glist_add_ptr(veclist, (void *) dimlist); + + if (*strp == '\0') + break; + + assert(*strp == '/'); + strp++; + } + + /* Convert the glists to arrays; remember the glists are in reverse order of the input! */ + n = glist_count(veclist); /* #Subvectors */ + subvec = (int32 **) ckd_calloc(n + 1, sizeof(int32 *)); /* +1 for sentinel */ + subvec[n] = NULL; /* sentinel */ + + for (--n, gn = veclist; (n >= 0) && gn; gn = gnode_next(gn), --n) { + gn2 = (glist_t) gnode_ptr(gn); + + n2 = glist_count(gn2); /* Length of this subvector */ + if (n2 <= 0) + E_FATAL("'%s': 0-length subvector\n", str); + + subvec[n] = (int32 *) ckd_calloc(n2 + 1, sizeof(int32)); /* +1 for sentinel */ + subvec[n][n2] = -1; /* sentinel */ + + for (--n2; (n2 >= 0) && gn2; gn2 = gnode_next(gn2), --n2) + subvec[n][n2] = gnode_int32(gn2); + assert((n2 < 0) && (!gn2)); + } + assert((n < 0) && (!gn)); + + /* Free the glists */ + for (gn = veclist; gn; gn = gnode_next(gn)) { + gn2 = (glist_t) gnode_ptr(gn); + glist_free(gn2); + } + glist_free(veclist); + + return subvec; +} + +void +subvecs_free(int32 **subvecs) +{ + int32 **sv; + + for (sv = subvecs; sv && *sv; ++sv) + ckd_free(*sv); + ckd_free(subvecs); +} + +int +feat_set_subvecs(feat_t *fcb, int32 **subvecs) +{ + int32 **sv; + uint32 n_sv, n_dim, i; + + if (subvecs == NULL) { + subvecs_free(fcb->subvecs); + ckd_free(fcb->sv_buf); + ckd_free(fcb->sv_len); + fcb->n_sv = 0; + fcb->subvecs = NULL; + fcb->sv_len = NULL; + fcb->sv_buf = NULL; + fcb->sv_dim = 0; + return 0; + } + + if (fcb->n_stream != 1) { + E_ERROR("Subvector specifications require single-stream features!"); + return -1; + } + + n_sv = 0; + n_dim = 0; + for (sv = subvecs; sv && *sv; ++sv) { + int32 *d; + + for (d = *sv; d && *d != -1; ++d) { + ++n_dim; + } + 
++n_sv; + } + if (n_dim > feat_dimension(fcb)) { + E_ERROR("Total dimensionality of subvector specification %d " + "> feature dimensionality %d\n", n_dim, feat_dimension(fcb)); + return -1; + } + + fcb->n_sv = n_sv; + fcb->subvecs = subvecs; + fcb->sv_len = (uint32 *)ckd_calloc(n_sv, sizeof(*fcb->sv_len)); + fcb->sv_buf = (mfcc_t *)ckd_calloc(n_dim, sizeof(*fcb->sv_buf)); + fcb->sv_dim = n_dim; + for (i = 0; i < n_sv; ++i) { + int32 *d; + for (d = subvecs[i]; d && *d != -1; ++d) { + ++fcb->sv_len[i]; + } + } + + return 0; +} + +/** + * Project feature components to subvectors (if any). + */ +static void +feat_subvec_project(feat_t *fcb, mfcc_t ***inout_feat, uint32 nfr) +{ + uint32 i; + + if (fcb->subvecs == NULL) + return; + for (i = 0; i < nfr; ++i) { + mfcc_t *out; + int32 j; + + out = fcb->sv_buf; + for (j = 0; j < fcb->n_sv; ++j) { + int32 *d; + for (d = fcb->subvecs[j]; d && *d != -1; ++d) { + *out++ = inout_feat[i][0][*d]; + } + } + memcpy(inout_feat[i][0], fcb->sv_buf, fcb->sv_dim * sizeof(*fcb->sv_buf)); + } +} + +mfcc_t *** +feat_array_alloc(feat_t * fcb, int32 nfr) +{ + int32 i, j, k; + mfcc_t *data, *d, ***feat; + + assert(fcb); + assert(nfr > 0); + assert(feat_dimension(fcb) > 0); + + /* Make sure to use the dimensionality of the features *before* + LDA and subvector projection. 
*/ + k = 0; + for (i = 0; i < fcb->n_stream; ++i) + k += fcb->stream_len[i]; + assert(k >= feat_dimension(fcb)); + assert(k >= fcb->sv_dim); + + feat = + (mfcc_t ***) ckd_calloc_2d(nfr, feat_dimension1(fcb), sizeof(mfcc_t *)); + data = (mfcc_t *) ckd_calloc(nfr * k, sizeof(mfcc_t)); + + for (i = 0; i < nfr; i++) { + d = data + i * k; + for (j = 0; j < feat_dimension1(fcb); j++) { + feat[i][j] = d; + d += feat_dimension2(fcb, j); + } + } + + return feat; +} + +mfcc_t *** +feat_array_realloc(feat_t *fcb, mfcc_t ***old_feat, int32 ofr, int32 nfr) +{ + int32 i, k, cf; + mfcc_t*** new_feat; + + assert(fcb); + assert(nfr > 0); + assert(ofr > 0); + assert(feat_dimension(fcb) > 0); + + /* Make sure to use the dimensionality of the features *before* + LDA and subvector projection. */ + k = 0; + for (i = 0; i < fcb->n_stream; ++i) + k += fcb->stream_len[i]; + assert(k >= feat_dimension(fcb)); + assert(k >= fcb->sv_dim); + + new_feat = feat_array_alloc(fcb, nfr); + + cf = (nfr < ofr) ? nfr : ofr; + memcpy(new_feat[0][0], old_feat[0][0], cf * k * sizeof(mfcc_t)); + + feat_array_free(old_feat); + + return new_feat; +} + +void +feat_array_free(mfcc_t ***feat) +{ + ckd_free(feat[0][0]); + ckd_free_2d((void **)feat); +} + +static void +feat_s2_4x_cep2feat(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat) +{ + mfcc_t *f; + mfcc_t *w, *_w; + mfcc_t *w1, *w_1, *_w1, *_w_1; + mfcc_t d1, d2; + int32 i, j; + + assert(fcb); + assert(feat_cepsize(fcb) == 13); + assert(feat_n_stream(fcb) == 4); + assert(feat_stream_len(fcb, 0) == 12); + assert(feat_stream_len(fcb, 1) == 24); + assert(feat_stream_len(fcb, 2) == 3); + assert(feat_stream_len(fcb, 3) == 12); + assert(feat_window_size(fcb) == 4); + + /* CEP; skip C0 */ + memcpy(feat[0], mfc[0] + 1, (feat_cepsize(fcb) - 1) * sizeof(mfcc_t)); + + /* + * DCEP(SHORT): mfc[2] - mfc[-2] + * DCEP(LONG): mfc[4] - mfc[-4] + */ + w = mfc[2] + 1; /* +1 to skip C0 */ + _w = mfc[-2] + 1; + + f = feat[1]; + for (i = 0; i < feat_cepsize(fcb) - 1; i++) /* 
Short-term */ + f[i] = w[i] - _w[i]; + + w = mfc[4] + 1; /* +1 to skip C0 */ + _w = mfc[-4] + 1; + + for (j = 0; j < feat_cepsize(fcb) - 1; i++, j++) /* Long-term */ + f[i] = w[j] - _w[j]; + + /* D2CEP: (mfc[3] - mfc[-1]) - (mfc[1] - mfc[-3]) */ + w1 = mfc[3] + 1; /* Final +1 to skip C0 */ + _w1 = mfc[-1] + 1; + w_1 = mfc[1] + 1; + _w_1 = mfc[-3] + 1; + + f = feat[3]; + for (i = 0; i < feat_cepsize(fcb) - 1; i++) { + d1 = w1[i] - _w1[i]; + d2 = w_1[i] - _w_1[i]; + + f[i] = d1 - d2; + } + + /* POW: C0, DC0, D2C0; differences computed as above for rest of cep */ + f = feat[2]; + f[0] = mfc[0][0]; + f[1] = mfc[2][0] - mfc[-2][0]; + + d1 = mfc[3][0] - mfc[-1][0]; + d2 = mfc[1][0] - mfc[-3][0]; + f[2] = d1 - d2; +} + + +static void +feat_s3_1x39_cep2feat(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat) +{ + mfcc_t *f; + mfcc_t *w, *_w; + mfcc_t *w1, *w_1, *_w1, *_w_1; + mfcc_t d1, d2; + int32 i; + + assert(fcb); + assert(feat_cepsize(fcb) == 13); + assert(feat_n_stream(fcb) == 1); + assert(feat_stream_len(fcb, 0) == 39); + assert(feat_window_size(fcb) == 3); + + /* CEP; skip C0 */ + memcpy(feat[0], mfc[0] + 1, (feat_cepsize(fcb) - 1) * sizeof(mfcc_t)); + /* + * DCEP: mfc[2] - mfc[-2]; + */ + f = feat[0] + feat_cepsize(fcb) - 1; + w = mfc[2] + 1; /* +1 to skip C0 */ + _w = mfc[-2] + 1; + + for (i = 0; i < feat_cepsize(fcb) - 1; i++) + f[i] = w[i] - _w[i]; + + /* POW: C0, DC0, D2C0 */ + f += feat_cepsize(fcb) - 1; + + f[0] = mfc[0][0]; + f[1] = mfc[2][0] - mfc[-2][0]; + + d1 = mfc[3][0] - mfc[-1][0]; + d2 = mfc[1][0] - mfc[-3][0]; + f[2] = d1 - d2; + + /* D2CEP: (mfc[3] - mfc[-1]) - (mfc[1] - mfc[-3]) */ + f += 3; + + w1 = mfc[3] + 1; /* Final +1 to skip C0 */ + _w1 = mfc[-1] + 1; + w_1 = mfc[1] + 1; + _w_1 = mfc[-3] + 1; + + for (i = 0; i < feat_cepsize(fcb) - 1; i++) { + d1 = w1[i] - _w1[i]; + d2 = w_1[i] - _w_1[i]; + + f[i] = d1 - d2; + } +} + + +static void +feat_s3_cep(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat) +{ + assert(fcb); + assert(feat_n_stream(fcb) == 1); + 
assert(feat_window_size(fcb) == 0); + + /* CEP */ + memcpy(feat[0], mfc[0], feat_cepsize(fcb) * sizeof(mfcc_t)); +} + +static void +feat_s3_cep_dcep(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat) +{ + mfcc_t *f; + mfcc_t *w, *_w; + int32 i; + + assert(fcb); + assert(feat_n_stream(fcb) == 1); + assert(feat_stream_len(fcb, 0) == feat_cepsize(fcb) * 2); + assert(feat_window_size(fcb) == 2); + + /* CEP */ + memcpy(feat[0], mfc[0], feat_cepsize(fcb) * sizeof(mfcc_t)); + + /* + * DCEP: mfc[2] - mfc[-2]; + */ + f = feat[0] + feat_cepsize(fcb); + w = mfc[2]; + _w = mfc[-2]; + + for (i = 0; i < feat_cepsize(fcb); i++) + f[i] = w[i] - _w[i]; +} + +static void +feat_1s_c_d_dd_cep2feat(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat) +{ + mfcc_t *f; + mfcc_t *w, *_w; + mfcc_t *w1, *w_1, *_w1, *_w_1; + mfcc_t d1, d2; + int32 i; + + assert(fcb); + assert(feat_n_stream(fcb) == 1); + assert(feat_stream_len(fcb, 0) == feat_cepsize(fcb) * 3); + assert(feat_window_size(fcb) == FEAT_DCEP_WIN + 1); + + /* CEP */ + memcpy(feat[0], mfc[0], feat_cepsize(fcb) * sizeof(mfcc_t)); + + /* + * DCEP: mfc[w] - mfc[-w], where w = FEAT_DCEP_WIN; + */ + f = feat[0] + feat_cepsize(fcb); + w = mfc[FEAT_DCEP_WIN]; + _w = mfc[-FEAT_DCEP_WIN]; + + for (i = 0; i < feat_cepsize(fcb); i++) + f[i] = w[i] - _w[i]; + + /* + * D2CEP: (mfc[w+1] - mfc[-w+1]) - (mfc[w-1] - mfc[-w-1]), + * where w = FEAT_DCEP_WIN + */ + f += feat_cepsize(fcb); + + w1 = mfc[FEAT_DCEP_WIN + 1]; + _w1 = mfc[-FEAT_DCEP_WIN + 1]; + w_1 = mfc[FEAT_DCEP_WIN - 1]; + _w_1 = mfc[-FEAT_DCEP_WIN - 1]; + + for (i = 0; i < feat_cepsize(fcb); i++) { + d1 = w1[i] - _w1[i]; + d2 = w_1[i] - _w_1[i]; + + f[i] = d1 - d2; + } +} + +static void +feat_1s_c_d_ld_dd_cep2feat(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat) +{ + mfcc_t *f; + mfcc_t *w, *_w; + mfcc_t *w1, *w_1, *_w1, *_w_1; + mfcc_t d1, d2; + int32 i; + + assert(fcb); + assert(feat_n_stream(fcb) == 1); + assert(feat_stream_len(fcb, 0) == feat_cepsize(fcb) * 4); + assert(feat_window_size(fcb) == 
FEAT_DCEP_WIN * 2); + + /* CEP */ + memcpy(feat[0], mfc[0], feat_cepsize(fcb) * sizeof(mfcc_t)); + + /* + * DCEP: mfc[w] - mfc[-w], where w = FEAT_DCEP_WIN; + */ + f = feat[0] + feat_cepsize(fcb); + w = mfc[FEAT_DCEP_WIN]; + _w = mfc[-FEAT_DCEP_WIN]; + + for (i = 0; i < feat_cepsize(fcb); i++) + f[i] = w[i] - _w[i]; + + /* + * LDCEP: mfc[w] - mfc[-w], where w = FEAT_DCEP_WIN * 2; + */ + f += feat_cepsize(fcb); + w = mfc[FEAT_DCEP_WIN * 2]; + _w = mfc[-FEAT_DCEP_WIN * 2]; + + for (i = 0; i < feat_cepsize(fcb); i++) + f[i] = w[i] - _w[i]; + + /* + * D2CEP: (mfc[w+1] - mfc[-w+1]) - (mfc[w-1] - mfc[-w-1]), + * where w = FEAT_DCEP_WIN + */ + f += feat_cepsize(fcb); + + w1 = mfc[FEAT_DCEP_WIN + 1]; + _w1 = mfc[-FEAT_DCEP_WIN + 1]; + w_1 = mfc[FEAT_DCEP_WIN - 1]; + _w_1 = mfc[-FEAT_DCEP_WIN - 1]; + + for (i = 0; i < feat_cepsize(fcb); i++) { + d1 = w1[i] - _w1[i]; + d2 = w_1[i] - _w_1[i]; + + f[i] = d1 - d2; + } +} + +static void +feat_copy(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat) +{ + int32 win, i, j; + + win = feat_window_size(fcb); + + /* Concatenate input features */ + for (i = -win; i <= win; ++i) { + uint32 spos = 0; + + for (j = 0; j < feat_n_stream(fcb); ++j) { + uint32 stream_len; + + /* Unscale the stream length by the window. */ + stream_len = feat_stream_len(fcb, j) / (2 * win + 1); + memcpy(feat[j] + ((i + win) * stream_len), + mfc[i] + spos, + stream_len * sizeof(mfcc_t)); + spos += stream_len; + } + } +} + +feat_t * +feat_init(char const *type, cmn_type_t cmn, int32 varnorm, + agc_type_t agc, int32 breport, int32 cepsize) +{ + feat_t *fcb; + + if (cepsize == 0) + cepsize = 13; + if (breport) + E_INFO + ("Initializing feature stream to type: '%s', ceplen=%d, CMN='%s', VARNORM='%s', AGC='%s'\n", + type, cepsize, cmn_type_str[cmn], varnorm ? 
"yes" : "no", agc_type_str[agc]); + + fcb = (feat_t *) ckd_calloc(1, sizeof(feat_t)); + fcb->refcount = 1; + fcb->name = (char *) ckd_salloc(type); + if (strcmp(type, "s2_4x") == 0) { + /* Sphinx-II format 4-stream feature (Hack!! hardwired constants below) */ + if (cepsize != 13) { + E_ERROR("s2_4x features require cepsize == 13\n"); + ckd_free(fcb); + return NULL; + } + fcb->cepsize = 13; + fcb->n_stream = 4; + fcb->stream_len = (uint32 *) ckd_calloc(4, sizeof(uint32)); + fcb->stream_len[0] = 12; + fcb->stream_len[1] = 24; + fcb->stream_len[2] = 3; + fcb->stream_len[3] = 12; + fcb->out_dim = 51; + fcb->window_size = 4; + fcb->compute_feat = feat_s2_4x_cep2feat; + } + else if ((strcmp(type, "s3_1x39") == 0) || (strcmp(type, "1s_12c_12d_3p_12dd") == 0)) { + /* 1-stream cep/dcep/pow/ddcep (Hack!! hardwired constants below) */ + if (cepsize != 13) { + E_ERROR("s2_4x features require cepsize == 13\n"); + ckd_free(fcb); + return NULL; + } + fcb->cepsize = 13; + fcb->n_stream = 1; + fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32)); + fcb->stream_len[0] = 39; + fcb->out_dim = 39; + fcb->window_size = 3; + fcb->compute_feat = feat_s3_1x39_cep2feat; + } + else if (strncmp(type, "1s_c_d_dd", 9) == 0) { + fcb->cepsize = cepsize; + fcb->n_stream = 1; + fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32)); + fcb->stream_len[0] = cepsize * 3; + fcb->out_dim = cepsize * 3; + fcb->window_size = FEAT_DCEP_WIN + 1; /* ddcep needs the extra 1 */ + fcb->compute_feat = feat_1s_c_d_dd_cep2feat; + } + else if (strncmp(type, "1s_c_d_ld_dd", 12) == 0) { + fcb->cepsize = cepsize; + fcb->n_stream = 1; + fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32)); + fcb->stream_len[0] = cepsize * 4; + fcb->out_dim = cepsize * 4; + fcb->window_size = FEAT_DCEP_WIN * 2; + fcb->compute_feat = feat_1s_c_d_ld_dd_cep2feat; + } + else if (strncmp(type, "cep_dcep", 8) == 0 || strncmp(type, "1s_c_d", 6) == 0) { + /* 1-stream cep/dcep */ + fcb->cepsize = cepsize; + fcb->n_stream = 1; + 
fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32)); + fcb->stream_len[0] = feat_cepsize(fcb) * 2; + fcb->out_dim = fcb->stream_len[0]; + fcb->window_size = 2; + fcb->compute_feat = feat_s3_cep_dcep; + } + else if (strncmp(type, "cep", 3) == 0 || strncmp(type, "1s_c", 4) == 0) { + /* 1-stream cep */ + fcb->cepsize = cepsize; + fcb->n_stream = 1; + fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32)); + fcb->stream_len[0] = feat_cepsize(fcb); + fcb->out_dim = fcb->stream_len[0]; + fcb->window_size = 0; + fcb->compute_feat = feat_s3_cep; + } + else if (strncmp(type, "1s_3c", 5) == 0 || strncmp(type, "1s_4c", 5) == 0) { + /* 1-stream cep with frames concatenated, so called cepwin features */ + if (strncmp(type, "1s_3c", 5) == 0) + fcb->window_size = 3; + else + fcb->window_size = 4; + + fcb->cepsize = cepsize; + fcb->n_stream = 1; + fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32)); + fcb->stream_len[0] = feat_cepsize(fcb) * (2 * fcb->window_size + 1); + fcb->out_dim = fcb->stream_len[0]; + fcb->compute_feat = feat_copy; + } + else { + int32 i, k, l; + size_t len; + char *strp; + char *mtype = ckd_salloc(type); + char *wd = ckd_salloc(type); + /* + * Generic definition: Format should be %d,%d,%d,...,%d (i.e., + * comma separated list of feature stream widths; #items = + * #streams). An optional window size (frames will be + * concatenated) is also allowed, which can be specified with + * a colon after the list of feature streams. 
+ */ + len = strlen(mtype); + k = 0; + for (i = 1; i < len - 1; i++) { + if (mtype[i] == ',') { + mtype[i] = ' '; + k++; + } + else if (mtype[i] == ':') { + mtype[i] = '\0'; + fcb->window_size = atoi(mtype + i + 1); + break; + } + } + k++; /* Presumably there are (#commas+1) streams */ + fcb->n_stream = k; + fcb->stream_len = (uint32 *) ckd_calloc(k, sizeof(uint32)); + + /* Scan individual feature stream lengths */ + strp = mtype; + i = 0; + fcb->out_dim = 0; + fcb->cepsize = 0; + while (sscanf(strp, "%s%n", wd, &l) == 1) { + strp += l; + if ((i >= fcb->n_stream) + || (sscanf(wd, "%u", &(fcb->stream_len[i])) != 1) + || (fcb->stream_len[i] <= 0)) + E_FATAL("Bad feature type argument\n"); + /* Input size before windowing */ + fcb->cepsize += fcb->stream_len[i]; + if (fcb->window_size > 0) + fcb->stream_len[i] *= (fcb->window_size * 2 + 1); + /* Output size after windowing */ + fcb->out_dim += fcb->stream_len[i]; + i++; + } + if (i != fcb->n_stream) + E_FATAL("Bad feature type argument\n"); + if (fcb->cepsize != cepsize) + E_FATAL("Bad feature type argument\n"); + + /* Input is already the feature stream */ + fcb->compute_feat = feat_copy; + ckd_free(mtype); + ckd_free(wd); + } + + if (cmn != CMN_NONE) + fcb->cmn_struct = cmn_init(feat_cepsize(fcb)); + fcb->cmn = cmn; + fcb->varnorm = varnorm; + if (agc != AGC_NONE) { + fcb->agc_struct = agc_init(); + /* + * No need to check if agc is set to EMAX; agc_emax_set() changes only emax related things + * Moreover, if agc is not NONE and block mode is used, feat_agc() SILENTLY + * switches to EMAX + */ + /* HACK: hardwired initial estimates based on use of CMN (from Sphinx2) */ + agc_emax_set(fcb->agc_struct, (cmn != CMN_NONE) ? 5.0 : 10.0); + } + fcb->agc = agc; + /* + * Make sure this buffer is large enough to be used in feat_s2mfc2feat_block_utt() + */ + fcb->cepbuf = (mfcc_t **) ckd_calloc_2d((LIVEBUFBLOCKSIZE < feat_window_size(fcb) * 2) ? 
feat_window_size(fcb) * 2 : LIVEBUFBLOCKSIZE, + feat_cepsize(fcb), + sizeof(mfcc_t)); + /* This one is actually just an array of pointers to "flatten out" + * wraparounds. */ + fcb->tmpcepbuf = (mfcc_t** )ckd_calloc(2 * feat_window_size(fcb) + 1, + sizeof(*fcb->tmpcepbuf)); + + return fcb; +} + + +void +feat_print(feat_t * fcb, mfcc_t *** feat, int32 nfr, FILE * fp) +{ + uint32 i, j, k; + + for (i = 0; i < nfr; i++) { + fprintf(fp, "%8d:\n", i); + + for (j = 0; j < feat_dimension1(fcb); j++) { + fprintf(fp, "\t%2d:", j); + + for (k = 0; k < feat_dimension2(fcb, j); k++) + fprintf(fp, " %8.4f", MFCC2FLOAT(feat[i][j][k])); + fprintf(fp, "\n"); + } + } + + fflush(fp); +} + +static void +feat_cmn(feat_t *fcb, mfcc_t **mfc, int32 nfr, int32 beginutt, int32 endutt) +{ + cmn_type_t cmn_type = fcb->cmn; + + if (!(beginutt && endutt) + && cmn_type != CMN_NONE) /* Only cmn_prior in block computation mode. */ + fcb->cmn = cmn_type = CMN_PRIOR; + + switch (cmn_type) { + case CMN_CURRENT: + cmn(fcb->cmn_struct, mfc, fcb->varnorm, nfr); + break; + case CMN_PRIOR: + cmn_prior(fcb->cmn_struct, mfc, fcb->varnorm, nfr); + if (endutt) + cmn_prior_update(fcb->cmn_struct); + break; + default: + ; + } + cep_dump_dbg(fcb, mfc, nfr, "After CMN"); +} + +static void +feat_agc(feat_t *fcb, mfcc_t **mfc, int32 nfr, int32 beginutt, int32 endutt) +{ + agc_type_t agc_type = fcb->agc; + + if (!(beginutt && endutt) + && agc_type != AGC_NONE) /* Only agc_emax in block computation mode. 
*/ + agc_type = AGC_EMAX; + + switch (agc_type) { + case AGC_MAX: + agc_max(fcb->agc_struct, mfc, nfr); + break; + case AGC_EMAX: + agc_emax(fcb->agc_struct, mfc, nfr); + if (endutt) + agc_emax_update(fcb->agc_struct); + break; + case AGC_NOISE: + agc_noise(fcb->agc_struct, mfc, nfr); + break; + default: + ; + } + cep_dump_dbg(fcb, mfc, nfr, "After AGC"); +} + +static void +feat_compute_utt(feat_t *fcb, mfcc_t **mfc, int32 nfr, int32 win, mfcc_t ***feat) +{ + int32 i; + + cep_dump_dbg(fcb, mfc, nfr, "Incoming features (after padding)"); + + /* Create feature vectors */ + for (i = win; i < nfr - win; i++) { + fcb->compute_feat(fcb, mfc + i, feat[i - win]); + } + + feat_print_dbg(fcb, feat, nfr - win * 2, "After dynamic feature computation"); + + if (fcb->lda) { + feat_lda_transform(fcb, feat, nfr - win * 2); + feat_print_dbg(fcb, feat, nfr - win * 2, "After LDA"); + } + + if (fcb->subvecs) { + feat_subvec_project(fcb, feat, nfr - win * 2); + feat_print_dbg(fcb, feat, nfr - win * 2, "After subvector projection"); + } +} + + +/** + * Read Sphinx-II format mfc file (s2mfc = Sphinx-II format MFC data). + * If out_mfc is NULL, no actual reading will be done, and the number of + * frames (plus padding) that would be read is returned. + * + * It's important that normalization is done before padding because + * frames outside the data we are interested in shouldn't be taken + * into normalization stats. + * + * @return # frames read (plus padding) if successful, -1 if + * error (e.g., mfc array too small). + */ +static int32 +feat_s2mfc_read_norm_pad(feat_t *fcb, char *file, int32 win, + int32 sf, int32 ef, + mfcc_t ***out_mfc, + int32 maxfr, + int32 cepsize) +{ + FILE *fp; + int32 n_float32; + float32 *float_feat; + struct stat statbuf; + int32 i, n, byterev; + int32 start_pad, end_pad; + mfcc_t **mfc; + + /* Initialize the output pointer to NULL, so that any attempts to + free() it if we fail before allocating it will not segfault! 
*/ + if (out_mfc) + *out_mfc = NULL; + E_INFO("Reading mfc file: '%s'[%d..%d]\n", file, sf, ef); + if (ef >= 0 && ef <= sf) { + E_ERROR("%s: End frame (%d) <= Start frame (%d)\n", file, ef, sf); + return -1; + } + + /* Find filesize; HACK!! To get around intermittent NFS failures, use stat_retry */ + if ((stat_retry(file, &statbuf) < 0) + || ((fp = fopen(file, "rb")) == NULL)) { + E_ERROR_SYSTEM("Failed to open file '%s' for reading", file); + return -1; + } + + /* Read #floats in header */ + if (fread_retry(&n_float32, sizeof(int32), 1, fp) != 1) { + E_ERROR("%s: fread(#floats) failed\n", file); + fclose(fp); + return -1; + } + + /* Check if n_float32 matches file size */ + byterev = 0; + if ((int32) (n_float32 * sizeof(float32) + 4) != (int32) statbuf.st_size) { /* RAH, typecast both sides to remove compile warning */ + n = n_float32; + SWAP_INT32(&n); + + if ((int32) (n * sizeof(float32) + 4) != (int32) (statbuf.st_size)) { /* RAH, typecast both sides to remove compile warning */ + E_ERROR + ("%s: Header size field: %d(%08x); filesize: %d(%08x)\n", + file, n_float32, n_float32, statbuf.st_size, + statbuf.st_size); + fclose(fp); + return -1; + } + + n_float32 = n; + byterev = 1; + } + if (n_float32 <= 0) { + E_ERROR("%s: Header size field (#floats) = %d\n", file, n_float32); + fclose(fp); + return -1; + } + + /* Convert n to #frames of input */ + n = n_float32 / cepsize; + if (n * cepsize != n_float32) { + E_ERROR("Header size field: %d; not multiple of %d\n", n_float32, + cepsize); + fclose(fp); + return -1; + } + + /* Check start and end frames */ + if (sf > 0) { + if (sf >= n) { + E_ERROR("%s: Start frame (%d) beyond file size (%d)\n", file, + sf, n); + fclose(fp); + return -1; + } + } + if (ef < 0) + ef = n-1; + else if (ef >= n) { + E_WARN("%s: End frame (%d) beyond file size (%d), will truncate\n", + file, ef, n); + ef = n-1; + } + + /* Add window to start and end frames */ + sf -= win; + ef += win; + if (sf < 0) { + start_pad = -sf; + sf = 0; + } + else + 
start_pad = 0; + if (ef >= n) { + end_pad = ef - n + 1; + ef = n - 1; + } + else + end_pad = 0; + + /* Limit n if indicated by [sf..ef] */ + if ((ef - sf + 1) < n) + n = (ef - sf + 1); + if (maxfr > 0 && n + start_pad + end_pad > maxfr) { + E_ERROR("%s: Maximum output size(%d frames) < actual #frames(%d)\n", + file, maxfr, n + start_pad + end_pad); + fclose(fp); + return -1; + } + + /* If no output buffer was supplied, then skip the actual data reading. */ + if (out_mfc != NULL) { + /* Position at desired start frame and read actual MFC data */ + mfc = (mfcc_t **)ckd_calloc_2d(n + start_pad + end_pad, cepsize, sizeof(mfcc_t)); + if (sf > 0) + fseek(fp, sf * cepsize * sizeof(float32), SEEK_CUR); + n_float32 = n * cepsize; +#ifdef FIXED_POINT + float_feat = ckd_calloc(n_float32, sizeof(float32)); +#else + float_feat = mfc[start_pad]; +#endif + if (fread_retry(float_feat, sizeof(float32), n_float32, fp) != n_float32) { + E_ERROR("%s: fread(%dx%d) (MFC data) failed\n", file, n, cepsize); + ckd_free_2d(mfc); + fclose(fp); + return -1; + } + if (byterev) { + for (i = 0; i < n_float32; i++) { + SWAP_FLOAT32(&float_feat[i]); + } + } +#ifdef FIXED_POINT + for (i = 0; i < n_float32; ++i) { + mfc[start_pad][i] = FLOAT2MFCC(float_feat[i]); + } + ckd_free(float_feat); +#endif + + /* Normalize */ + feat_cmn(fcb, mfc + start_pad, n, 1, 1); + feat_agc(fcb, mfc + start_pad, n, 1, 1); + + /* Replicate start and end frames if necessary. 
*/
+        for (i = 0; i < start_pad; ++i)
+            memcpy(mfc[i], mfc[start_pad], cepsize * sizeof(mfcc_t));
+        for (i = 0; i < end_pad; ++i)
+            memcpy(mfc[start_pad + n + i], mfc[start_pad + n - 1],
+                   cepsize * sizeof(mfcc_t));
+
+        *out_mfc = mfc;
+    }
+
+    fclose(fp);
+    return n + start_pad + end_pad;
+}
+
+
+
+/**
+ * Read a cepstrum file and compute dynamic features from it.
+ *
+ * The file name is assembled as dir + "/" + file + cepext.  If dir is
+ * NULL, file is used as given; cepext is left off when file already
+ * ends with it.  Frames [sf..ef] are read through
+ * feat_s2mfc_read_norm_pad() with a padding of feat_window_size(fcb)
+ * frames on each side.
+ *
+ * @param fcb    Feature control block; fcb->cepsize must be positive.
+ * @param file   File name (or full path when dir is NULL).
+ * @param dir    Directory to prepend, or NULL.
+ * @param cepext Extension to append, or NULL for none.
+ * @param sf     Start frame, passed on to the reader.
+ * @param ef     End frame, passed on to the reader.
+ * @param feat   Output feature buffer, or NULL to only count frames.
+ * @param maxfr  Capacity of feat in frames; it is padded by two windows
+ *               before being handed to the reader when non-negative.
+ * @return Number of frames read minus the two padding windows, or a
+ *         negative value on failure.
+ */
+int32
+feat_s2mfc2feat(feat_t * fcb, const char *file, const char *dir, const char *cepext,
+                int32 sf, int32 ef, mfcc_t *** feat, int32 maxfr)
+{
+    char *path;
+    const char *ps = "/";
+    int32 win, nfr;
+    size_t file_length, cepext_length, path_length = 0;
+    /* Start out NULL so the failure path below never frees garbage. */
+    mfcc_t **mfc = NULL;
+
+    if (fcb->cepsize <= 0) {
+        E_ERROR("Bad cepsize: %d\n", fcb->cepsize);
+        return -1;
+    }
+
+    if (cepext == NULL)
+        cepext = "";
+
+    /*
+     * Create mfc filename, combining file, dir and extension if
+     * necessary
+     */
+
+    /*
+     * First we decide about the path. If dir is defined, then use
+     * it. Otherwise assume the filename already contains the path.
+     */
+    if (dir == NULL) {
+        dir = "";
+        ps = "";
+        /*
+         * This is not true but some 3rd party apps
+         * may parse the output explicitly checking for this line
+         */
+        E_INFO("At directory . (current directory)\n");
+    }
+    else {
+        E_INFO("At directory %s\n", dir);
+        /*
+         * Do not forget the path separator!
+         */
+        path_length += strlen(dir) + 1;
+    }
+
+    /*
+     * Include cepext, if it's not already part of the filename.
+     */
+    file_length = strlen(file);
+    cepext_length = strlen(cepext);
+    if ((file_length > cepext_length)
+        && (strcmp(file + file_length - cepext_length, cepext) == 0)) {
+        cepext = "";
+        cepext_length = 0;
+    }
+
+    /*
+     * Do not forget the '\0'
+     */
+    path_length += file_length + cepext_length + 1;
+    path = (char*) ckd_calloc(path_length, sizeof(char));
+
+#ifdef HAVE_SNPRINTF
+    /*
+     * Paranoia is our best friend...
+     *
+     * snprintf() returns the length it wanted to write, not counting
+     * the terminating '\0', so the output was truncated whenever that
+     * value is >= the buffer size; the buffer is then regrown to
+     * needed + 1 bytes.  A negative return is an output error.
+     */
+    for (;;) {
+        int needed = snprintf(path, path_length, "%s%s%s%s", dir, ps, file, cepext);
+
+        if (needed < 0) {
+            E_ERROR("Failed to build path for '%s'\n", file);
+            ckd_free(path);
+            return -1;
+        }
+        if ((size_t) needed < path_length)
+            break;
+        path_length = (size_t) needed + 1;
+        path = (char*) ckd_realloc(path, path_length * sizeof(char));
+    }
+#else
+    sprintf(path, "%s%s%s%s", dir, ps, file, cepext);
+#endif
+
+    win = feat_window_size(fcb);
+    /* Pad maxfr with win, so we read enough raw feature data to
+     * calculate the requisite number of dynamic features. */
+    if (maxfr >= 0)
+        maxfr += win * 2;
+
+    if (feat != NULL) {
+        /* Read mfc file including window or padding if necessary. */
+        nfr = feat_s2mfc_read_norm_pad(fcb, path, win, sf, ef, &mfc, maxfr, fcb->cepsize);
+        ckd_free(path);
+        if (nfr < 0) {
+            /* The reader releases its own buffer on the failure paths
+             * visible in this file; only free what it handed back. */
+            if (mfc != NULL)
+                ckd_free_2d((void **) mfc);
+            return -1;
+        }
+
+        /* Actually compute the features */
+        feat_compute_utt(fcb, mfc, nfr, win, feat);
+
+        ckd_free_2d((void **) mfc);
+    }
+    else {
+        /* Just calculate the number of frames we would need. */
+        nfr = feat_s2mfc_read_norm_pad(fcb, path, win, sf, ef, NULL, maxfr, fcb->cepsize);
+        ckd_free(path);
+        if (nfr < 0)
+            return nfr;
+    }
+
+
+    return (nfr - win * 2);
+}
+
+static int32
+feat_s2mfc2feat_block_utt(feat_t * fcb, mfcc_t ** uttcep,
+                          int32 nfr, mfcc_t *** ofeat)
+{
+    mfcc_t **cepbuf;
+    int32 i, win, cepsize;
+
+    win = feat_window_size(fcb);
+    cepsize = feat_cepsize(fcb);
+
+    /* Copy and pad out the utterance (this requires that the
+     * feature computation functions always access the buffer via
+     * the frame pointers, which they do) */
+    cepbuf = (mfcc_t **)ckd_calloc(nfr + win * 2, sizeof(mfcc_t *));
+    memcpy(cepbuf + win, uttcep, nfr * sizeof(mfcc_t *));
+
+    /* Do normalization before we interpolate on the boundary */
+    feat_cmn(fcb, cepbuf + win, nfr, 1, 1);
+    feat_agc(fcb, cepbuf + win, nfr, 1, 1);
+
+    /* Now interpolate */
+    for (i = 0; i < win; ++i) {
+        cepbuf[i] = fcb->cepbuf[i];
+        memcpy(cepbuf[i], uttcep[0], cepsize * sizeof(mfcc_t));
+        cepbuf[nfr + win + i] = fcb->cepbuf[win + i];
+        memcpy(cepbuf[nfr + win + i], uttcep[nfr - 1], cepsize *
sizeof(mfcc_t)); + } + /* Compute as usual. */ + feat_compute_utt(fcb, cepbuf, nfr + win * 2, win, ofeat); + ckd_free(cepbuf); + return nfr; +} + +int32 +feat_s2mfc2feat_live(feat_t * fcb, mfcc_t ** uttcep, int32 *inout_ncep, + int32 beginutt, int32 endutt, mfcc_t *** ofeat) +{ + int32 win, cepsize, nbufcep; + int32 i, j, nfeatvec; + int32 zero = 0; + + /* Avoid having to check this everywhere. */ + if (inout_ncep == NULL) inout_ncep = &zero; + + /* Special case for entire utterances. */ + if (beginutt && endutt && *inout_ncep > 0) + return feat_s2mfc2feat_block_utt(fcb, uttcep, *inout_ncep, ofeat); + + win = feat_window_size(fcb); + cepsize = feat_cepsize(fcb); + + /* Empty the input buffer on start of utterance. */ + if (beginutt) + fcb->bufpos = fcb->curpos; + + /* Calculate how much data is in the buffer already. */ + nbufcep = fcb->bufpos - fcb->curpos; + if (nbufcep < 0) + nbufcep = fcb->bufpos + LIVEBUFBLOCKSIZE - fcb->curpos; + /* Add any data that we have to replicate. */ + if (beginutt && *inout_ncep > 0) + nbufcep += win; + if (endutt) + nbufcep += win; + + /* Only consume as much input as will fit in the buffer. */ + if (nbufcep + *inout_ncep > LIVEBUFBLOCKSIZE) { + /* We also can't overwrite the trailing window, hence the + * reason why win is subtracted here. */ + *inout_ncep = LIVEBUFBLOCKSIZE - nbufcep - win; + /* Cancel end of utterance processing. */ + endutt = FALSE; + } + + /* FIXME: Don't modify the input! */ + feat_cmn(fcb, uttcep, *inout_ncep, beginutt, endutt); + feat_agc(fcb, uttcep, *inout_ncep, beginutt, endutt); + + /* Replicate first frame into the first win frames if we're at the + * beginning of the utterance and there was some actual input to + * deal with. (FIXME: Not entirely sure why that condition) */ + if (beginutt && *inout_ncep > 0) { + for (i = 0; i < win; i++) { + memcpy(fcb->cepbuf[fcb->bufpos++], uttcep[0], + cepsize * sizeof(mfcc_t)); + fcb->bufpos %= LIVEBUFBLOCKSIZE; + } + /* Move the current pointer past this data. 
*/ + fcb->curpos = fcb->bufpos; + nbufcep -= win; + } + + /* Copy in frame data to the circular buffer. */ + for (i = 0; i < *inout_ncep; ++i) { + memcpy(fcb->cepbuf[fcb->bufpos++], uttcep[i], + cepsize * sizeof(mfcc_t)); + fcb->bufpos %= LIVEBUFBLOCKSIZE; + ++nbufcep; + } + + /* Replicate last frame into the last win frames if we're at the + * end of the utterance (even if there was no input, so we can + * flush the output). */ + if (endutt) { + int32 tpos; /* Index of last input frame. */ + if (fcb->bufpos == 0) + tpos = LIVEBUFBLOCKSIZE - 1; + else + tpos = fcb->bufpos - 1; + for (i = 0; i < win; ++i) { + memcpy(fcb->cepbuf[fcb->bufpos++], fcb->cepbuf[tpos], + cepsize * sizeof(mfcc_t)); + fcb->bufpos %= LIVEBUFBLOCKSIZE; + } + } + + /* We have to leave the trailing window of frames. */ + nfeatvec = nbufcep - win; + if (nfeatvec <= 0) + return 0; /* Do nothing. */ + + for (i = 0; i < nfeatvec; ++i) { + /* Handle wraparound cases. */ + if (fcb->curpos - win < 0 || fcb->curpos + win >= LIVEBUFBLOCKSIZE) { + /* Use tmpcepbuf for this case. Actually, we just need the pointers. */ + for (j = -win; j <= win; ++j) { + int32 tmppos = + (fcb->curpos + j + LIVEBUFBLOCKSIZE) % LIVEBUFBLOCKSIZE; + fcb->tmpcepbuf[win + j] = fcb->cepbuf[tmppos]; + } + fcb->compute_feat(fcb, fcb->tmpcepbuf + win, ofeat[i]); + } + else { + fcb->compute_feat(fcb, fcb->cepbuf + fcb->curpos, ofeat[i]); + } + /* Move the read pointer forward. 
*/ + ++fcb->curpos; + fcb->curpos %= LIVEBUFBLOCKSIZE; + } + + if (fcb->lda) + feat_lda_transform(fcb, ofeat, nfeatvec); + + if (fcb->subvecs) + feat_subvec_project(fcb, ofeat, nfeatvec); + + return nfeatvec; +} + +void +feat_update_stats(feat_t *fcb) +{ + if (fcb->cmn == CMN_PRIOR) { + cmn_prior_update(fcb->cmn_struct); + } + if (fcb->agc == AGC_EMAX || fcb->agc == AGC_MAX) { + agc_emax_update(fcb->agc_struct); + } +} + +feat_t * +feat_retain(feat_t *f) +{ + ++f->refcount; + return f; +} + +int +feat_free(feat_t * f) +{ + if (f == NULL) + return 0; + if (--f->refcount > 0) + return f->refcount; + + if (f->cepbuf) + ckd_free_2d((void **) f->cepbuf); + ckd_free(f->tmpcepbuf); + + if (f->name) { + ckd_free((void *) f->name); + } + if (f->lda) + ckd_free_3d((void ***) f->lda); + + ckd_free(f->stream_len); + ckd_free(f->sv_len); + ckd_free(f->sv_buf); + subvecs_free(f->subvecs); + + cmn_free(f->cmn_struct); + agc_free(f->agc_struct); + + ckd_free(f); + return 0; +} + + +void +feat_report(feat_t * f) +{ + int i; + E_INFO_NOFN("Initialization of feat_t, report:\n"); + E_INFO_NOFN("Feature type = %s\n", f->name); + E_INFO_NOFN("Cepstral size = %d\n", f->cepsize); + E_INFO_NOFN("Number of streams = %d\n", f->n_stream); + for (i = 0; i < f->n_stream; i++) { + E_INFO_NOFN("Vector size of stream[%d]: %d\n", i, + f->stream_len[i]); + } + E_INFO_NOFN("Number of subvectors = %d\n", f->n_sv); + for (i = 0; i < f->n_sv; i++) { + int32 *sv; + + E_INFO_NOFN("Components of subvector[%d]:", i); + for (sv = f->subvecs[i]; sv && *sv != -1; ++sv) + E_INFOCONT(" %d", *sv); + E_INFOCONT("\n"); + } + E_INFO_NOFN("Whether CMN is used = %d\n", f->cmn); + E_INFO_NOFN("Whether AGC is used = %d\n", f->agc); + E_INFO_NOFN("Whether variance is normalized = %d\n", f->varnorm); + E_INFO_NOFN("\n"); +} diff --git a/media/sphinxbase/src/libsphinxbase/feat/lda.c b/media/sphinxbase/src/libsphinxbase/feat/lda.c new file mode 100644 index 000000000..182b029de --- /dev/null +++ 
b/media/sphinxbase/src/libsphinxbase/feat/lda.c @@ -0,0 +1,158 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2006 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * lda.c -- Read and apply LDA matrices to features. 
+ *
+ * Author: David Huggins-Daines
+ */
+
+#include <assert.h>
+#include <string.h>
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#ifdef _MSC_VER
+#pragma warning (disable: 4018)
+#endif
+
+#include "sphinxbase/feat.h"
+#include "sphinxbase/ckd_alloc.h"
+#include "sphinxbase/bio.h"
+#include "sphinxbase/err.h"
+
+#define MATRIX_FILE_VERSION "0.1"
+
+/*
+ * Load an LDA transform from a binary matrix file into feat.
+ *
+ * Any transform already attached to feat is released first.  The file
+ * header's "version" entry is compared with MATRIX_FILE_VERSION and a
+ * mismatch only produces a warning.  The matrix data is read as a 3-D
+ * float32 array whose first dimension is stored in feat->n_lda.
+ *
+ * feat    - feature object; must be single-stream (n_stream == 1).
+ * ldafile - path of the transform file.
+ * dim     - requested output dimensionality; replaced by the number of
+ *           matrix rows when it is <= 0 or larger than that number.
+ *
+ * Returns 0 on success and -1 on failure.  A column count different
+ * from feat->stream_len[0] is reported through E_FATAL.
+ */
+int32
+feat_read_lda(feat_t *feat, const char *ldafile, int32 dim)
+{
+    FILE *fh;
+    int32 byteswap;
+    uint32 chksum, i, m, n;
+    char **argname, **argval;
+
+    assert(feat);
+    if (feat->n_stream != 1) {
+        E_ERROR("LDA incompatible with multi-stream features (n_stream = %d)\n",
+                feat->n_stream);
+        return -1;
+    }
+
+    if ((fh = fopen(ldafile, "rb")) == NULL) {
+        E_ERROR_SYSTEM("Failed to open transform file '%s' for reading", ldafile);
+        return -1;
+    }
+
+    if (bio_readhdr(fh, &argname, &argval, &byteswap) < 0) {
+        E_ERROR("Failed to read header from transform file '%s'\n", ldafile);
+        fclose(fh);
+        return -1;
+    }
+
+    for (i = 0; argname[i]; i++) {
+        if (strcmp(argname[i], "version") == 0) {
+            if (strcmp(argval[i], MATRIX_FILE_VERSION) != 0)
+                E_WARN("%s: Version mismatch: %s, expecting %s\n",
+                       ldafile, argval[i], MATRIX_FILE_VERSION);
+        }
+    }
+
+    bio_hdrarg_free(argname, argval);
+    argname = argval = NULL;
+
+    chksum = 0;
+
+    /* Release the previous transform and clear the pointer right away:
+     * if the read below fails we return with feat->lda == NULL instead
+     * of a dangling pointer that feat_free() (or the next call to this
+     * function) would free a second time. */
+    if (feat->lda) {
+        ckd_free_3d((void ***)feat->lda);
+        feat->lda = NULL;
+    }
+
+    {
+        /* Use a temporary variable to avoid strict-aliasing problems. */
+        void ***outlda;
+
+        if (bio_fread_3d(&outlda, sizeof(float32),
+                         &feat->n_lda, &m, &n,
+                         fh, byteswap, &chksum) < 0) {
+            E_ERROR_SYSTEM("%s: bio_fread_3d(lda) failed\n", ldafile);
+            fclose(fh);
+            return -1;
+        }
+        feat->lda = (void *)outlda;
+    }
+    fclose(fh);
+
+#ifdef FIXED_POINT
+    /* FIXME: This is a fragile hack that depends on mfcc_t and
+     * float32 being the same size (which they are, but...) */
+    for (i = 0; i < feat->n_lda * m * n; ++i) {
+        feat->lda[0][0][i] = FLOAT2MFCC(((float *)feat->lda[0][0])[i]);
+    }
+#endif
+
+    /* Note that SphinxTrain stores the eigenvectors as row vectors. */
+    if (n != feat->stream_len[0])
+        E_FATAL("LDA matrix dimension %d doesn't match feature stream size %d\n", n, feat->stream_len[0]);
+
+    /* Override dim from file if it is 0 or greater than m. */
+    if (dim > m || dim <= 0) {
+        dim = m;
+    }
+    feat->out_dim = dim;
+
+    return 0;
+}
+
+/*
+ * Apply the first LDA matrix (fcb->lda[0]) in place to stream 0 of each
+ * of the nfr frames in inout_feat.
+ *
+ * For every frame, output component j is the dot product of the input
+ * vector with row j of the matrix; components from feat_dimension(fcb)
+ * up to stream_len[0] are left at zero.
+ *
+ * NOTE(review): tmp holds stream_len[0] entries while j runs up to
+ * feat_dimension(fcb); presumably that never exceeds stream_len[0] --
+ * confirm against feat_read_lda(), which clamps dim to the row count.
+ */
+void
+feat_lda_transform(feat_t *fcb, mfcc_t ***inout_feat, uint32 nfr)
+{
+    mfcc_t *tmp;
+    uint32 i, j, k;
+
+    tmp = ckd_calloc(fcb->stream_len[0], sizeof(mfcc_t));
+    for (i = 0; i < nfr; ++i) {
+        /* Do the matrix multiplication inline here since fcb->lda
+         * is transposed (eigenvectors in rows not columns). */
+        /* FIXME: In the future we ought to use the BLAS. */
+        memset(tmp, 0, sizeof(mfcc_t) * fcb->stream_len[0]);
+        for (j = 0; j < feat_dimension(fcb); ++j) {
+            for (k = 0; k < fcb->stream_len[0]; ++k) {
+                tmp[j] += MFCCMUL(inout_feat[i][0][k], fcb->lda[0][j][k]);
+            }
+        }
+        memcpy(inout_feat[i][0], tmp, fcb->stream_len[0] * sizeof(mfcc_t));
+    }
+    ckd_free(tmp);
+}
diff --git a/media/sphinxbase/src/libsphinxbase/lm/fsg_model.c b/media/sphinxbase/src/libsphinxbase/lm/fsg_model.c
new file mode 100644
index 000000000..374897754
--- /dev/null
+++ b/media/sphinxbase/src/libsphinxbase/lm/fsg_model.c
@@ -0,0 +1,944 @@
+/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
+/* ====================================================================
+ * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +/* System headers. */ +#ifdef _WIN32_WCE +/*MC in a debug build it's implicitly included by assert.h + but you need this in a release build */ +#include +#else +#include +#endif /* _WIN32_WCE */ +#include +#include +#include + +/* SphinxBase headers. */ +#include "sphinxbase/err.h" +#include "sphinxbase/pio.h" +#include "sphinxbase/ckd_alloc.h" +#include "sphinxbase/prim_type.h" +#include "sphinxbase/strfuncs.h" +#include "sphinxbase/hash_table.h" +#include "sphinxbase/fsg_model.h" + +/** + * Adjacency list (opaque) for a state in an FSG. + * + * Actually we use hash tables so that random access is a bit faster. + * Plus it allows us to make the lookup code a bit less ugly. + */ + +struct trans_list_s { + hash_table_t *null_trans; /* Null transitions keyed by state. */ + hash_table_t *trans; /* Lists of non-null transitions keyed by state. */ +}; + +/** + * Implementation of arc iterator. 
+ */ +struct fsg_arciter_s { + hash_iter_t *itor, *null_itor; + gnode_t *gn; +}; + +#define FSG_MODEL_BEGIN_DECL "FSG_BEGIN" +#define FSG_MODEL_END_DECL "FSG_END" +#define FSG_MODEL_N_DECL "N" +#define FSG_MODEL_NUM_STATES_DECL "NUM_STATES" +#define FSG_MODEL_S_DECL "S" +#define FSG_MODEL_START_STATE_DECL "START_STATE" +#define FSG_MODEL_F_DECL "F" +#define FSG_MODEL_FINAL_STATE_DECL "FINAL_STATE" +#define FSG_MODEL_T_DECL "T" +#define FSG_MODEL_TRANSITION_DECL "TRANSITION" +#define FSG_MODEL_COMMENT_CHAR '#' + + +static int32 +nextline_str2words(FILE * fp, int32 * lineno, + char **lineptr, char ***wordptr) +{ + for (;;) { + size_t len; + int32 n; + + ckd_free(*lineptr); + if ((*lineptr = fread_line(fp, &len)) == NULL) + return -1; + + (*lineno)++; + + if ((*lineptr)[0] == FSG_MODEL_COMMENT_CHAR) + continue; /* Skip comment lines */ + + n = str2words(*lineptr, NULL, 0); + if (n == 0) + continue; /* Skip blank lines */ + + /* Abuse of realloc(), but this doesn't have to be fast. */ + if (*wordptr == NULL) + *wordptr = ckd_calloc(n, sizeof(**wordptr)); + else + *wordptr = ckd_realloc(*wordptr, n * sizeof(**wordptr)); + return str2words(*lineptr, *wordptr, n); + } +} + +void +fsg_model_trans_add(fsg_model_t * fsg, + int32 from, int32 to, int32 logp, int32 wid) +{ + fsg_link_t *link; + glist_t gl; + gnode_t *gn; + + if (fsg->trans[from].trans == NULL) + fsg->trans[from].trans = hash_table_new(5, HASH_CASE_YES); + + /* Check for duplicate link (i.e., link already exists with label=wid) */ + for (gn = gl = fsg_model_trans(fsg, from, to); gn; gn = gnode_next(gn)) { + link = (fsg_link_t *) gnode_ptr(gn); + if (link->wid == wid) { + if (link->logs2prob < logp) + link->logs2prob = logp; + return; + } + } + + /* Create transition object */ + link = listelem_malloc(fsg->link_alloc); + link->from_state = from; + link->to_state = to; + link->logs2prob = logp; + link->wid = wid; + + /* Add it to the list of transitions and update the hash table */ + gl = glist_add_ptr(gl, (void 
*) link); + hash_table_replace_bkey(fsg->trans[from].trans, + (char const *) &link->to_state, + sizeof(link->to_state), gl); +} + +int32 +fsg_model_tag_trans_add(fsg_model_t * fsg, int32 from, int32 to, + int32 logp, int32 wid) +{ + fsg_link_t *link, *link2; + + /* Check for transition probability */ + if (logp > 0) { + E_FATAL("Null transition prob must be <= 1.0 (state %d -> %d)\n", + from, to); + } + + /* Self-loop null transitions (with prob <= 1.0) are redundant */ + if (from == to) + return -1; + + if (fsg->trans[from].null_trans == NULL) + fsg->trans[from].null_trans = hash_table_new(5, HASH_CASE_YES); + + /* Check for a duplicate link; if found, keep the higher prob */ + link = fsg_model_null_trans(fsg, from, to); + if (link) { + if (link->logs2prob < logp) { + link->logs2prob = logp; + return 0; + } + else + return -1; + } + + /* Create null transition object */ + link = listelem_malloc(fsg->link_alloc); + link->from_state = from; + link->to_state = to; + link->logs2prob = logp; + link->wid = -1; + + link2 = (fsg_link_t *) + hash_table_enter_bkey(fsg->trans[from].null_trans, + (char const *) &link->to_state, + sizeof(link->to_state), link); + assert(link == link2); + + return 1; +} + +int32 +fsg_model_null_trans_add(fsg_model_t * fsg, int32 from, int32 to, + int32 logp) +{ + return fsg_model_tag_trans_add(fsg, from, to, logp, -1); +} + +glist_t +fsg_model_null_trans_closure(fsg_model_t * fsg, glist_t nulls) +{ + gnode_t *gn1; + int updated; + fsg_link_t *tl1, *tl2; + int32 k, n; + + E_INFO("Computing transitive closure for null transitions\n"); + + /* If our caller didn't give us a list of null-transitions, + make such a list. Just loop through all the FSG states, + and all the null-transitions in that state (which are kept in + their own hash table). 
*/ + if (nulls == NULL) { + int i; + for (i = 0; i < fsg->n_state; ++i) { + hash_iter_t *itor; + hash_table_t *null_trans = fsg->trans[i].null_trans; + if (null_trans == NULL) + continue; + for (itor = hash_table_iter(null_trans); + itor != NULL; + itor = hash_table_iter_next(itor)) { + nulls = glist_add_ptr(nulls, hash_entry_val(itor->ent)); + } + } + } + + /* + * Probably not the most efficient closure implementation, in general, but + * probably reasonably efficient for a sparse null transition matrix. + */ + n = 0; + do { + updated = FALSE; + + for (gn1 = nulls; gn1; gn1 = gnode_next(gn1)) { + hash_iter_t *itor; + + tl1 = (fsg_link_t *) gnode_ptr(gn1); + assert(tl1->wid < 0); + + if (fsg->trans[tl1->to_state].null_trans == NULL) + continue; + + for (itor = hash_table_iter(fsg->trans[tl1->to_state].null_trans); + itor; itor = hash_table_iter_next(itor)) { + + tl2 = (fsg_link_t *) hash_entry_val(itor->ent); + + k = fsg_model_null_trans_add(fsg, + tl1->from_state, + tl2->to_state, + tl1->logs2prob + + tl2->logs2prob); + if (k >= 0) { + updated = TRUE; + if (k > 0) { + nulls = glist_add_ptr(nulls, (void *) + fsg_model_null_trans + (fsg, tl1->from_state, + tl2->to_state)); + n++; + } + } + } + } + } while (updated); + + E_INFO("%d null transitions added\n", n); + + return nulls; +} + +glist_t +fsg_model_trans(fsg_model_t * fsg, int32 i, int32 j) +{ + void *val; + + if (fsg->trans[i].trans == NULL) + return NULL; + if (hash_table_lookup_bkey(fsg->trans[i].trans, (char const *) &j, + sizeof(j), &val) < 0) + return NULL; + return (glist_t) val; +} + +fsg_link_t * +fsg_model_null_trans(fsg_model_t * fsg, int32 i, int32 j) +{ + void *val; + + if (fsg->trans[i].null_trans == NULL) + return NULL; + if (hash_table_lookup_bkey(fsg->trans[i].null_trans, (char const *) &j, + sizeof(j), &val) < 0) + return NULL; + return (fsg_link_t *) val; +} + +fsg_arciter_t * +fsg_model_arcs(fsg_model_t * fsg, int32 i) +{ + fsg_arciter_t *itor; + + if (fsg->trans[i].trans == NULL && 
fsg->trans[i].null_trans == NULL) + return NULL; + itor = ckd_calloc(1, sizeof(*itor)); + if (fsg->trans[i].null_trans) + itor->null_itor = hash_table_iter(fsg->trans[i].null_trans); + if (fsg->trans[i].trans) + itor->itor = hash_table_iter(fsg->trans[i].trans); + if (itor->itor != NULL) + itor->gn = hash_entry_val(itor->itor->ent); + return itor; +} + +fsg_link_t * +fsg_arciter_get(fsg_arciter_t * itor) +{ + /* Iterate over non-null arcs first. */ + if (itor->gn) + return (fsg_link_t *) gnode_ptr(itor->gn); + else if (itor->null_itor) + return (fsg_link_t *) hash_entry_val(itor->null_itor->ent); + else + return NULL; +} + +fsg_arciter_t * +fsg_arciter_next(fsg_arciter_t * itor) +{ + /* Iterate over non-null arcs first. */ + if (itor->gn) { + itor->gn = gnode_next(itor->gn); + /* Move to the next destination arc. */ + if (itor->gn == NULL) { + itor->itor = hash_table_iter_next(itor->itor); + if (itor->itor != NULL) + itor->gn = hash_entry_val(itor->itor->ent); + else if (itor->null_itor == NULL) + goto stop_iteration; + } + } + else { + if (itor->null_itor == NULL) + goto stop_iteration; + itor->null_itor = hash_table_iter_next(itor->null_itor); + if (itor->null_itor == NULL) + goto stop_iteration; + } + return itor; + stop_iteration: + fsg_arciter_free(itor); + return NULL; + +} + +void +fsg_arciter_free(fsg_arciter_t * itor) +{ + if (itor == NULL) + return; + hash_table_iter_free(itor->null_itor); + hash_table_iter_free(itor->itor); + ckd_free(itor); +} + +int +fsg_model_word_id(fsg_model_t * fsg, char const *word) +{ + int wid; + + /* Search for an existing word matching this. */ + for (wid = 0; wid < fsg->n_word; ++wid) { + if (0 == strcmp(fsg->vocab[wid], word)) + break; + } + /* If not found, add this to the vocab. */ + if (wid == fsg->n_word) + return -1; + return wid; +} + +int +fsg_model_word_add(fsg_model_t * fsg, char const *word) +{ + int wid, old_size; + + /* Search for an existing word matching this. 
*/ + wid = fsg_model_word_id(fsg, word); + /* If not found, add this to the vocab. */ + if (wid == -1) { + wid = fsg->n_word; + if (fsg->n_word == fsg->n_word_alloc) { + old_size = fsg->n_word_alloc; + fsg->n_word_alloc += 10; + fsg->vocab = ckd_realloc(fsg->vocab, + fsg->n_word_alloc * + sizeof(*fsg->vocab)); + if (fsg->silwords) + fsg->silwords = + bitvec_realloc(fsg->silwords, old_size, fsg->n_word_alloc); + if (fsg->altwords) + fsg->altwords = + bitvec_realloc(fsg->altwords, old_size, fsg->n_word_alloc); + } + ++fsg->n_word; + fsg->vocab[wid] = ckd_salloc(word); + } + return wid; +} + +int +fsg_model_add_silence(fsg_model_t * fsg, char const *silword, + int state, float32 silprob) +{ + int32 logsilp; + int n_trans, silwid, src; + + E_INFO("Adding silence transitions for %s to FSG\n", silword); + + silwid = fsg_model_word_add(fsg, silword); + logsilp = (int32) (logmath_log(fsg->lmath, silprob) * fsg->lw); + if (fsg->silwords == NULL) + fsg->silwords = bitvec_alloc(fsg->n_word_alloc); + bitvec_set(fsg->silwords, silwid); + + n_trans = 0; + if (state == -1) { + for (src = 0; src < fsg->n_state; src++) { + fsg_model_trans_add(fsg, src, src, logsilp, silwid); + ++n_trans; + } + } + else { + fsg_model_trans_add(fsg, state, state, logsilp, silwid); + ++n_trans; + } + + E_INFO("Added %d silence word transitions\n", n_trans); + return n_trans; +} + +int +fsg_model_add_alt(fsg_model_t * fsg, char const *baseword, + char const *altword) +{ + int i, basewid, altwid; + int ntrans; + + /* FIXME: This will get slow, eventually... 
*/ + for (basewid = 0; basewid < fsg->n_word; ++basewid) + if (0 == strcmp(fsg->vocab[basewid], baseword)) + break; + if (basewid == fsg->n_word) { + E_ERROR("Base word %s not present in FSG vocabulary!\n", baseword); + return -1; + } + altwid = fsg_model_word_add(fsg, altword); + if (fsg->altwords == NULL) + fsg->altwords = bitvec_alloc(fsg->n_word_alloc); + bitvec_set(fsg->altwords, altwid); + if (fsg_model_is_filler(fsg, basewid)) { + if (fsg->silwords == NULL) + fsg->silwords = bitvec_alloc(fsg->n_word_alloc); + bitvec_set(fsg->silwords, altwid); + } + + E_DEBUG(2, ("Adding alternate word transitions (%s,%s) to FSG\n", + baseword, altword)); + + /* Look for all transitions involving baseword and duplicate them. */ + /* FIXME: This will also get slow, eventually... */ + ntrans = 0; + for (i = 0; i < fsg->n_state; ++i) { + hash_iter_t *itor; + if (fsg->trans[i].trans == NULL) + continue; + for (itor = hash_table_iter(fsg->trans[i].trans); itor; + itor = hash_table_iter_next(itor)) { + glist_t trans; + gnode_t *gn; + + trans = hash_entry_val(itor->ent); + for (gn = trans; gn; gn = gnode_next(gn)) { + fsg_link_t *fl = gnode_ptr(gn); + if (fl->wid == basewid) { + fsg_link_t *link; + + /* Create transition object */ + link = listelem_malloc(fsg->link_alloc); + link->from_state = fl->from_state; + link->to_state = fl->to_state; + link->logs2prob = fl->logs2prob; /* FIXME!!!??? */ + link->wid = altwid; + + trans = glist_add_ptr(trans, (void *) link); + ++ntrans; + } + } + hash_entry_val(itor->ent) = trans; + } + } + + E_DEBUG(2, ("Added %d alternate word transitions\n", ntrans)); + return ntrans; +} + + +fsg_model_t * +fsg_model_init(char const *name, logmath_t * lmath, float32 lw, + int32 n_state) +{ + fsg_model_t *fsg; + + /* Allocate basic stuff. */ + fsg = ckd_calloc(1, sizeof(*fsg)); + fsg->refcount = 1; + fsg->link_alloc = listelem_alloc_init(sizeof(fsg_link_t)); + fsg->lmath = lmath; + fsg->name = name ? 
ckd_salloc(name) : NULL; + fsg->n_state = n_state; + fsg->lw = lw; + + fsg->trans = ckd_calloc(fsg->n_state, sizeof(*fsg->trans)); + + return fsg; +} + +fsg_model_t * +fsg_model_read(FILE * fp, logmath_t * lmath, float32 lw) +{ + fsg_model_t *fsg; + hash_table_t *vocab; + hash_iter_t *itor; + int32 lastwid; + char **wordptr; + char *lineptr; + char *fsgname; + int32 lineno; + int32 n, i, j; + int n_state, n_trans, n_null_trans; + glist_t nulls; + float32 p; + + lineno = 0; + vocab = hash_table_new(32, FALSE); + wordptr = NULL; + lineptr = NULL; + nulls = NULL; + fsgname = NULL; + fsg = NULL; + + /* Scan upto FSG_BEGIN header */ + for (;;) { + n = nextline_str2words(fp, &lineno, &lineptr, &wordptr); + if (n < 0) { + E_ERROR("%s declaration missing\n", FSG_MODEL_BEGIN_DECL); + goto parse_error; + } + + if ((strcmp(wordptr[0], FSG_MODEL_BEGIN_DECL) == 0)) { + if (n > 2) { + E_ERROR("Line[%d]: malformed FSG_BEGIN declaration\n", + lineno); + goto parse_error; + } + break; + } + } + /* Save FSG name, or it will get clobbered below :(. + * If name is missing, try the default. + */ + if (n == 2) { + fsgname = ckd_salloc(wordptr[1]); + } + else { + E_WARN("FSG name is missing\n"); + fsgname = ckd_salloc("unknown"); + } + + /* Read #states */ + n = nextline_str2words(fp, &lineno, &lineptr, &wordptr); + if ((n != 2) + || ((strcmp(wordptr[0], FSG_MODEL_N_DECL) != 0) + && (strcmp(wordptr[0], FSG_MODEL_NUM_STATES_DECL) != 0)) + || (sscanf(wordptr[1], "%d", &n_state) != 1) + || (n_state <= 0)) { + E_ERROR + ("Line[%d]: #states declaration line missing or malformed\n", + lineno); + goto parse_error; + } + + /* Now create the FSG. 
*/ + fsg = fsg_model_init(fsgname, lmath, lw, n_state); + ckd_free(fsgname); + fsgname = NULL; + + /* Read start state */ + n = nextline_str2words(fp, &lineno, &lineptr, &wordptr); + if ((n != 2) + || ((strcmp(wordptr[0], FSG_MODEL_S_DECL) != 0) + && (strcmp(wordptr[0], FSG_MODEL_START_STATE_DECL) != 0)) + || (sscanf(wordptr[1], "%d", &(fsg->start_state)) != 1) + || (fsg->start_state < 0) + || (fsg->start_state >= fsg->n_state)) { + E_ERROR + ("Line[%d]: start state declaration line missing or malformed\n", + lineno); + goto parse_error; + } + + /* Read final state */ + n = nextline_str2words(fp, &lineno, &lineptr, &wordptr); + if ((n != 2) + || ((strcmp(wordptr[0], FSG_MODEL_F_DECL) != 0) + && (strcmp(wordptr[0], FSG_MODEL_FINAL_STATE_DECL) != 0)) + || (sscanf(wordptr[1], "%d", &(fsg->final_state)) != 1) + || (fsg->final_state < 0) + || (fsg->final_state >= fsg->n_state)) { + E_ERROR + ("Line[%d]: final state declaration line missing or malformed\n", + lineno); + goto parse_error; + } + + /* Read transitions */ + lastwid = 0; + n_trans = n_null_trans = 0; + for (;;) { + int32 wid, tprob; + + n = nextline_str2words(fp, &lineno, &lineptr, &wordptr); + if (n <= 0) { + E_ERROR("Line[%d]: transition or FSG_END statement expected\n", + lineno); + goto parse_error; + } + + if ((strcmp(wordptr[0], FSG_MODEL_END_DECL) == 0)) { + break; + } + + if ((strcmp(wordptr[0], FSG_MODEL_T_DECL) == 0) + || (strcmp(wordptr[0], FSG_MODEL_TRANSITION_DECL) == 0)) { + + + if (((n != 4) && (n != 5)) + || (sscanf(wordptr[1], "%d", &i) != 1) + || (sscanf(wordptr[2], "%d", &j) != 1) + || (i < 0) || (i >= fsg->n_state) + || (j < 0) || (j >= fsg->n_state)) { + E_ERROR + ("Line[%d]: transition spec malformed; Expecting: from-state to-state trans-prob [word]\n", + lineno); + goto parse_error; + } + + p = atof_c(wordptr[3]); + if ((p <= 0.0) || (p > 1.0)) { + E_ERROR + ("Line[%d]: transition spec malformed; Expecting float as transition probability\n", + lineno); + goto parse_error; + } + } + else 
{ + E_ERROR("Line[%d]: transition or FSG_END statement expected\n", + lineno); + goto parse_error; + } + + tprob = (int32) (logmath_log(lmath, p) * fsg->lw); + /* Add word to "dictionary". */ + if (n > 4) { + if (hash_table_lookup_int32(vocab, wordptr[4], &wid) < 0) { + (void) hash_table_enter_int32(vocab, + ckd_salloc(wordptr[4]), + lastwid); + wid = lastwid; + ++lastwid; + } + fsg_model_trans_add(fsg, i, j, tprob, wid); + ++n_trans; + } + else { + if (fsg_model_null_trans_add(fsg, i, j, tprob) == 1) { + ++n_null_trans; + nulls = + glist_add_ptr(nulls, fsg_model_null_trans(fsg, i, j)); + } + } + } + + E_INFO("FSG: %d states, %d unique words, %d transitions (%d null)\n", + fsg->n_state, hash_table_inuse(vocab), n_trans, n_null_trans); + + + /* Now create a string table from the "dictionary" */ + fsg->n_word = hash_table_inuse(vocab); + fsg->n_word_alloc = fsg->n_word + 10; /* Pad it a bit. */ + fsg->vocab = ckd_calloc(fsg->n_word_alloc, sizeof(*fsg->vocab)); + for (itor = hash_table_iter(vocab); itor; + itor = hash_table_iter_next(itor)) { + char const *word = hash_entry_key(itor->ent); + int32 wid = (int32) (long) hash_entry_val(itor->ent); + fsg->vocab[wid] = (char *) word; + } + hash_table_free(vocab); + + /* Do transitive closure on null transitions */ + nulls = fsg_model_null_trans_closure(fsg, nulls); + glist_free(nulls); + + ckd_free(lineptr); + ckd_free(wordptr); + + return fsg; + + parse_error: + for (itor = hash_table_iter(vocab); itor; + itor = hash_table_iter_next(itor)) + ckd_free((char *) hash_entry_key(itor->ent)); + glist_free(nulls); + hash_table_free(vocab); + ckd_free(fsgname); + ckd_free(lineptr); + ckd_free(wordptr); + fsg_model_free(fsg); + return NULL; +} + + +fsg_model_t * +fsg_model_readfile(const char *file, logmath_t * lmath, float32 lw) +{ + FILE *fp; + fsg_model_t *fsg; + + if ((fp = fopen(file, "r")) == NULL) { + E_ERROR_SYSTEM("Failed to open FSG file '%s' for reading", file); + return NULL; + } + fsg = fsg_model_read(fp, lmath, lw); 
+ fclose(fp); + return fsg; +} + +fsg_model_t * +fsg_model_retain(fsg_model_t * fsg) +{ + ++fsg->refcount; + return fsg; +} + +static void +trans_list_free(fsg_model_t * fsg, int32 i) +{ + hash_iter_t *itor; + + /* FIXME (maybe): FSG links will all get freed when we call + * listelem_alloc_free() so don't bother freeing them explicitly + * here. */ + if (fsg->trans[i].trans) { + for (itor = hash_table_iter(fsg->trans[i].trans); + itor; itor = hash_table_iter_next(itor)) { + glist_t gl = (glist_t) hash_entry_val(itor->ent); + glist_free(gl); + } + } + hash_table_free(fsg->trans[i].trans); + hash_table_free(fsg->trans[i].null_trans); +} + +int +fsg_model_free(fsg_model_t * fsg) +{ + int i; + + if (fsg == NULL) + return 0; + + if (--fsg->refcount > 0) + return fsg->refcount; + + for (i = 0; i < fsg->n_word; ++i) + ckd_free(fsg->vocab[i]); + for (i = 0; i < fsg->n_state; ++i) + trans_list_free(fsg, i); + ckd_free(fsg->trans); + ckd_free(fsg->vocab); + listelem_alloc_free(fsg->link_alloc); + bitvec_free(fsg->silwords); + bitvec_free(fsg->altwords); + ckd_free(fsg->name); + ckd_free(fsg); + return 0; +} + + +void +fsg_model_write(fsg_model_t * fsg, FILE * fp) +{ + int32 i; + + fprintf(fp, "%s %s\n", FSG_MODEL_BEGIN_DECL, + fsg->name ? fsg->name : ""); + fprintf(fp, "%s %d\n", FSG_MODEL_NUM_STATES_DECL, fsg->n_state); + fprintf(fp, "%s %d\n", FSG_MODEL_START_STATE_DECL, fsg->start_state); + fprintf(fp, "%s %d\n", FSG_MODEL_FINAL_STATE_DECL, fsg->final_state); + + for (i = 0; i < fsg->n_state; i++) { + fsg_arciter_t *itor; + + for (itor = fsg_model_arcs(fsg, i); itor; + itor = fsg_arciter_next(itor)) { + fsg_link_t *tl = fsg_arciter_get(itor); + + fprintf(fp, "%s %d %d %f %s\n", FSG_MODEL_TRANSITION_DECL, + tl->from_state, tl->to_state, + logmath_exp(fsg->lmath, + (int32) (tl->logs2prob / fsg->lw)), + (tl->wid < 0) ? 
"" : fsg_model_word_str(fsg, tl->wid)); + } + } + + fprintf(fp, "%s\n", FSG_MODEL_END_DECL); + + fflush(fp); +} + +void +fsg_model_writefile(fsg_model_t * fsg, char const *file) +{ + FILE *fp; + + assert(fsg); + + E_INFO("Writing FSG file '%s'\n", file); + + if ((fp = fopen(file, "w")) == NULL) { + E_ERROR_SYSTEM("Failed to open FSG file '%s' for reading", file); + return; + } + + fsg_model_write(fsg, fp); + + fclose(fp); +} + +static void +fsg_model_write_fsm_trans(fsg_model_t * fsg, int i, FILE * fp) +{ + fsg_arciter_t *itor; + + for (itor = fsg_model_arcs(fsg, i); itor; + itor = fsg_arciter_next(itor)) { + fsg_link_t *tl = fsg_arciter_get(itor); + fprintf(fp, "%d %d %s %f\n", + tl->from_state, tl->to_state, + (tl->wid < 0) ? "" : fsg_model_word_str(fsg, tl->wid), + -logmath_log_to_ln(fsg->lmath, tl->logs2prob / fsg->lw)); + } +} + +void +fsg_model_write_fsm(fsg_model_t * fsg, FILE * fp) +{ + int i; + + /* Write transitions from initial state first. */ + fsg_model_write_fsm_trans(fsg, fsg_model_start_state(fsg), fp); + + /* Other states. */ + for (i = 0; i < fsg->n_state; i++) { + if (i == fsg_model_start_state(fsg)) + continue; + fsg_model_write_fsm_trans(fsg, i, fp); + } + + /* Final state. 
*/ + fprintf(fp, "%d 0\n", fsg_model_final_state(fsg)); + + fflush(fp); +} + +void +fsg_model_writefile_fsm(fsg_model_t * fsg, char const *file) +{ + FILE *fp; + + assert(fsg); + + E_INFO("Writing FSM file '%s'\n", file); + + if ((fp = fopen(file, "w")) == NULL) { + E_ERROR_SYSTEM("Failed to open fsm file '%s' for writing", file); + return; + } + + fsg_model_write_fsm(fsg, fp); + + fclose(fp); +} + +void +fsg_model_write_symtab(fsg_model_t * fsg, FILE * file) +{ + int i; + + fprintf(file, " 0\n"); + for (i = 0; i < fsg_model_n_word(fsg); ++i) { + fprintf(file, "%s %d\n", fsg_model_word_str(fsg, i), i + 1); + } + fflush(file); +} + +void +fsg_model_writefile_symtab(fsg_model_t * fsg, char const *file) +{ + FILE *fp; + + assert(fsg); + + E_INFO("Writing FSM symbol table '%s'\n", file); + + if ((fp = fopen(file, "w")) == NULL) { + E_ERROR("Failed to open symbol table '%s' for writing", file); + return; + } + + fsg_model_write_symtab(fsg, fp); + + fclose(fp); +} diff --git a/media/sphinxbase/src/libsphinxbase/lm/jsgf.c b/media/sphinxbase/src/libsphinxbase/lm/jsgf.c new file mode 100644 index 000000000..90e161c62 --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/lm/jsgf.c @@ -0,0 +1,943 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2007 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +#include +#include + +#include "sphinxbase/ckd_alloc.h" +#include "sphinxbase/strfuncs.h" +#include "sphinxbase/hash_table.h" +#include "sphinxbase/filename.h" +#include "sphinxbase/err.h" +#include "sphinxbase/jsgf.h" + +#include "jsgf_internal.h" +#include "jsgf_parser.h" +#include "jsgf_scanner.h" + +extern int yyparse (void* scanner, jsgf_t* jsgf); + +/** + * \file jsgf.c + * + * This file implements the data structures for parsing JSGF grammars + * into Sphinx finite-state grammars. 
+ **/ + +static int expand_rule(jsgf_t *grammar, jsgf_rule_t *rule, int rule_entry, int rule_exit); + +jsgf_atom_t * +jsgf_atom_new(char *name, float weight) +{ + jsgf_atom_t *atom; + + atom = ckd_calloc(1, sizeof(*atom)); + atom->name = ckd_salloc(name); + atom->weight = weight; + return atom; +} + +int +jsgf_atom_free(jsgf_atom_t *atom) +{ + if (atom == NULL) + return 0; + ckd_free(atom->name); + ckd_free(atom); + return 0; +} + +jsgf_t * +jsgf_grammar_new(jsgf_t *parent) +{ + jsgf_t *grammar; + + grammar = ckd_calloc(1, sizeof(*grammar)); + /* If this is an imported/subgrammar, then we will share a global + * namespace with the parent grammar. */ + if (parent) { + grammar->rules = parent->rules; + grammar->imports = parent->imports; + grammar->searchpath = parent->searchpath; + grammar->parent = parent; + } + else { + grammar->rules = hash_table_new(64, 0); + grammar->imports = hash_table_new(16, 0); + } + + return grammar; +} + +void +jsgf_grammar_free(jsgf_t *jsgf) +{ + /* FIXME: Probably should just use refcounting instead. 
*/ + if (jsgf->parent == NULL) { + hash_iter_t *itor; + gnode_t *gn; + + for (itor = hash_table_iter(jsgf->rules); itor; + itor = hash_table_iter_next(itor)) { + ckd_free((char *)itor->ent->key); + jsgf_rule_free((jsgf_rule_t *)itor->ent->val); + } + hash_table_free(jsgf->rules); + for (itor = hash_table_iter(jsgf->imports); itor; + itor = hash_table_iter_next(itor)) { + ckd_free((char *)itor->ent->key); + jsgf_grammar_free((jsgf_t *)itor->ent->val); + } + hash_table_free(jsgf->imports); + for (gn = jsgf->searchpath; gn; gn = gnode_next(gn)) + ckd_free(gnode_ptr(gn)); + glist_free(jsgf->searchpath); + for (gn = jsgf->links; gn; gn = gnode_next(gn)) + ckd_free(gnode_ptr(gn)); + glist_free(jsgf->links); + } + ckd_free(jsgf->name); + ckd_free(jsgf->version); + ckd_free(jsgf->charset); + ckd_free(jsgf->locale); + ckd_free(jsgf); +} + +static void +jsgf_rhs_free(jsgf_rhs_t *rhs) +{ + gnode_t *gn; + + if (rhs == NULL) + return; + + jsgf_rhs_free(rhs->alt); + for (gn = rhs->atoms; gn; gn = gnode_next(gn)) + jsgf_atom_free(gnode_ptr(gn)); + glist_free(rhs->atoms); + ckd_free(rhs); +} + +jsgf_atom_t * +jsgf_kleene_new(jsgf_t *jsgf, jsgf_atom_t *atom, int plus) +{ + jsgf_rule_t *rule; + jsgf_atom_t *rule_atom; + jsgf_rhs_t *rhs; + + /* Generate an "internal" rule of the form ( | ) */ + /* Or if plus is true, ( | ) */ + rhs = ckd_calloc(1, sizeof(*rhs)); + if (plus) + rhs->atoms = glist_add_ptr(NULL, jsgf_atom_new(atom->name, 1.0)); + else + rhs->atoms = glist_add_ptr(NULL, jsgf_atom_new("", 1.0)); + rule = jsgf_define_rule(jsgf, NULL, rhs, 0); + rule_atom = jsgf_atom_new(rule->name, 1.0); + rhs = ckd_calloc(1, sizeof(*rhs)); + rhs->atoms = glist_add_ptr(NULL, rule_atom); + rhs->atoms = glist_add_ptr(rhs->atoms, atom); + rule->rhs->alt = rhs; + + return jsgf_atom_new(rule->name, 1.0); +} + +jsgf_rule_t * +jsgf_optional_new(jsgf_t *jsgf, jsgf_rhs_t *exp) +{ + jsgf_rhs_t *rhs = ckd_calloc(1, sizeof(*rhs)); + jsgf_atom_t *atom = jsgf_atom_new("", 1.0); + rhs->alt = exp; + 
rhs->atoms = glist_add_ptr(NULL, atom); + return jsgf_define_rule(jsgf, NULL, rhs, 0); +} + +void +jsgf_add_link(jsgf_t *grammar, jsgf_atom_t *atom, int from, int to) +{ + jsgf_link_t *link; + + link = ckd_calloc(1, sizeof(*link)); + link->from = from; + link->to = to; + link->atom = atom; + grammar->links = glist_add_ptr(grammar->links, link); +} + +static char * +extract_grammar_name(char *rule_name) +{ + char* dot_pos; + char* grammar_name = ckd_salloc(rule_name + 1); + if ((dot_pos = strrchr(grammar_name + 1, '.')) == NULL) { + ckd_free(grammar_name); + return NULL; + } + *dot_pos='\0'; + return grammar_name; +} + +char const * +jsgf_grammar_name(jsgf_t *jsgf) +{ + return jsgf->name; +} + +static char * +jsgf_fullname(jsgf_t *jsgf, const char *name) +{ + char *fullname; + + /* Check if it is already qualified */ + if (strchr(name + 1, '.')) + return ckd_salloc(name); + + /* Skip leading < in name */ + fullname = ckd_malloc(strlen(jsgf->name) + strlen(name) + 4); + sprintf(fullname, "<%s.%s", jsgf->name, name + 1); + return fullname; +} + +static char * +jsgf_fullname_from_rule(jsgf_rule_t *rule, const char *name) +{ + char *fullname, *grammar_name; + + /* Check if it is already qualified */ + if (strchr(name + 1, '.')) + return ckd_salloc(name); + + /* Skip leading < in name */ + if ((grammar_name = extract_grammar_name(rule->name)) == NULL) + return ckd_salloc(name); + fullname = ckd_malloc(strlen(grammar_name) + strlen(name) + 4); + sprintf(fullname, "<%s.%s", grammar_name, name + 1); + ckd_free(grammar_name); + + return fullname; +} + +/* Extract as rulename everything after the secondlast dot, if existent. + * Because everything before the secondlast dot is the path-specification. 
*/ +static char * +importname2rulename(char *importname) +{ + char *rulename = ckd_salloc(importname); + char *last_dotpos; + char *secondlast_dotpos; + + if ((last_dotpos = strrchr(rulename+1, '.')) != NULL) { + *last_dotpos='\0'; + if ((secondlast_dotpos = strrchr(rulename+1, '.')) != NULL) { + *last_dotpos='.'; + *secondlast_dotpos='<'; + secondlast_dotpos = ckd_salloc(secondlast_dotpos); + ckd_free(rulename); + return secondlast_dotpos; + } + else { + *last_dotpos='.'; + return rulename; + } + } + else { + return rulename; + } +} + +#define NO_NODE -1 +#define RECURSIVE_NODE -2 + +/** + * + * Expand a right-hand-side of a rule (i.e. a single alternate). + * + * @returns the FSG state at the end of this rule, NO_NODE if there's an + * error, and RECURSIVE_NODE if the right-hand-side ended in right-recursion (i.e. + * a link to an earlier FSG state). + */ +static int +expand_rhs(jsgf_t *grammar, jsgf_rule_t *rule, jsgf_rhs_t *rhs, + int rule_entry, int rule_exit) +{ + gnode_t *gn; + int lastnode; + + /* Last node expanded in this sequence. */ + lastnode = rule_entry; + + /* Iterate over atoms in rhs and generate links/nodes */ + for (gn = rhs->atoms; gn; gn = gnode_next(gn)) { + jsgf_atom_t *atom = gnode_ptr(gn); + + if (jsgf_atom_is_rule(atom)) { + jsgf_rule_t *subrule; + char *fullname; + gnode_t *subnode; + jsgf_rule_stack_t *rule_stack_entry = NULL; + + /* Special case for and pseudo-rules + If this is the only atom in the rhs, and it's the + first rhs in the rule, then emit a null transition, + creating an exit state if needed. 
*/ + if (0 == strcmp(atom->name, "")) { + if (gn == rhs->atoms && gnode_next(gn) == NULL) { + if (rule_exit == NO_NODE) { + jsgf_add_link(grammar, atom, + lastnode, grammar->nstate); + rule_exit = lastnode = grammar->nstate; + ++grammar->nstate; + } else { + jsgf_add_link(grammar, atom, + lastnode, rule_exit); + } + } + continue; + } + else if (0 == strcmp(atom->name, "")) { + /* Make this entire RHS unspeakable */ + return NO_NODE; + } + + fullname = jsgf_fullname_from_rule(rule, atom->name); + if (hash_table_lookup(grammar->rules, fullname, (void**)&subrule) == -1) { + E_ERROR("Undefined rule in RHS: %s\n", fullname); + ckd_free(fullname); + return NO_NODE; + } + ckd_free(fullname); + + /* Look for this subrule in the stack of expanded rules */ + for (subnode = grammar->rulestack; subnode; subnode = gnode_next(subnode)) { + rule_stack_entry = (jsgf_rule_stack_t *)gnode_ptr(subnode); + if (rule_stack_entry->rule == subrule) + break; + } + + if (subnode != NULL) { + /* Allow right-recursion only. */ + if (gnode_next(gn) != NULL) { + E_ERROR("Only right-recursion is permitted (in %s.%s)\n", + grammar->name, rule->name); + return NO_NODE; + } + /* Add a link back to the beginning of this rule instance */ + E_INFO("Right recursion %s %d => %d\n", atom->name, lastnode, rule_stack_entry->entry); + jsgf_add_link(grammar, atom, lastnode, rule_stack_entry->entry); + + /* Let our caller know that this rhs didn't reach an + end state. */ + lastnode = RECURSIVE_NODE; + } + else { + /* If this is the last atom in this rhs, link its + expansion to the parent rule's exit state. + Otherwise, create a new exit state for it. 
*/ + int subruleexit = NO_NODE; + if (gnode_next(gn) == NULL && rule_exit >= 0) + subruleexit = rule_exit; + + /* Expand the subrule */ + lastnode = expand_rule(grammar, subrule, lastnode, subruleexit); + + if (lastnode == NO_NODE) + return NO_NODE; + } + } + else { + /* An exit-state is created if this isn't the last atom + in the rhs, or if the containing rule doesn't have an + exit state yet. + Otherwise, the rhs's exit state becomes the containing + rule's exit state. */ + int exitstate; + if (gnode_next(gn) == NULL && rule_exit >= 0) { + exitstate = rule_exit; + } else { + exitstate = grammar->nstate; + ++grammar->nstate; + } + + /* Add a link for this token */ + jsgf_add_link(grammar, atom, + lastnode, exitstate); + lastnode = exitstate; + } + } + + return lastnode; +} + +static int +expand_rule(jsgf_t *grammar, jsgf_rule_t *rule, int rule_entry, + int rule_exit) +{ + jsgf_rule_stack_t* rule_stack_entry; + jsgf_rhs_t *rhs; + + /* Push this rule onto the stack */ + rule_stack_entry = (jsgf_rule_stack_t*)ckd_calloc(1, sizeof (jsgf_rule_stack_t)); + rule_stack_entry->rule = rule; + rule_stack_entry->entry = rule_entry; + grammar->rulestack = glist_add_ptr(grammar->rulestack, + rule_stack_entry); + + for (rhs = rule->rhs; rhs; rhs = rhs->alt) { + int lastnode; + + lastnode = expand_rhs(grammar, rule, rhs, + rule_entry, rule_exit); + + if (lastnode == NO_NODE) { + return NO_NODE; + } else if (lastnode == RECURSIVE_NODE) { + /* The rhs ended with right-recursion, i.e. a transition to + an earlier state. Nothing needs to happen at this level. */ + ; + } else if (rule_exit == NO_NODE) { + /* If this rule doesn't have an exit state yet, use the exit + state of its first right-hand-side. + All other right-hand-sides will use this exit state. */ + assert (lastnode >= 0); + rule_exit = lastnode; + } + } + + /* If no exit-state was created, use the entry-state. 
*/ + if (rule_exit == NO_NODE) { + rule_exit = rule_entry; + } + + /* Pop this rule from the rule stack */ + ckd_free(gnode_ptr(grammar->rulestack)); + grammar->rulestack = gnode_free(grammar->rulestack, NULL); + + return rule_exit; +} + +jsgf_rule_iter_t * +jsgf_rule_iter(jsgf_t *grammar) +{ + return hash_table_iter(grammar->rules); +} + +jsgf_rule_t * +jsgf_get_rule(jsgf_t *grammar, char const *name) +{ + void *val; + char *fullname; + + fullname = string_join("<", name, ">", NULL); + if (hash_table_lookup(grammar->rules, fullname, &val) < 0) { + ckd_free(fullname); + return NULL; + } + ckd_free(fullname); + return (jsgf_rule_t *)val; +} + +jsgf_rule_t * +jsgf_get_public_rule(jsgf_t *grammar) +{ + jsgf_rule_iter_t *itor; + jsgf_rule_t *public_rule = NULL; + + for (itor = jsgf_rule_iter(grammar); itor; + itor = jsgf_rule_iter_next(itor)) { + jsgf_rule_t *rule = jsgf_rule_iter_rule(itor); + if (jsgf_rule_public(rule)) { + const char *rule_name = jsgf_rule_name(rule); + char *dot_pos; + if ((dot_pos = strrchr(rule_name + 1, '.')) == NULL) { + public_rule = rule; + jsgf_rule_iter_free(itor); + break; + } + if (0 == strncmp(rule_name + 1, jsgf_grammar_name(grammar), dot_pos - rule_name - 1)) { + public_rule = rule; + jsgf_rule_iter_free(itor); + break; + } + } + } + return public_rule; +} + +char const * +jsgf_rule_name(jsgf_rule_t *rule) +{ + return rule->name; +} + +int +jsgf_rule_public(jsgf_rule_t *rule) +{ + return rule->is_public; +} + +static fsg_model_t * +jsgf_build_fsg_internal(jsgf_t *grammar, jsgf_rule_t *rule, + logmath_t *lmath, float32 lw, int do_closure) +{ + fsg_model_t *fsg; + glist_t nulls; + gnode_t *gn; + int rule_entry, rule_exit; + + /* Clear previous links */ + for (gn = grammar->links; gn; gn = gnode_next(gn)) { + ckd_free(gnode_ptr(gn)); + } + glist_free(grammar->links); + grammar->links = NULL; + grammar->nstate = 0; + + /* Create the top-level entry state, and expand the + top-level rule. 
*/ + rule_entry = grammar->nstate++; + rule_exit = expand_rule(grammar, rule, rule_entry, NO_NODE); + + /* If no exit-state was created, create one. */ + if (rule_exit == NO_NODE) { + rule_exit = grammar->nstate++; + jsgf_add_link(grammar, NULL, rule_entry, rule_exit); + } + + fsg = fsg_model_init(rule->name, lmath, lw, grammar->nstate); + fsg->start_state = rule_entry; + fsg->final_state = rule_exit; + grammar->links = glist_reverse(grammar->links); + for (gn = grammar->links; gn; gn = gnode_next(gn)) { + jsgf_link_t *link = gnode_ptr(gn); + + if (link->atom) { + if (jsgf_atom_is_rule(link->atom)) { + fsg_model_null_trans_add(fsg, link->from, link->to, + logmath_log(lmath, link->atom->weight)); + } + else { + int wid = fsg_model_word_add(fsg, link->atom->name); + fsg_model_trans_add(fsg, link->from, link->to, + logmath_log(lmath, link->atom->weight), wid); + } + } + else { + fsg_model_null_trans_add(fsg, link->from, link->to, 0); + } + } + if (do_closure) { + nulls = fsg_model_null_trans_closure(fsg, NULL); + glist_free(nulls); + } + + return fsg; +} + +fsg_model_t * +jsgf_build_fsg(jsgf_t *grammar, jsgf_rule_t *rule, + logmath_t *lmath, float32 lw) +{ + return jsgf_build_fsg_internal(grammar, rule, lmath, lw, TRUE); +} + +fsg_model_t * +jsgf_build_fsg_raw(jsgf_t *grammar, jsgf_rule_t *rule, + logmath_t *lmath, float32 lw) +{ + return jsgf_build_fsg_internal(grammar, rule, lmath, lw, FALSE); +} + +fsg_model_t * +jsgf_read_file(const char *file, logmath_t * lmath, float32 lw) +{ + fsg_model_t *fsg; + jsgf_rule_t *rule; + jsgf_t *jsgf; + jsgf_rule_iter_t *itor; + + if ((jsgf = jsgf_parse_file(file, NULL)) == NULL) { + E_ERROR("Error parsing file: %s\n", file); + return NULL; + } + + rule = NULL; + for (itor = jsgf_rule_iter(jsgf); itor; + itor = jsgf_rule_iter_next(itor)) { + rule = jsgf_rule_iter_rule(itor); + if (jsgf_rule_public(rule)) { + jsgf_rule_iter_free(itor); + break; + } + } + if (rule == NULL) { + E_ERROR("No public rules found in %s\n", file); + return 
NULL; + } + fsg = jsgf_build_fsg(jsgf, rule, lmath, lw); + jsgf_grammar_free(jsgf); + return fsg; +} + +fsg_model_t * +jsgf_read_string(const char *string, logmath_t * lmath, float32 lw) +{ + fsg_model_t *fsg; + jsgf_rule_t *rule; + jsgf_t *jsgf; + jsgf_rule_iter_t *itor; + + if ((jsgf = jsgf_parse_string(string, NULL)) == NULL) { + E_ERROR("Error parsing input string\n"); + return NULL; + } + + rule = NULL; + for (itor = jsgf_rule_iter(jsgf); itor; + itor = jsgf_rule_iter_next(itor)) { + rule = jsgf_rule_iter_rule(itor); + if (jsgf_rule_public(rule)) { + jsgf_rule_iter_free(itor); + break; + } + } + if (rule == NULL) { + jsgf_grammar_free(jsgf); + E_ERROR("No public rules found in input string\n"); + return NULL; + } + fsg = jsgf_build_fsg(jsgf, rule, lmath, lw); + jsgf_grammar_free(jsgf); + return fsg; +} + + +int +jsgf_write_fsg(jsgf_t *grammar, jsgf_rule_t *rule, FILE *outfh) +{ + fsg_model_t *fsg; + logmath_t *lmath = logmath_init(1.0001, 0, 0); + + if ((fsg = jsgf_build_fsg_raw(grammar, rule, lmath, 1.0)) == NULL) + goto error_out; + + fsg_model_write(fsg, outfh); + logmath_free(lmath); + return 0; + +error_out: + logmath_free(lmath); + return -1; +} + +jsgf_rule_t * +jsgf_define_rule(jsgf_t *jsgf, char *name, jsgf_rhs_t *rhs, int is_public) +{ + jsgf_rule_t *rule; + void *val; + + if (name == NULL) { + name = ckd_malloc(strlen(jsgf->name) + 16); + sprintf(name, "<%s.g%05d>", jsgf->name, hash_table_inuse(jsgf->rules)); + } + else { + char *newname; + + newname = jsgf_fullname(jsgf, name); + name = newname; + } + + rule = ckd_calloc(1, sizeof(*rule)); + rule->refcnt = 1; + rule->name = ckd_salloc(name); + rule->rhs = rhs; + rule->is_public = is_public; + + E_INFO("Defined rule: %s%s\n", + rule->is_public ? 
"PUBLIC " : "", + rule->name); + val = hash_table_enter(jsgf->rules, name, rule); + if (val != (void *)rule) { + E_WARN("Multiply defined symbol: %s\n", name); + } + return rule; +} + +jsgf_rule_t * +jsgf_rule_retain(jsgf_rule_t *rule) +{ + ++rule->refcnt; + return rule; +} + +int +jsgf_rule_free(jsgf_rule_t *rule) +{ + if (rule == NULL) + return 0; + if (--rule->refcnt > 0) + return rule->refcnt; + jsgf_rhs_free(rule->rhs); + ckd_free(rule->name); + ckd_free(rule); + return 0; +} + + +/* FIXME: This should go in libsphinxutil */ +static char * +path_list_search(glist_t paths, char *path) +{ + gnode_t *gn; + + for (gn = paths; gn; gn = gnode_next(gn)) { + char *fullpath; + FILE *tmp; + + fullpath = string_join(gnode_ptr(gn), "/", path, NULL); + tmp = fopen(fullpath, "r"); + if (tmp != NULL) { + fclose(tmp); + return fullpath; + } + else { + ckd_free(fullpath); + } + } + return NULL; +} + +jsgf_rule_t * +jsgf_import_rule(jsgf_t *jsgf, char *name) +{ + char *c, *path, *newpath; + size_t namelen, packlen; + void *val; + jsgf_t *imp; + int import_all; + + /* Trim the leading and trailing <> */ + namelen = strlen(name); + path = ckd_malloc(namelen - 2 + 6); /* room for a trailing .gram */ + strcpy(path, name + 1); + /* Split off the first part of the name */ + c = strrchr(path, '.'); + if (c == NULL) { + E_ERROR("Imported rule is not qualified: %s\n", name); + ckd_free(path); + return NULL; + } + packlen = c - path; + *c = '\0'; + + /* Look for import foo.* */ + import_all = (strlen(name) > 2 && 0 == strcmp(name + namelen - 3, ".*>")); + + /* Construct a filename. 
*/ + for (c = path; *c; ++c) + if (*c == '.') *c = '/'; + strcat(path, ".gram"); + newpath = path_list_search(jsgf->searchpath, path); + if (newpath == NULL) { + E_ERROR("Failed to find grammar %s\n", path); + ckd_free(path); + return NULL; + } + ckd_free(path); + + path = newpath; + E_INFO("Importing %s from %s to %s\n", name, path, jsgf->name); + + /* FIXME: Also, we need to make sure that path is fully qualified + * here, by adding any prefixes from jsgf->name to it. */ + /* See if we have parsed it already */ + if (hash_table_lookup(jsgf->imports, path, &val) == 0) { + E_INFO("Already imported %s\n", path); + imp = val; + ckd_free(path); + } + else { + /* If not, parse it. */ + imp = jsgf_parse_file(path, jsgf); + val = hash_table_enter(jsgf->imports, path, imp); + if (val != (void *)imp) { + E_WARN("Multiply imported file: %s\n", path); + } + } + if (imp != NULL) { + hash_iter_t *itor; + /* Look for public rules matching rulename. */ + for (itor = hash_table_iter(imp->rules); itor; + itor = hash_table_iter_next(itor)) { + hash_entry_t *he = itor->ent; + jsgf_rule_t *rule = hash_entry_val(he); + int rule_matches; + char *rule_name = importname2rulename(name); + + if (import_all) { + /* Match package name (symbol table is shared) */ + rule_matches = !strncmp(rule_name, rule->name, packlen + 1); + } + else { + /* Exact match */ + rule_matches = !strcmp(rule_name, rule->name); + } + ckd_free(rule_name); + if (rule->is_public && rule_matches) { + void *val; + char *newname; + + /* Link this rule into the current namespace. 
*/ + c = strrchr(rule->name, '.'); + assert(c != NULL); + newname = jsgf_fullname(jsgf, c); + + E_INFO("Imported %s\n", newname); + val = hash_table_enter(jsgf->rules, newname, + jsgf_rule_retain(rule)); + if (val != (void *)rule) { + E_WARN("Multiply defined symbol: %s\n", newname); + } + if (!import_all) { + hash_table_iter_free(itor); + return rule; + } + } + } + } + + return NULL; +} + +static void +jsgf_set_search_path(jsgf_t *jsgf, const char *filename) +{ + char *jsgf_path; + +#if !defined(_WIN32_WCE) + if ((jsgf_path = getenv("JSGF_PATH")) != NULL) { + char *word, *c; + /* FIXME: This should be a function in libsphinxbase. */ + word = jsgf_path = ckd_salloc(jsgf_path); + while ((c = strchr(word, ':'))) { + *c = '\0'; + jsgf->searchpath = glist_add_ptr(jsgf->searchpath, word); + word = c + 1; + } + jsgf->searchpath = glist_add_ptr(jsgf->searchpath, word); + jsgf->searchpath = glist_reverse(jsgf->searchpath); + return; + } +#endif + + if (!filename) { + jsgf->searchpath = glist_add_ptr(jsgf->searchpath, ckd_salloc(".")); + return; + } + + jsgf_path = ckd_salloc(filename); + path2dirname(filename, jsgf_path); + jsgf->searchpath = glist_add_ptr(jsgf->searchpath, jsgf_path); +} + +jsgf_t * +jsgf_parse_file(const char *filename, jsgf_t *parent) +{ + yyscan_t yyscanner; + jsgf_t *jsgf; + int yyrv; + FILE *in = NULL; + + yylex_init(&yyscanner); + if (filename == NULL) { + yyset_in(stdin, yyscanner); + } + else { + in = fopen(filename, "r"); + if (in == NULL) { + E_ERROR_SYSTEM("Failed to open %s for parsing", filename); + return NULL; + } + yyset_in(in, yyscanner); + } + + jsgf = jsgf_grammar_new(parent); + + if (!parent) + jsgf_set_search_path(jsgf, filename); + + yyrv = yyparse(yyscanner, jsgf); + if (yyrv != 0) { + E_ERROR("Failed to parse JSGF grammar from '%s'\n", filename ? 
filename : "(stdin)"); + jsgf_grammar_free(jsgf); + yylex_destroy(yyscanner); + return NULL; + } + if (in) + fclose(in); + yylex_destroy(yyscanner); + + return jsgf; +} + +jsgf_t * +jsgf_parse_string(const char *string, jsgf_t * parent) +{ + yyscan_t yyscanner; + jsgf_t *jsgf; + int yyrv; + YY_BUFFER_STATE buf; + + yylex_init(&yyscanner); + buf = yy_scan_string(string, yyscanner); + + jsgf = jsgf_grammar_new(parent); + if (!parent) + jsgf_set_search_path(jsgf, NULL); + + yyrv = yyparse(yyscanner, jsgf); + if (yyrv != 0) { + E_ERROR("Failed to parse JSGF grammar from input string\n"); + jsgf_grammar_free(jsgf); + yy_delete_buffer(buf, yyscanner); + yylex_destroy(yyscanner); + return NULL; + } + yy_delete_buffer(buf, yyscanner); + yylex_destroy(yyscanner); + + return jsgf; +} diff --git a/media/sphinxbase/src/libsphinxbase/lm/jsgf_internal.h b/media/sphinxbase/src/libsphinxbase/lm/jsgf_internal.h new file mode 100644 index 000000000..a5cbc9833 --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/lm/jsgf_internal.h @@ -0,0 +1,140 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2007 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. 
+ * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +#ifndef __JSGF_INTERNAL_H__ +#define __JSGF_INTERNAL_H__ + +/** + * @file jsgf_internal.h Internal definitions for JSGF grammar compiler + */ + +#include + +#include +#include +#include +#include +#include +#include + + +/* Flex uses strdup which is missing on WinCE */ +#if defined(_WIN32) || defined(_WIN32_WCE) +#define strdup _strdup +#endif + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. */ +} +#endif + +#define YY_NO_INPUT /* Silence a compiler warning. */ + +typedef struct jsgf_rhs_s jsgf_rhs_t; +typedef struct jsgf_atom_s jsgf_atom_t; +typedef struct jsgf_link_s jsgf_link_t; +typedef struct jsgf_rule_stack_s jsgf_rule_stack_t; + +struct jsgf_s { + char *version; /**< JSGF version (from header) */ + char *charset; /**< JSGF charset (default UTF-8) */ + char *locale; /**< JSGF locale (default C) */ + char *name; /**< Grammar name */ + + hash_table_t *rules; /**< Defined or imported rules in this grammar. */ + hash_table_t *imports; /**< Pointers to imported grammars. 
*/ + jsgf_t *parent; /**< Parent grammar (if this is an imported one) */ + glist_t searchpath; /**< List of directories to search for grammars. */ + + /* Scratch variables for FSG conversion. */ + int nstate; /**< Number of generated states. */ + glist_t links; /**< Generated FSG links. */ + glist_t rulestack; /**< Stack of currently expanded rules. */ +}; + +/* A type to keep track of the stack of rules currently being expanded. */ +struct jsgf_rule_stack_s { + jsgf_rule_t *rule; /**< The rule being expanded */ + int entry; /**< The entry-state for this expansion */ +}; + +struct jsgf_rule_s { + int refcnt; /**< Reference count. */ + char *name; /**< Rule name (NULL for an alternation/grouping) */ + int is_public; /**< Is this rule marked 'public'? */ + jsgf_rhs_t *rhs; /**< Expansion */ +}; + +struct jsgf_rhs_s { + glist_t atoms; /**< Sequence of items */ + jsgf_rhs_t *alt; /**< Linked list of alternates */ +}; + +struct jsgf_atom_s { + char *name; /**< Rule or token name */ + glist_t tags; /**< Tags, if any (glist_t of char *) */ + float weight; /**< Weight (default 1) */ +}; + +struct jsgf_link_s { + jsgf_atom_t *atom; /**< Name, tags, weight */ + int from; /**< From state */ + int to; /**< To state */ +}; + +#define jsgf_atom_is_rule(atom) ((atom)->name[0] == '<') + +void jsgf_add_link(jsgf_t *grammar, jsgf_atom_t *atom, int from, int to); +jsgf_atom_t *jsgf_atom_new(char *name, float weight); +jsgf_atom_t *jsgf_kleene_new(jsgf_t *jsgf, jsgf_atom_t *atom, int plus); +jsgf_rule_t *jsgf_optional_new(jsgf_t *jsgf, jsgf_rhs_t *exp); +jsgf_rule_t *jsgf_define_rule(jsgf_t *jsgf, char *name, jsgf_rhs_t *rhs, int is_public); +jsgf_rule_t *jsgf_import_rule(jsgf_t *jsgf, char *name); + +int jsgf_atom_free(jsgf_atom_t *atom); +int jsgf_rule_free(jsgf_rule_t *rule); +jsgf_rule_t *jsgf_rule_retain(jsgf_rule_t *rule); + +#ifdef __cplusplus +} +#endif + + +#endif /* __JSGF_H__ */ diff --git a/media/sphinxbase/src/libsphinxbase/lm/jsgf_parser.c 
b/media/sphinxbase/src/libsphinxbase/lm/jsgf_parser.c new file mode 100644 index 000000000..20acbb9d9 --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/lm/jsgf_parser.c @@ -0,0 +1,1799 @@ + +/* A Bison parser, made by GNU Bison 2.4.1. */ + +/* Skeleton implementation for Bison's Yacc-like parsers in C + + Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004, 2005, 2006 + Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* As a special exception, you may create a larger work that contains + part or all of the Bison parser skeleton and distribute that work + under terms of your choice, so long as that work isn't itself a + parser generator using the skeleton or a modified version thereof + as a parser skeleton. Alternatively, if you modify or redistribute + the parser skeleton itself, you may (at your option) remove this + special exception, which will cause the skeleton and the resulting + Bison output files to be licensed under the GNU General Public + License without this special exception. + + This special exception was added by the Free Software Foundation in + version 2.2 of Bison. */ + +/* C LALR(1) parser skeleton written by Richard Stallman, by + simplifying the original so-called "semantic" parser. */ + +/* All symbols defined below should begin with yy or YY, to avoid + infringing on user name space. 
This should be done even for local + variables, as they might otherwise be expanded by user macros. + There are some unavoidable exceptions within include files to + define necessary library symbols; they are noted "INFRINGES ON + USER NAME SPACE" below. */ + +/* Identify Bison output. */ +#define YYBISON 1 + +/* Bison version. */ +#define YYBISON_VERSION "2.4.1" + +/* Skeleton name. */ +#define YYSKELETON_NAME "yacc.c" + +/* Pure parsers. */ +#define YYPURE 1 + +/* Push parsers. */ +#define YYPUSH 0 + +/* Pull parsers. */ +#define YYPULL 1 + +/* Using locations. */ +#define YYLSP_NEEDED 0 + + + +/* Copy the first part of user declarations. */ + +/* Line 189 of yacc.c */ +#line 37 "jsgf_parser.y" + +#define YYERROR_VERBOSE + +#include +#include + +#include +#include +#include + +#include "jsgf_internal.h" +#include "jsgf_parser.h" +#include "jsgf_scanner.h" + +/* Suppress warnings from generated code */ +#if defined _MSC_VER +#pragma warning(disable: 4273) +#endif + +void yyerror(yyscan_t lex, jsgf_t *jsgf, const char *s); + + + +/* Line 189 of yacc.c */ +#line 97 "jsgf_parser.c" + +/* Enabling traces. */ +#ifndef YYDEBUG +# define YYDEBUG 0 +#endif + +/* Enabling verbose error messages. */ +#ifdef YYERROR_VERBOSE +# undef YYERROR_VERBOSE +# define YYERROR_VERBOSE 1 +#else +# define YYERROR_VERBOSE 0 +#endif + +/* Enabling the token table. */ +#ifndef YYTOKEN_TABLE +# define YYTOKEN_TABLE 0 +#endif + + +/* Tokens. */ +#ifndef YYTOKENTYPE +# define YYTOKENTYPE + /* Put the tokens into the symbol table, so that GDB and other debuggers + know about them. */ + enum yytokentype { + HEADER = 258, + GRAMMAR = 259, + IMPORT = 260, + PUBLIC = 261, + TOKEN = 262, + RULENAME = 263, + TAG = 264, + WEIGHT = 265 + }; +#endif +/* Tokens. */ +#define HEADER 258 +#define GRAMMAR 259 +#define IMPORT 260 +#define PUBLIC 261 +#define TOKEN 262 +#define RULENAME 263 +#define TAG 264 +#define WEIGHT 265 + + + + +#if ! defined YYSTYPE && ! 
defined YYSTYPE_IS_DECLARED +typedef union YYSTYPE +{ + +/* Line 214 of yacc.c */ +#line 65 "jsgf_parser.y" + + char *name; + float weight; + jsgf_rule_t *rule; + jsgf_rhs_t *rhs; + jsgf_atom_t *atom; + + + +/* Line 214 of yacc.c */ +#line 163 "jsgf_parser.c" +} YYSTYPE; +# define YYSTYPE_IS_TRIVIAL 1 +# define yystype YYSTYPE /* obsolescent; will be withdrawn */ +# define YYSTYPE_IS_DECLARED 1 +#endif + + +/* Copy the second part of user declarations. */ + + +/* Line 264 of yacc.c */ +#line 175 "jsgf_parser.c" + +#ifdef short +# undef short +#endif + +#ifdef YYTYPE_UINT8 +typedef YYTYPE_UINT8 yytype_uint8; +#else +typedef unsigned char yytype_uint8; +#endif + +#ifdef YYTYPE_INT8 +typedef YYTYPE_INT8 yytype_int8; +#elif (defined __STDC__ || defined __C99__FUNC__ \ + || defined __cplusplus || defined _MSC_VER) +typedef signed char yytype_int8; +#else +typedef short int yytype_int8; +#endif + +#ifdef YYTYPE_UINT16 +typedef YYTYPE_UINT16 yytype_uint16; +#else +typedef unsigned short int yytype_uint16; +#endif + +#ifdef YYTYPE_INT16 +typedef YYTYPE_INT16 yytype_int16; +#else +typedef short int yytype_int16; +#endif + +#ifndef YYSIZE_T +# ifdef __SIZE_TYPE__ +# define YYSIZE_T __SIZE_TYPE__ +# elif defined size_t +# define YYSIZE_T size_t +# elif ! defined YYSIZE_T && (defined __STDC__ || defined __C99__FUNC__ \ + || defined __cplusplus || defined _MSC_VER) +# include <stddef.h> /* INFRINGES ON USER NAME SPACE */ +# define YYSIZE_T size_t +# else +# define YYSIZE_T unsigned int +# endif +#endif + +#define YYSIZE_MAXIMUM ((YYSIZE_T) -1) + +#ifndef YY_ +# if YYENABLE_NLS +# if ENABLE_NLS +# include <libintl.h> /* INFRINGES ON USER NAME SPACE */ +# define YY_(msgid) dgettext ("bison-runtime", msgid) +# endif +# endif +# ifndef YY_ +# define YY_(msgid) msgid +# endif +#endif + +/* Suppress unused-variable warnings by "using" E. */ +#if !
defined lint || defined __GNUC__ +# define YYUSE(e) ((void) (e)) +#else +# define YYUSE(e) /* empty */ +#endif + +/* Identity function, used to suppress warnings about constant conditions. */ +#ifndef lint +# define YYID(n) (n) +#else +#if (defined __STDC__ || defined __C99__FUNC__ \ + || defined __cplusplus || defined _MSC_VER) +static int +YYID (int yyi) +#else +static int +YYID (yyi) + int yyi; +#endif +{ + return yyi; +} +#endif + +#if ! defined yyoverflow || YYERROR_VERBOSE + +/* The parser invokes alloca or malloc; define the necessary symbols. */ + +# ifdef YYSTACK_USE_ALLOCA +# if YYSTACK_USE_ALLOCA +# ifdef __GNUC__ +# define YYSTACK_ALLOC __builtin_alloca +# elif defined __BUILTIN_VA_ARG_INCR +# include <alloca.h> /* INFRINGES ON USER NAME SPACE */ +# elif defined _AIX +# define YYSTACK_ALLOC __alloca +# elif defined _MSC_VER +# include <malloc.h> /* INFRINGES ON USER NAME SPACE */ +# define alloca _alloca +# else +# define YYSTACK_ALLOC alloca +# if ! defined _ALLOCA_H && ! defined _STDLIB_H && (defined __STDC__ || defined __C99__FUNC__ \ + || defined __cplusplus || defined _MSC_VER) +# include <stdlib.h> /* INFRINGES ON USER NAME SPACE */ +# ifndef _STDLIB_H +# define _STDLIB_H 1 +# endif +# endif +# endif +# endif +# endif + +# ifdef YYSTACK_ALLOC + /* Pacify GCC's `empty if-body' warning. */ +# define YYSTACK_FREE(Ptr) do { /* empty */; } while (YYID (0)) +# ifndef YYSTACK_ALLOC_MAXIMUM + /* The OS might guarantee only one guard page at the bottom of the stack, + and a page size can be as small as 4096 bytes. So we cannot safely + invoke alloca (N) if N exceeds 4096. Use a slightly smaller number + to allow for a few compiler-allocated temporary stack slots. */ +# define YYSTACK_ALLOC_MAXIMUM 4032 /* reasonable circa 2006 */ +# endif +# else +# define YYSTACK_ALLOC YYMALLOC +# define YYSTACK_FREE YYFREE +# ifndef YYSTACK_ALLOC_MAXIMUM +# define YYSTACK_ALLOC_MAXIMUM YYSIZE_MAXIMUM +# endif +# if (defined __cplusplus && ! defined _STDLIB_H \ + && !
((defined YYMALLOC || defined malloc) \ + && (defined YYFREE || defined free))) +# include <stdlib.h> /* INFRINGES ON USER NAME SPACE */ +# ifndef _STDLIB_H +# define _STDLIB_H 1 +# endif +# endif +# ifndef YYMALLOC +# define YYMALLOC malloc +# if ! defined malloc && ! defined _STDLIB_H && (defined __STDC__ || defined __C99__FUNC__ \ + || defined __cplusplus || defined _MSC_VER) +void *malloc (YYSIZE_T); /* INFRINGES ON USER NAME SPACE */ +# endif +# endif +# ifndef YYFREE +# define YYFREE free +# if ! defined free && ! defined _STDLIB_H && (defined __STDC__ || defined __C99__FUNC__ \ + || defined __cplusplus || defined _MSC_VER) +void free (void *); /* INFRINGES ON USER NAME SPACE */ +# endif +# endif +# endif +#endif /* ! defined yyoverflow || YYERROR_VERBOSE */ + + +#if (! defined yyoverflow \ + && (! defined __cplusplus \ + || (defined YYSTYPE_IS_TRIVIAL && YYSTYPE_IS_TRIVIAL))) + +/* A type that is properly aligned for any stack member. */ +union yyalloc +{ + yytype_int16 yyss_alloc; + YYSTYPE yyvs_alloc; +}; + +/* The size of the maximum gap between one aligned stack and the next. */ +# define YYSTACK_GAP_MAXIMUM (sizeof (union yyalloc) - 1) + +/* The size of an array large enough to hold all stacks, each with + N elements. */ +# define YYSTACK_BYTES(N) \ + ((N) * (sizeof (yytype_int16) + sizeof (YYSTYPE)) \ + + YYSTACK_GAP_MAXIMUM) + +/* Copy COUNT objects from FROM to TO. The source and destination do + not overlap. */ +# ifndef YYCOPY +# if defined __GNUC__ && 1 < __GNUC__ +# define YYCOPY(To, From, Count) \ + __builtin_memcpy (To, From, (Count) * sizeof (*(From))) +# else +# define YYCOPY(To, From, Count) \ + do \ + { \ + YYSIZE_T yyi; \ + for (yyi = 0; yyi < (Count); yyi++) \ + (To)[yyi] = (From)[yyi]; \ + } \ + while (YYID (0)) +# endif +# endif + +/* Relocate STACK from its old location to the new one. The + local variables YYSIZE and YYSTACKSIZE give the old and new number of + elements in the stack, and YYPTR gives the new location of the + stack.
Advance YYPTR to a properly aligned location for the next + stack. */ +# define YYSTACK_RELOCATE(Stack_alloc, Stack) \ + do \ + { \ + YYSIZE_T yynewbytes; \ + YYCOPY (&yyptr->Stack_alloc, Stack, yysize); \ + Stack = &yyptr->Stack_alloc; \ + yynewbytes = yystacksize * sizeof (*Stack) + YYSTACK_GAP_MAXIMUM; \ + yyptr += yynewbytes / sizeof (*yyptr); \ + } \ + while (YYID (0)) + +#endif + +/* YYFINAL -- State number of the termination state. */ +#define YYFINAL 7 +/* YYLAST -- Last index in YYTABLE. */ +#define YYLAST 54 + +/* YYNTOKENS -- Number of terminals. */ +#define YYNTOKENS 20 +/* YYNNTS -- Number of nonterminals. */ +#define YYNNTS 16 +/* YYNRULES -- Number of rules. */ +#define YYNRULES 33 +/* YYNSTATES -- Number of states. */ +#define YYNSTATES 58 + +/* YYTRANSLATE(YYLEX) -- Bison symbol number corresponding to YYLEX. */ +#define YYUNDEFTOK 2 +#define YYMAXUTOK 265 + +#define YYTRANSLATE(YYX) \ + ((unsigned int) (YYX) <= YYMAXUTOK ? yytranslate[YYX] : YYUNDEFTOK) + +/* YYTRANSLATE[YYLEX] -- Bison symbol number corresponding to YYLEX.
*/ +static const yytype_uint8 yytranslate[] = +{ + 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 14, 15, 18, 19, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 11, + 2, 12, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 16, 2, 17, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 13, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 1, 2, 3, 4, + 5, 6, 7, 8, 9, 10 +}; + +#if YYDEBUG +/* YYPRHS[YYN] -- Index of the first RHS symbol of rule number YYN in + YYRHS. */ +static const yytype_uint8 yyprhs[] = +{ + 0, 0, 3, 5, 8, 12, 15, 18, 22, 27, + 33, 37, 39, 42, 46, 48, 51, 56, 62, 64, + 68, 70, 73, 75, 78, 80, 83, 87, 91, 93, + 95, 97, 99, 102 +}; + +/* YYRHS -- A `-1'-separated list of the rules' RHS. */ +static const yytype_int8 yyrhs[] = +{ + 21, 0, -1, 22, -1, 22, 27, -1, 22, 25, + 27, -1, 23, 24, -1, 3, 11, -1, 3, 7, + 11, -1, 3, 7, 7, 11, -1, 3, 7, 7, + 7, 11, -1, 4, 7, 11, -1, 26, -1, 25, + 26, -1, 5, 8, 11, -1, 28, -1, 27, 28, + -1, 8, 12, 29, 11, -1, 6, 8, 12, 29, + 11, -1, 30, -1, 29, 13, 30, -1, 31, -1, + 30, 31, -1, 32, -1, 31, 9, -1, 35, -1, + 10, 35, -1, 14, 29, 15, -1, 16, 29, 17, + -1, 7, -1, 8, -1, 33, -1, 34, -1, 35, + 18, -1, 35, 19, -1 +}; + +/* YYRLINE[YYN] -- source line where rule number YYN was defined. 
*/ +static const yytype_uint8 yyrline[] = +{ + 0, 82, 82, 83, 84, 87, 90, 91, 92, 93, + 97, 100, 101, 104, 107, 108, 111, 112, 115, 116, + 121, 123, 127, 128, 132, 133, 136, 139, 142, 143, + 144, 145, 146, 147 +}; +#endif + +#if YYDEBUG || YYERROR_VERBOSE || YYTOKEN_TABLE +/* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM. + First, the terminals, then, starting at YYNTOKENS, nonterminals. */ +static const char *const yytname[] = +{ + "$end", "error", "$undefined", "HEADER", "GRAMMAR", "IMPORT", "PUBLIC", + "TOKEN", "RULENAME", "TAG", "WEIGHT", "';'", "'='", "'|'", "'('", "')'", + "'['", "']'", "'*'", "'+'", "$accept", "grammar", "header", + "jsgf_header", "grammar_header", "import_header", "import_statement", + "rule_list", "rule", "alternate_list", "rule_expansion", + "tagged_rule_item", "rule_item", "rule_group", "rule_optional", + "rule_atom", 0 +}; +#endif + +# ifdef YYPRINT +/* YYTOKNUM[YYLEX-NUM] -- Internal token number corresponding to + token YYLEX-NUM. */ +static const yytype_uint16 yytoknum[] = +{ + 0, 256, 257, 258, 259, 260, 261, 262, 263, 264, + 265, 59, 61, 124, 40, 41, 91, 93, 42, 43 +}; +# endif + +/* YYR1[YYN] -- Symbol number of symbol that rule YYN derives. */ +static const yytype_uint8 yyr1[] = +{ + 0, 20, 21, 21, 21, 22, 23, 23, 23, 23, + 24, 25, 25, 26, 27, 27, 28, 28, 29, 29, + 30, 30, 31, 31, 32, 32, 33, 34, 35, 35, + 35, 35, 35, 35 +}; + +/* YYR2[YYN] -- Number of symbols composing right hand side of rule YYN. */ +static const yytype_uint8 yyr2[] = +{ + 0, 2, 1, 2, 3, 2, 2, 3, 4, 5, + 3, 1, 2, 3, 1, 2, 4, 5, 1, 3, + 1, 2, 1, 2, 1, 2, 3, 3, 1, 1, + 1, 1, 2, 2 +}; + +/* YYDEFACT[STATE-NAME] -- Default rule to reduce with in state + STATE-NUM when YYTABLE doesn't specify something else to do. Zero + means the default is an error. 
*/ +static const yytype_uint8 yydefact[] = +{ + 0, 0, 0, 2, 0, 0, 6, 1, 0, 0, + 0, 0, 11, 3, 14, 0, 5, 0, 7, 0, + 0, 0, 12, 4, 15, 0, 0, 8, 13, 0, + 28, 29, 0, 0, 0, 0, 18, 20, 22, 30, + 31, 24, 10, 9, 0, 25, 0, 0, 16, 0, + 21, 23, 32, 33, 17, 26, 27, 19 +}; + +/* YYDEFGOTO[NTERM-NUM]. */ +static const yytype_int8 yydefgoto[] = +{ + -1, 2, 3, 4, 16, 11, 12, 13, 14, 35, + 36, 37, 38, 39, 40, 41 +}; + +/* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing + STATE-NUM. */ +#define YYPACT_NINF -37 +static const yytype_int8 yypact[] = +{ + -1, -2, 36, 22, 35, 8, -37, -37, 32, 33, + 30, 22, -37, 17, -37, 37, -37, 13, -37, 34, + 31, -4, -37, 17, -37, 38, 39, -37, -37, -4, + -37, -37, 0, -4, -4, 18, -4, 42, -37, -37, + -37, 19, -37, -37, 21, 19, 20, 9, -37, -4, + 42, -37, -37, -37, -37, -37, -37, -4 +}; + +/* YYPGOTO[NTERM-NUM]. */ +static const yytype_int8 yypgoto[] = +{ + -37, -37, -37, -37, -37, -37, 41, 43, -12, -16, + -3, -36, -37, -37, -37, 15 +}; + +/* YYTABLE[YYPACT[STATE-NUM]]. What to do in state STATE-NUM. If + positive, shift that token. If negative, reduce the rule which + number is the opposite. If zero, do what YYDEFACT says. + If YYTABLE_NINF, syntax error. */ +#define YYTABLE_NINF -1 +static const yytype_uint8 yytable[] = +{ + 50, 24, 1, 30, 31, 5, 32, 30, 31, 6, + 33, 24, 34, 44, 33, 17, 34, 46, 47, 18, + 26, 50, 49, 9, 27, 10, 56, 8, 9, 48, + 10, 49, 54, 49, 49, 55, 7, 52, 53, 15, + 19, 20, 21, 29, 25, 28, 57, 45, 0, 42, + 43, 51, 22, 0, 23 +}; + +static const yytype_int8 yycheck[] = +{ + 36, 13, 3, 7, 8, 7, 10, 7, 8, 11, + 14, 23, 16, 29, 14, 7, 16, 33, 34, 11, + 7, 57, 13, 6, 11, 8, 17, 5, 6, 11, + 8, 13, 11, 13, 13, 15, 0, 18, 19, 4, + 8, 8, 12, 12, 7, 11, 49, 32, -1, 11, + 11, 9, 11, -1, 11 +}; + +/* YYSTOS[STATE-NUM] -- The (internal number of the) accessing + symbol of state STATE-NUM. 
*/ +static const yytype_uint8 yystos[] = +{ + 0, 3, 21, 22, 23, 7, 11, 0, 5, 6, + 8, 25, 26, 27, 28, 4, 24, 7, 11, 8, + 8, 12, 26, 27, 28, 7, 7, 11, 11, 12, + 7, 8, 10, 14, 16, 29, 30, 31, 32, 33, + 34, 35, 11, 11, 29, 35, 29, 29, 11, 13, + 31, 9, 18, 19, 11, 15, 17, 30 +}; + +#define yyerrok (yyerrstatus = 0) +#define yyclearin (yychar = YYEMPTY) +#define YYEMPTY (-2) +#define YYEOF 0 + +#define YYACCEPT goto yyacceptlab +#define YYABORT goto yyabortlab +#define YYERROR goto yyerrorlab + + +/* Like YYERROR except do call yyerror. This remains here temporarily + to ease the transition to the new meaning of YYERROR, for GCC. + Once GCC version 2 has supplanted version 1, this can go. */ + +#define YYFAIL goto yyerrlab + +#define YYRECOVERING() (!!yyerrstatus) + +#define YYBACKUP(Token, Value) \ +do \ + if (yychar == YYEMPTY && yylen == 1) \ + { \ + yychar = (Token); \ + yylval = (Value); \ + yytoken = YYTRANSLATE (yychar); \ + YYPOPSTACK (1); \ + goto yybackup; \ + } \ + else \ + { \ + yyerror (yyscanner, jsgf, YY_("syntax error: cannot back up")); \ + YYERROR; \ + } \ +while (YYID (0)) + + +#define YYTERROR 1 +#define YYERRCODE 256 + + +/* YYLLOC_DEFAULT -- Set CURRENT to span from RHS[1] to RHS[N]. + If N is 0, then set CURRENT to the empty location which ends + the previous symbol: RHS[0] (always defined). */ + +#define YYRHSLOC(Rhs, K) ((Rhs)[K]) +#ifndef YYLLOC_DEFAULT +# define YYLLOC_DEFAULT(Current, Rhs, N) \ + do \ + if (YYID (N)) \ + { \ + (Current).first_line = YYRHSLOC (Rhs, 1).first_line; \ + (Current).first_column = YYRHSLOC (Rhs, 1).first_column; \ + (Current).last_line = YYRHSLOC (Rhs, N).last_line; \ + (Current).last_column = YYRHSLOC (Rhs, N).last_column; \ + } \ + else \ + { \ + (Current).first_line = (Current).last_line = \ + YYRHSLOC (Rhs, 0).last_line; \ + (Current).first_column = (Current).last_column = \ + YYRHSLOC (Rhs, 0).last_column; \ + } \ + while (YYID (0)) +#endif + + +/* YY_LOCATION_PRINT -- Print the location on the stream. 
+ This macro was not mandated originally: define only if we know + we won't break user code: when these are the locations we know. */ + +#ifndef YY_LOCATION_PRINT +# if YYLTYPE_IS_TRIVIAL +# define YY_LOCATION_PRINT(File, Loc) \ + fprintf (File, "%d.%d-%d.%d", \ + (Loc).first_line, (Loc).first_column, \ + (Loc).last_line, (Loc).last_column) +# else +# define YY_LOCATION_PRINT(File, Loc) ((void) 0) +# endif +#endif + + +/* YYLEX -- calling `yylex' with the right arguments. */ + +#ifdef YYLEX_PARAM +# define YYLEX yylex (&yylval, YYLEX_PARAM) +#else +# define YYLEX yylex (&yylval, yyscanner) +#endif + +/* Enable debugging if requested. */ +#if YYDEBUG + +# ifndef YYFPRINTF +# include <stdio.h> /* INFRINGES ON USER NAME SPACE */ +# define YYFPRINTF fprintf +# endif + +# define YYDPRINTF(Args) \ +do { \ + if (yydebug) \ + YYFPRINTF Args; \ +} while (YYID (0)) + +# define YY_SYMBOL_PRINT(Title, Type, Value, Location) \ +do { \ + if (yydebug) \ + { \ + YYFPRINTF (stderr, "%s ", Title); \ + yy_symbol_print (stderr, \ + Type, Value, yyscanner, jsgf); \ + YYFPRINTF (stderr, "\n"); \ + } \ +} while (YYID (0)) + + +/*--------------------------------. +| Print this symbol on YYOUTPUT. | +`--------------------------------*/ + +/*ARGSUSED*/ +#if (defined __STDC__ || defined __C99__FUNC__ \ + || defined __cplusplus || defined _MSC_VER) +static void +yy_symbol_value_print (FILE *yyoutput, int yytype, YYSTYPE const * const yyvaluep, void* yyscanner, jsgf_t *jsgf) +#else +static void +yy_symbol_value_print (yyoutput, yytype, yyvaluep, yyscanner, jsgf) + FILE *yyoutput; + int yytype; + YYSTYPE const * const yyvaluep; + void* yyscanner; + jsgf_t *jsgf; +#endif +{ + if (!yyvaluep) + return; + YYUSE (yyscanner); + YYUSE (jsgf); +# ifdef YYPRINT + if (yytype < YYNTOKENS) + YYPRINT (yyoutput, yytoknum[yytype], *yyvaluep); +# else + YYUSE (yyoutput); +# endif + switch (yytype) + { + default: + break; + } +} + + +/*--------------------------------. +| Print this symbol on YYOUTPUT.
| +`--------------------------------*/ + +#if (defined __STDC__ || defined __C99__FUNC__ \ + || defined __cplusplus || defined _MSC_VER) +static void +yy_symbol_print (FILE *yyoutput, int yytype, YYSTYPE const * const yyvaluep, void* yyscanner, jsgf_t *jsgf) +#else +static void +yy_symbol_print (yyoutput, yytype, yyvaluep, yyscanner, jsgf) + FILE *yyoutput; + int yytype; + YYSTYPE const * const yyvaluep; + void* yyscanner; + jsgf_t *jsgf; +#endif +{ + if (yytype < YYNTOKENS) + YYFPRINTF (yyoutput, "token %s (", yytname[yytype]); + else + YYFPRINTF (yyoutput, "nterm %s (", yytname[yytype]); + + yy_symbol_value_print (yyoutput, yytype, yyvaluep, yyscanner, jsgf); + YYFPRINTF (yyoutput, ")"); +} + +/*------------------------------------------------------------------. +| yy_stack_print -- Print the state stack from its BOTTOM up to its | +| TOP (included). | +`------------------------------------------------------------------*/ + +#if (defined __STDC__ || defined __C99__FUNC__ \ + || defined __cplusplus || defined _MSC_VER) +static void +yy_stack_print (yytype_int16 *yybottom, yytype_int16 *yytop) +#else +static void +yy_stack_print (yybottom, yytop) + yytype_int16 *yybottom; + yytype_int16 *yytop; +#endif +{ + YYFPRINTF (stderr, "Stack now"); + for (; yybottom <= yytop; yybottom++) + { + int yybot = *yybottom; + YYFPRINTF (stderr, " %d", yybot); + } + YYFPRINTF (stderr, "\n"); +} + +# define YY_STACK_PRINT(Bottom, Top) \ +do { \ + if (yydebug) \ + yy_stack_print ((Bottom), (Top)); \ +} while (YYID (0)) + + +/*------------------------------------------------. +| Report that the YYRULE is going to be reduced. 
| +`------------------------------------------------*/ + +#if (defined __STDC__ || defined __C99__FUNC__ \ + || defined __cplusplus || defined _MSC_VER) +static void +yy_reduce_print (YYSTYPE *yyvsp, int yyrule, void* yyscanner, jsgf_t *jsgf) +#else +static void +yy_reduce_print (yyvsp, yyrule, yyscanner, jsgf) + YYSTYPE *yyvsp; + int yyrule; + void* yyscanner; + jsgf_t *jsgf; +#endif +{ + int yynrhs = yyr2[yyrule]; + int yyi; + unsigned long int yylno = yyrline[yyrule]; + YYFPRINTF (stderr, "Reducing stack by rule %d (line %lu):\n", + yyrule - 1, yylno); + /* The symbols being reduced. */ + for (yyi = 0; yyi < yynrhs; yyi++) + { + YYFPRINTF (stderr, " $%d = ", yyi + 1); + yy_symbol_print (stderr, yyrhs[yyprhs[yyrule] + yyi], + &(yyvsp[(yyi + 1) - (yynrhs)]) + , yyscanner, jsgf); + YYFPRINTF (stderr, "\n"); + } +} + +# define YY_REDUCE_PRINT(Rule) \ +do { \ + if (yydebug) \ + yy_reduce_print (yyvsp, Rule, yyscanner, jsgf); \ +} while (YYID (0)) + +/* Nonzero means print parse trace. It is left uninitialized so that + multiple parsers can coexist. */ +int yydebug; +#else /* !YYDEBUG */ +# define YYDPRINTF(Args) +# define YY_SYMBOL_PRINT(Title, Type, Value, Location) +# define YY_STACK_PRINT(Bottom, Top) +# define YY_REDUCE_PRINT(Rule) +#endif /* !YYDEBUG */ + + +/* YYINITDEPTH -- initial size of the parser's stacks. */ +#ifndef YYINITDEPTH +# define YYINITDEPTH 200 +#endif + +/* YYMAXDEPTH -- maximum size the stacks can grow to (effective only + if the built-in stack extension method is used). + + Do not make this value too large; the results are undefined if + YYSTACK_ALLOC_MAXIMUM < YYSTACK_BYTES (YYMAXDEPTH) + evaluated with infinite-precision integer arithmetic. */ + +#ifndef YYMAXDEPTH +# define YYMAXDEPTH 10000 +#endif + + + +#if YYERROR_VERBOSE + +# ifndef yystrlen +# if defined __GLIBC__ && defined _STRING_H +# define yystrlen strlen +# else +/* Return the length of YYSTR. 
*/ +#if (defined __STDC__ || defined __C99__FUNC__ \ + || defined __cplusplus || defined _MSC_VER) +static YYSIZE_T +yystrlen (const char *yystr) +#else +static YYSIZE_T +yystrlen (yystr) + const char *yystr; +#endif +{ + YYSIZE_T yylen; + for (yylen = 0; yystr[yylen]; yylen++) + continue; + return yylen; +} +# endif +# endif + +# ifndef yystpcpy +# if defined __GLIBC__ && defined _STRING_H && defined _GNU_SOURCE +# define yystpcpy stpcpy +# else +/* Copy YYSRC to YYDEST, returning the address of the terminating '\0' in + YYDEST. */ +#if (defined __STDC__ || defined __C99__FUNC__ \ + || defined __cplusplus || defined _MSC_VER) +static char * +yystpcpy (char *yydest, const char *yysrc) +#else +static char * +yystpcpy (yydest, yysrc) + char *yydest; + const char *yysrc; +#endif +{ + char *yyd = yydest; + const char *yys = yysrc; + + while ((*yyd++ = *yys++) != '\0') + continue; + + return yyd - 1; +} +# endif +# endif + +# ifndef yytnamerr +/* Copy to YYRES the contents of YYSTR after stripping away unnecessary + quotes and backslashes, so that it's suitable for yyerror. The + heuristic is that double-quoting is unnecessary unless the string + contains an apostrophe, a comma, or backslash (other than + backslash-backslash). YYSTR is taken from yytname. If YYRES is + null, do not copy; instead, return the length of what the result + would have been. */ +static YYSIZE_T +yytnamerr (char *yyres, const char *yystr) +{ + if (*yystr == '"') + { + YYSIZE_T yyn = 0; + char const *yyp = yystr; + + for (;;) + switch (*++yyp) + { + case '\'': + case ',': + goto do_not_strip_quotes; + + case '\\': + if (*++yyp != '\\') + goto do_not_strip_quotes; + /* Fall through. */ + default: + if (yyres) + yyres[yyn] = *yyp; + yyn++; + break; + + case '"': + if (yyres) + yyres[yyn] = '\0'; + return yyn; + } + do_not_strip_quotes: ; + } + + if (! 
yyres) + return yystrlen (yystr); + + return yystpcpy (yyres, yystr) - yyres; +} +# endif + +/* Copy into YYRESULT an error message about the unexpected token + YYCHAR while in state YYSTATE. Return the number of bytes copied, + including the terminating null byte. If YYRESULT is null, do not + copy anything; just return the number of bytes that would be + copied. As a special case, return 0 if an ordinary "syntax error" + message will do. Return YYSIZE_MAXIMUM if overflow occurs during + size calculation. */ +static YYSIZE_T +yysyntax_error (char *yyresult, int yystate, int yychar) +{ + int yyn = yypact[yystate]; + + if (! (YYPACT_NINF < yyn && yyn <= YYLAST)) + return 0; + else + { + int yytype = YYTRANSLATE (yychar); + YYSIZE_T yysize0 = yytnamerr (0, yytname[yytype]); + YYSIZE_T yysize = yysize0; + YYSIZE_T yysize1; + int yysize_overflow = 0; + enum { YYERROR_VERBOSE_ARGS_MAXIMUM = 5 }; + char const *yyarg[YYERROR_VERBOSE_ARGS_MAXIMUM]; + int yyx; + +# if 0 + /* This is so xgettext sees the translatable formats that are + constructed on the fly. */ + YY_("syntax error, unexpected %s"); + YY_("syntax error, unexpected %s, expecting %s"); + YY_("syntax error, unexpected %s, expecting %s or %s"); + YY_("syntax error, unexpected %s, expecting %s or %s or %s"); + YY_("syntax error, unexpected %s, expecting %s or %s or %s or %s"); +# endif + char *yyfmt; + char const *yyf; + static char const yyunexpected[] = "syntax error, unexpected %s"; + static char const yyexpecting[] = ", expecting %s"; + static char const yyor[] = " or %s"; + char yyformat[sizeof yyunexpected + + sizeof yyexpecting - 1 + + ((YYERROR_VERBOSE_ARGS_MAXIMUM - 2) + * (sizeof yyor - 1))]; + char const *yyprefix = yyexpecting; + + /* Start YYX at -YYN if negative to avoid negative indexes in + YYCHECK. */ + int yyxbegin = yyn < 0 ? -yyn : 0; + + /* Stay within bounds of both yycheck and yytname. */ + int yychecklim = YYLAST - yyn + 1; + int yyxend = yychecklim < YYNTOKENS ? 
yychecklim : YYNTOKENS; + int yycount = 1; + + yyarg[0] = yytname[yytype]; + yyfmt = yystpcpy (yyformat, yyunexpected); + + for (yyx = yyxbegin; yyx < yyxend; ++yyx) + if (yycheck[yyx + yyn] == yyx && yyx != YYTERROR) + { + if (yycount == YYERROR_VERBOSE_ARGS_MAXIMUM) + { + yycount = 1; + yysize = yysize0; + yyformat[sizeof yyunexpected - 1] = '\0'; + break; + } + yyarg[yycount++] = yytname[yyx]; + yysize1 = yysize + yytnamerr (0, yytname[yyx]); + yysize_overflow |= (yysize1 < yysize); + yysize = yysize1; + yyfmt = yystpcpy (yyfmt, yyprefix); + yyprefix = yyor; + } + + yyf = YY_(yyformat); + yysize1 = yysize + yystrlen (yyf); + yysize_overflow |= (yysize1 < yysize); + yysize = yysize1; + + if (yysize_overflow) + return YYSIZE_MAXIMUM; + + if (yyresult) + { + /* Avoid sprintf, as that infringes on the user's name space. + Don't have undefined behavior even if the translation + produced a string with the wrong number of "%s"s. */ + char *yyp = yyresult; + int yyi = 0; + while ((*yyp = *yyf) != '\0') + { + if (*yyp == '%' && yyf[1] == 's' && yyi < yycount) + { + yyp += yytnamerr (yyp, yyarg[yyi++]); + yyf += 2; + } + else + { + yyp++; + yyf++; + } + } + } + return yysize; + } +} +#endif /* YYERROR_VERBOSE */ + + +/*-----------------------------------------------. +| Release the memory associated to this symbol. 
| +`-----------------------------------------------*/ + +/*ARGSUSED*/ +#if (defined __STDC__ || defined __C99__FUNC__ \ + || defined __cplusplus || defined _MSC_VER) +static void +yydestruct (const char *yymsg, int yytype, YYSTYPE *yyvaluep, void* yyscanner, jsgf_t *jsgf) +#else +static void +yydestruct (yymsg, yytype, yyvaluep, yyscanner, jsgf) + const char *yymsg; + int yytype; + YYSTYPE *yyvaluep; + void* yyscanner; + jsgf_t *jsgf; +#endif +{ + YYUSE (yyvaluep); + YYUSE (yyscanner); + YYUSE (jsgf); + + if (!yymsg) + yymsg = "Deleting"; + YY_SYMBOL_PRINT (yymsg, yytype, yyvaluep, yylocationp); + + switch (yytype) + { + + default: + break; + } +} + +/* Prevent warnings from -Wmissing-prototypes. */ +#ifdef YYPARSE_PARAM +#if defined __STDC__ || defined __cplusplus +int yyparse (void *YYPARSE_PARAM); +#else +int yyparse (); +#endif +#else /* ! YYPARSE_PARAM */ +#if defined __STDC__ || defined __cplusplus +int yyparse (void* yyscanner, jsgf_t *jsgf); +#else +int yyparse (); +#endif +#endif /* ! YYPARSE_PARAM */ + + + + + +/*-------------------------. +| yyparse or yypush_parse. | +`-------------------------*/ + +#ifdef YYPARSE_PARAM +#if (defined __STDC__ || defined __C99__FUNC__ \ + || defined __cplusplus || defined _MSC_VER) +int +yyparse (void *YYPARSE_PARAM) +#else +int +yyparse (YYPARSE_PARAM) + void *YYPARSE_PARAM; +#endif +#else /* ! YYPARSE_PARAM */ +#if (defined __STDC__ || defined __C99__FUNC__ \ + || defined __cplusplus || defined _MSC_VER) +int +yyparse (void* yyscanner, jsgf_t *jsgf) +#else +int +yyparse (yyscanner, jsgf) + void* yyscanner; + jsgf_t *jsgf; +#endif +#endif +{ +/* The lookahead symbol. */ +int yychar; + +/* The semantic value of the lookahead symbol. */ +YYSTYPE yylval; + + /* Number of syntax errors so far. */ + int yynerrs; + + int yystate; + /* Number of tokens to shift before error messages enabled. */ + int yyerrstatus; + + /* The stacks and their tools: + `yyss': related to states. + `yyvs': related to semantic values. 
+ + Refer to the stacks thru separate pointers, to allow yyoverflow + to reallocate them elsewhere. */ + + /* The state stack. */ + yytype_int16 yyssa[YYINITDEPTH]; + yytype_int16 *yyss; + yytype_int16 *yyssp; + + /* The semantic value stack. */ + YYSTYPE yyvsa[YYINITDEPTH]; + YYSTYPE *yyvs; + YYSTYPE *yyvsp; + + YYSIZE_T yystacksize; + + int yyn; + int yyresult; + /* Lookahead token as an internal (translated) token number. */ + int yytoken; + /* The variables used to return semantic value and location from the + action routines. */ + YYSTYPE yyval; + +#if YYERROR_VERBOSE + /* Buffer for error messages, and its allocated size. */ + char yymsgbuf[128]; + char *yymsg = yymsgbuf; + YYSIZE_T yymsg_alloc = sizeof yymsgbuf; +#endif + +#define YYPOPSTACK(N) (yyvsp -= (N), yyssp -= (N)) + + /* The number of symbols on the RHS of the reduced rule. + Keep to zero when no symbol should be popped. */ + int yylen = 0; + + yytoken = 0; + yyss = yyssa; + yyvs = yyvsa; + yystacksize = YYINITDEPTH; + + YYDPRINTF ((stderr, "Starting parse\n")); + + yystate = 0; + yyerrstatus = 0; + yynerrs = 0; + yychar = YYEMPTY; /* Cause a token to be read. */ + + /* Initialize stack pointers. + Waste one element of value and location stack + so that they stay on the same level as the state stack. + The wasted elements are never initialized. */ + yyssp = yyss; + yyvsp = yyvs; + + goto yysetstate; + +/*------------------------------------------------------------. +| yynewstate -- Push a new state, which is found in yystate. | +`------------------------------------------------------------*/ + yynewstate: + /* In all cases, when you get here, the value and location stacks + have just been pushed. So pushing a state here evens the stacks. */ + yyssp++; + + yysetstate: + *yyssp = yystate; + + if (yyss + yystacksize - 1 <= yyssp) + { + /* Get the current used size of the three stacks, in elements. 
*/ + YYSIZE_T yysize = yyssp - yyss + 1; + +#ifdef yyoverflow + { + /* Give user a chance to reallocate the stack. Use copies of + these so that the &'s don't force the real ones into + memory. */ + YYSTYPE *yyvs1 = yyvs; + yytype_int16 *yyss1 = yyss; + + /* Each stack pointer address is followed by the size of the + data in use in that stack, in bytes. This used to be a + conditional around just the two extra args, but that might + be undefined if yyoverflow is a macro. */ + yyoverflow (YY_("memory exhausted"), + &yyss1, yysize * sizeof (*yyssp), + &yyvs1, yysize * sizeof (*yyvsp), + &yystacksize); + + yyss = yyss1; + yyvs = yyvs1; + } +#else /* no yyoverflow */ +# ifndef YYSTACK_RELOCATE + goto yyexhaustedlab; +# else + /* Extend the stack our own way. */ + if (YYMAXDEPTH <= yystacksize) + goto yyexhaustedlab; + yystacksize *= 2; + if (YYMAXDEPTH < yystacksize) + yystacksize = YYMAXDEPTH; + + { + yytype_int16 *yyss1 = yyss; + union yyalloc *yyptr = + (union yyalloc *) YYSTACK_ALLOC (YYSTACK_BYTES (yystacksize)); + if (! yyptr) + goto yyexhaustedlab; + YYSTACK_RELOCATE (yyss_alloc, yyss); + YYSTACK_RELOCATE (yyvs_alloc, yyvs); +# undef YYSTACK_RELOCATE + if (yyss1 != yyssa) + YYSTACK_FREE (yyss1); + } +# endif +#endif /* no yyoverflow */ + + yyssp = yyss + yysize - 1; + yyvsp = yyvs + yysize - 1; + + YYDPRINTF ((stderr, "Stack size increased to %lu\n", + (unsigned long int) yystacksize)); + + if (yyss + yystacksize - 1 <= yyssp) + YYABORT; + } + + YYDPRINTF ((stderr, "Entering state %d\n", yystate)); + + if (yystate == YYFINAL) + YYACCEPT; + + goto yybackup; + +/*-----------. +| yybackup. | +`-----------*/ +yybackup: + + /* Do appropriate processing given the current state. Read a + lookahead token if we need one and don't already have one. */ + + /* First try to decide what to do without reference to lookahead token. */ + yyn = yypact[yystate]; + if (yyn == YYPACT_NINF) + goto yydefault; + + /* Not known => get a lookahead token if don't already have one. 
*/ + + /* YYCHAR is either YYEMPTY or YYEOF or a valid lookahead symbol. */ + if (yychar == YYEMPTY) + { + YYDPRINTF ((stderr, "Reading a token: ")); + yychar = YYLEX; + } + + if (yychar <= YYEOF) + { + yychar = yytoken = YYEOF; + YYDPRINTF ((stderr, "Now at end of input.\n")); + } + else + { + yytoken = YYTRANSLATE (yychar); + YY_SYMBOL_PRINT ("Next token is", yytoken, &yylval, &yylloc); + } + + /* If the proper action on seeing token YYTOKEN is to reduce or to + detect an error, take that action. */ + yyn += yytoken; + if (yyn < 0 || YYLAST < yyn || yycheck[yyn] != yytoken) + goto yydefault; + yyn = yytable[yyn]; + if (yyn <= 0) + { + if (yyn == 0 || yyn == YYTABLE_NINF) + goto yyerrlab; + yyn = -yyn; + goto yyreduce; + } + + /* Count tokens shifted since error; after three, turn off error + status. */ + if (yyerrstatus) + yyerrstatus--; + + /* Shift the lookahead token. */ + YY_SYMBOL_PRINT ("Shifting", yytoken, &yylval, &yylloc); + + /* Discard the shifted token. */ + yychar = YYEMPTY; + + yystate = yyn; + *++yyvsp = yylval; + + goto yynewstate; + + +/*-----------------------------------------------------------. +| yydefault -- do the default action for the current state. | +`-----------------------------------------------------------*/ +yydefault: + yyn = yydefact[yystate]; + if (yyn == 0) + goto yyerrlab; + goto yyreduce; + + +/*-----------------------------. +| yyreduce -- Do a reduction. | +`-----------------------------*/ +yyreduce: + /* yyn is the number of a rule to reduce with. */ + yylen = yyr2[yyn]; + + /* If YYLEN is nonzero, implement the default value of the action: + `$$ = $1'. + + Otherwise, the following line sets YYVAL to garbage. + This behavior is undocumented and Bison + users should not rely upon it. Assigning to YYVAL + unconditionally makes the parser a bit smaller, and it avoids a + GCC warning that YYVAL may be used uninitialized. 
*/ + yyval = yyvsp[1-yylen]; + + + YY_REDUCE_PRINT (yyn); + switch (yyn) + { + case 5: + +/* Line 1455 of yacc.c */ +#line 87 "jsgf_parser.y" + { jsgf->name = (yyvsp[(2) - (2)].name); } + break; + + case 7: + +/* Line 1455 of yacc.c */ +#line 91 "jsgf_parser.y" + { jsgf->version = (yyvsp[(2) - (3)].name); } + break; + + case 8: + +/* Line 1455 of yacc.c */ +#line 92 "jsgf_parser.y" + { jsgf->version = (yyvsp[(2) - (4)].name); jsgf->charset = (yyvsp[(3) - (4)].name); } + break; + + case 9: + +/* Line 1455 of yacc.c */ +#line 93 "jsgf_parser.y" + { jsgf->version = (yyvsp[(2) - (5)].name); jsgf->charset = (yyvsp[(3) - (5)].name); + jsgf->locale = (yyvsp[(4) - (5)].name); } + break; + + case 10: + +/* Line 1455 of yacc.c */ +#line 97 "jsgf_parser.y" + { (yyval.name) = (yyvsp[(2) - (3)].name); } + break; + + case 13: + +/* Line 1455 of yacc.c */ +#line 104 "jsgf_parser.y" + { jsgf_import_rule(jsgf, (yyvsp[(2) - (3)].name)); ckd_free((yyvsp[(2) - (3)].name)); } + break; + + case 16: + +/* Line 1455 of yacc.c */ +#line 111 "jsgf_parser.y" + { jsgf_define_rule(jsgf, (yyvsp[(1) - (4)].name), (yyvsp[(3) - (4)].rhs), 0); ckd_free((yyvsp[(1) - (4)].name)); } + break; + + case 17: + +/* Line 1455 of yacc.c */ +#line 112 "jsgf_parser.y" + { jsgf_define_rule(jsgf, (yyvsp[(2) - (5)].name), (yyvsp[(4) - (5)].rhs), 1); ckd_free((yyvsp[(2) - (5)].name)); } + break; + + case 18: + +/* Line 1455 of yacc.c */ +#line 115 "jsgf_parser.y" + { (yyval.rhs) = (yyvsp[(1) - (1)].rhs); (yyval.rhs)->atoms = glist_reverse((yyval.rhs)->atoms); } + break; + + case 19: + +/* Line 1455 of yacc.c */ +#line 116 "jsgf_parser.y" + { (yyval.rhs) = (yyvsp[(3) - (3)].rhs); + (yyval.rhs)->atoms = glist_reverse((yyval.rhs)->atoms); + (yyval.rhs)->alt = (yyvsp[(1) - (3)].rhs); } + break; + + case 20: + +/* Line 1455 of yacc.c */ +#line 121 "jsgf_parser.y" + { (yyval.rhs) = ckd_calloc(1, sizeof(*(yyval.rhs))); + (yyval.rhs)->atoms = glist_add_ptr((yyval.rhs)->atoms, (yyvsp[(1) - (1)].atom)); } + break; + + case 
21: + +/* Line 1455 of yacc.c */ +#line 123 "jsgf_parser.y" + { (yyval.rhs) = (yyvsp[(1) - (2)].rhs); + (yyval.rhs)->atoms = glist_add_ptr((yyval.rhs)->atoms, (yyvsp[(2) - (2)].atom)); } + break; + + case 23: + +/* Line 1455 of yacc.c */ +#line 128 "jsgf_parser.y" + { (yyval.atom) = (yyvsp[(1) - (2)].atom); + (yyval.atom)->tags = glist_add_ptr((yyval.atom)->tags, (yyvsp[(2) - (2)].name)); } + break; + + case 25: + +/* Line 1455 of yacc.c */ +#line 133 "jsgf_parser.y" + { (yyval.atom) = (yyvsp[(2) - (2)].atom); (yyval.atom)->weight = (yyvsp[(1) - (2)].weight); } + break; + + case 26: + +/* Line 1455 of yacc.c */ +#line 136 "jsgf_parser.y" + { (yyval.rule) = jsgf_define_rule(jsgf, NULL, (yyvsp[(2) - (3)].rhs), 0); } + break; + + case 27: + +/* Line 1455 of yacc.c */ +#line 139 "jsgf_parser.y" + { (yyval.rule) = jsgf_optional_new(jsgf, (yyvsp[(2) - (3)].rhs)); } + break; + + case 28: + +/* Line 1455 of yacc.c */ +#line 142 "jsgf_parser.y" + { (yyval.atom) = jsgf_atom_new((yyvsp[(1) - (1)].name), 1.0); ckd_free((yyvsp[(1) - (1)].name)); } + break; + + case 29: + +/* Line 1455 of yacc.c */ +#line 143 "jsgf_parser.y" + { (yyval.atom) = jsgf_atom_new((yyvsp[(1) - (1)].name), 1.0); ckd_free((yyvsp[(1) - (1)].name)); } + break; + + case 30: + +/* Line 1455 of yacc.c */ +#line 144 "jsgf_parser.y" + { (yyval.atom) = jsgf_atom_new((yyvsp[(1) - (1)].rule)->name, 1.0); } + break; + + case 31: + +/* Line 1455 of yacc.c */ +#line 145 "jsgf_parser.y" + { (yyval.atom) = jsgf_atom_new((yyvsp[(1) - (1)].rule)->name, 1.0); } + break; + + case 32: + +/* Line 1455 of yacc.c */ +#line 146 "jsgf_parser.y" + { (yyval.atom) = jsgf_kleene_new(jsgf, (yyvsp[(1) - (2)].atom), 0); } + break; + + case 33: + +/* Line 1455 of yacc.c */ +#line 147 "jsgf_parser.y" + { (yyval.atom) = jsgf_kleene_new(jsgf, (yyvsp[(1) - (2)].atom), 1); } + break; + + + +/* Line 1455 of yacc.c */ +#line 1580 "jsgf_parser.c" + default: break; + } + YY_SYMBOL_PRINT ("-> $$ =", yyr1[yyn], &yyval, &yyloc); + + YYPOPSTACK 
(yylen); + yylen = 0; + YY_STACK_PRINT (yyss, yyssp); + + *++yyvsp = yyval; + + /* Now `shift' the result of the reduction. Determine what state + that goes to, based on the state we popped back to and the rule + number reduced by. */ + + yyn = yyr1[yyn]; + + yystate = yypgoto[yyn - YYNTOKENS] + *yyssp; + if (0 <= yystate && yystate <= YYLAST && yycheck[yystate] == *yyssp) + yystate = yytable[yystate]; + else + yystate = yydefgoto[yyn - YYNTOKENS]; + + goto yynewstate; + + +/*------------------------------------. +| yyerrlab -- here on detecting error | +`------------------------------------*/ +yyerrlab: + /* If not already recovering from an error, report this error. */ + if (!yyerrstatus) + { + ++yynerrs; +#if ! YYERROR_VERBOSE + yyerror (yyscanner, jsgf, YY_("syntax error")); +#else + { + YYSIZE_T yysize = yysyntax_error (0, yystate, yychar); + if (yymsg_alloc < yysize && yymsg_alloc < YYSTACK_ALLOC_MAXIMUM) + { + YYSIZE_T yyalloc = 2 * yysize; + if (! (yysize <= yyalloc && yyalloc <= YYSTACK_ALLOC_MAXIMUM)) + yyalloc = YYSTACK_ALLOC_MAXIMUM; + if (yymsg != yymsgbuf) + YYSTACK_FREE (yymsg); + yymsg = (char *) YYSTACK_ALLOC (yyalloc); + if (yymsg) + yymsg_alloc = yyalloc; + else + { + yymsg = yymsgbuf; + yymsg_alloc = sizeof yymsgbuf; + } + } + + if (0 < yysize && yysize <= yymsg_alloc) + { + (void) yysyntax_error (yymsg, yystate, yychar); + yyerror (yyscanner, jsgf, yymsg); + } + else + { + yyerror (yyscanner, jsgf, YY_("syntax error")); + if (yysize != 0) + goto yyexhaustedlab; + } + } +#endif + } + + + + if (yyerrstatus == 3) + { + /* If just tried and failed to reuse lookahead token after an + error, discard it. */ + + if (yychar <= YYEOF) + { + /* Return failure if at end of input. */ + if (yychar == YYEOF) + YYABORT; + } + else + { + yydestruct ("Error: discarding", + yytoken, &yylval, yyscanner, jsgf); + yychar = YYEMPTY; + } + } + + /* Else will try to reuse lookahead token after shifting the error + token. 
*/ + goto yyerrlab1; + + +/*---------------------------------------------------. +| yyerrorlab -- error raised explicitly by YYERROR. | +`---------------------------------------------------*/ +yyerrorlab: + + /* Pacify compilers like GCC when the user code never invokes + YYERROR and the label yyerrorlab therefore never appears in user + code. */ + if (/*CONSTCOND*/ 0) + goto yyerrorlab; + + /* Do not reclaim the symbols of the rule which action triggered + this YYERROR. */ + YYPOPSTACK (yylen); + yylen = 0; + YY_STACK_PRINT (yyss, yyssp); + yystate = *yyssp; + goto yyerrlab1; + + +/*-------------------------------------------------------------. +| yyerrlab1 -- common code for both syntax error and YYERROR. | +`-------------------------------------------------------------*/ +yyerrlab1: + yyerrstatus = 3; /* Each real token shifted decrements this. */ + + for (;;) + { + yyn = yypact[yystate]; + if (yyn != YYPACT_NINF) + { + yyn += YYTERROR; + if (0 <= yyn && yyn <= YYLAST && yycheck[yyn] == YYTERROR) + { + yyn = yytable[yyn]; + if (0 < yyn) + break; + } + } + + /* Pop the current state because it cannot handle the error token. */ + if (yyssp == yyss) + YYABORT; + + + yydestruct ("Error: popping", + yystos[yystate], yyvsp, yyscanner, jsgf); + YYPOPSTACK (1); + yystate = *yyssp; + YY_STACK_PRINT (yyss, yyssp); + } + + *++yyvsp = yylval; + + + /* Shift the error token. */ + YY_SYMBOL_PRINT ("Shifting", yystos[yyn], yyvsp, yylsp); + + yystate = yyn; + goto yynewstate; + + +/*-------------------------------------. +| yyacceptlab -- YYACCEPT comes here. | +`-------------------------------------*/ +yyacceptlab: + yyresult = 0; + goto yyreturn; + +/*-----------------------------------. +| yyabortlab -- YYABORT comes here. | +`-----------------------------------*/ +yyabortlab: + yyresult = 1; + goto yyreturn; + +#if !defined(yyoverflow) || YYERROR_VERBOSE +/*-------------------------------------------------. +| yyexhaustedlab -- memory exhaustion comes here. 
| +`-------------------------------------------------*/ +yyexhaustedlab: + yyerror (yyscanner, jsgf, YY_("memory exhausted")); + yyresult = 2; + /* Fall through. */ +#endif + +yyreturn: + if (yychar != YYEMPTY) + yydestruct ("Cleanup: discarding lookahead", + yytoken, &yylval, yyscanner, jsgf); + /* Do not reclaim the symbols of the rule which action triggered + this YYABORT or YYACCEPT. */ + YYPOPSTACK (yylen); + YY_STACK_PRINT (yyss, yyssp); + while (yyssp != yyss) + { + yydestruct ("Cleanup: popping", + yystos[*yyssp], yyvsp, yyscanner, jsgf); + YYPOPSTACK (1); + } +#ifndef yyoverflow + if (yyss != yyssa) + YYSTACK_FREE (yyss); +#endif +#if YYERROR_VERBOSE + if (yymsg != yymsgbuf) + YYSTACK_FREE (yymsg); +#endif + /* Make sure YYID is used. */ + return YYID (yyresult); +} + + + +/* Line 1675 of yacc.c */ +#line 150 "jsgf_parser.y" + +/* Report parser message S through E_ERROR, together with the line number (yyget_lineno) and token text (yyget_text) of scanner LEX; JSGF is not used here. */ +void +yyerror(yyscan_t lex, jsgf_t *jsgf, const char *s) +{ + E_ERROR("%s at line %d current token '%s'\n", s, yyget_lineno(lex), yyget_text(lex)); +} + diff --git a/media/sphinxbase/src/libsphinxbase/lm/jsgf_parser.h b/media/sphinxbase/src/libsphinxbase/lm/jsgf_parser.h new file mode 100644 index 000000000..95f68e329 --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/lm/jsgf_parser.h @@ -0,0 +1,90 @@ + +/* A Bison parser, made by GNU Bison 2.4.1. */ + +/* Skeleton interface for Bison's Yacc-like parsers in C + + Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004, 2005, 2006 + Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* As a special exception, you may create a larger work that contains + part or all of the Bison parser skeleton and distribute that work + under terms of your choice, so long as that work isn't itself a + parser generator using the skeleton or a modified version thereof + as a parser skeleton. Alternatively, if you modify or redistribute + the parser skeleton itself, you may (at your option) remove this + special exception, which will cause the skeleton and the resulting + Bison output files to be licensed under the GNU General Public + License without this special exception. + + This special exception was added by the Free Software Foundation in + version 2.2 of Bison. */ + + +/* Tokens. */ +#ifndef YYTOKENTYPE +# define YYTOKENTYPE + /* Put the tokens into the symbol table, so that GDB and other debuggers + know about them. */ + enum yytokentype { + HEADER = 258, + GRAMMAR = 259, + IMPORT = 260, + PUBLIC = 261, + TOKEN = 262, + RULENAME = 263, + TAG = 264, + WEIGHT = 265 + }; +#endif +/* Tokens. */ +#define HEADER 258 +#define GRAMMAR 259 +#define IMPORT 260 +#define PUBLIC 261 +#define TOKEN 262 +#define RULENAME 263 +#define TAG 264 +#define WEIGHT 265 + + + + +#if ! defined YYSTYPE && ! 
defined YYSTYPE_IS_DECLARED +typedef union YYSTYPE +{ + +/* Line 1676 of yacc.c */ +#line 65 "jsgf_parser.y" + + char *name; + float weight; + jsgf_rule_t *rule; + jsgf_rhs_t *rhs; + jsgf_atom_t *atom; + + + +/* Line 1676 of yacc.c */ +#line 82 "jsgf_parser.h" +} YYSTYPE; +# define YYSTYPE_IS_TRIVIAL 1 +# define yystype YYSTYPE /* obsolescent; will be withdrawn */ +# define YYSTYPE_IS_DECLARED 1 +#endif + + + + diff --git a/media/sphinxbase/src/libsphinxbase/lm/jsgf_scanner.c b/media/sphinxbase/src/libsphinxbase/lm/jsgf_scanner.c new file mode 100644 index 000000000..5d41d2a6b --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/lm/jsgf_scanner.c @@ -0,0 +1,2199 @@ +#line 2 "jsgf_scanner.c" + +#line 4 "jsgf_scanner.c" + +#define YY_INT_ALIGNED short int + +/* A lexical scanner generated by flex */ + +#define FLEX_SCANNER +#define YY_FLEX_MAJOR_VERSION 2 +#define YY_FLEX_MINOR_VERSION 5 +#define YY_FLEX_SUBMINOR_VERSION 37 +#if YY_FLEX_SUBMINOR_VERSION > 0 +#define FLEX_BETA +#endif + +/* First, we deal with platform-specific or compiler-specific issues. */ + +/* begin standard C headers. */ +#include <stdio.h> +#include <string.h> +#include <errno.h> +#include <stdlib.h> + +/* end standard C headers. */ + +/* flex integer type definitions */ + +#ifndef FLEXINT_H +#define FLEXINT_H + +/* C99 systems have <inttypes.h>. Non-C99 systems may or may not. */ + +#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + +/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h, + * if you want the limit (max/min) macros for int types. 
+ */ +#ifndef __STDC_LIMIT_MACROS +#define __STDC_LIMIT_MACROS 1 +#endif + +#include <inttypes.h> +typedef int8_t flex_int8_t; +typedef uint8_t flex_uint8_t; +typedef int16_t flex_int16_t; +typedef uint16_t flex_uint16_t; +typedef int32_t flex_int32_t; +typedef uint32_t flex_uint32_t; +#else +typedef signed char flex_int8_t; +typedef short int flex_int16_t; +typedef int flex_int32_t; +typedef unsigned char flex_uint8_t; +typedef unsigned short int flex_uint16_t; +typedef unsigned int flex_uint32_t; + +/* Limits of integral types. */ +#ifndef INT8_MIN +#define INT8_MIN (-128) +#endif +#ifndef INT16_MIN +#define INT16_MIN (-32767-1) +#endif +#ifndef INT32_MIN +#define INT32_MIN (-2147483647-1) +#endif +#ifndef INT8_MAX +#define INT8_MAX (127) +#endif +#ifndef INT16_MAX +#define INT16_MAX (32767) +#endif +#ifndef INT32_MAX +#define INT32_MAX (2147483647) +#endif +#ifndef UINT8_MAX +#define UINT8_MAX (255U) +#endif +#ifndef UINT16_MAX +#define UINT16_MAX (65535U) +#endif +#ifndef UINT32_MAX +#define UINT32_MAX (4294967295U) +#endif + +#endif /* ! C99 */ + +#endif /* ! FLEXINT_H */ + +#ifdef __cplusplus + +/* The "const" storage-class-modifier is valid. */ +#define YY_USE_CONST + +#else /* ! __cplusplus */ + +/* C99 requires __STDC__ to be defined as 1. */ +#if defined (__STDC__) + +#define YY_USE_CONST + +#endif /* defined (__STDC__) */ +#endif /* ! __cplusplus */ + +#ifdef YY_USE_CONST +#define yyconst const +#else +#define yyconst +#endif + +/* Returned upon end-of-file. */ +#define YY_NULL 0 + +/* Promotes a possibly negative, possibly signed char to an unsigned + * integer for use as an array index. If the signed char is negative, + * we want to instead treat it as an 8-bit unsigned char, hence the + * double cast. + */ +#define YY_SC_TO_UI(c) ((unsigned int) (unsigned char) c) + +/* An opaque pointer. 
*/ +#ifndef YY_TYPEDEF_YY_SCANNER_T +#define YY_TYPEDEF_YY_SCANNER_T +typedef void* yyscan_t; +#endif + +/* For convenience, these vars (plus the bison vars far below) + are macros in the reentrant scanner. */ +#define yyin yyg->yyin_r +#define yyout yyg->yyout_r +#define yyextra yyg->yyextra_r +#define yyleng yyg->yyleng_r +#define yytext yyg->yytext_r +#define yylineno (YY_CURRENT_BUFFER_LVALUE->yy_bs_lineno) +#define yycolumn (YY_CURRENT_BUFFER_LVALUE->yy_bs_column) +#define yy_flex_debug yyg->yy_flex_debug_r + +/* Enter a start condition. This macro really ought to take a parameter, + * but we do it the disgusting crufty way forced on us by the ()-less + * definition of BEGIN. + */ +#define BEGIN yyg->yy_start = 1 + 2 * + +/* Translate the current start state into a value that can be later handed + * to BEGIN to return to the state. The YYSTATE alias is for lex + * compatibility. + */ +#define YY_START ((yyg->yy_start - 1) / 2) +#define YYSTATE YY_START + +/* Action number for EOF rule of a given start state. */ +#define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1) + +/* Special action meaning "start processing a new file". */ +#define YY_NEW_FILE yyrestart(yyin ,yyscanner ) + +#define YY_END_OF_BUFFER_CHAR 0 + +/* Size of default input buffer. */ +#ifndef YY_BUF_SIZE +#define YY_BUF_SIZE 16384 +#endif + +/* The state buf must be large enough to hold one state per character in the main buffer. + */ +#define YY_STATE_BUF_SIZE ((YY_BUF_SIZE + 2) * sizeof(yy_state_type)) + +#ifndef YY_TYPEDEF_YY_BUFFER_STATE +#define YY_TYPEDEF_YY_BUFFER_STATE +typedef struct yy_buffer_state *YY_BUFFER_STATE; +#endif + +#ifndef YY_TYPEDEF_YY_SIZE_T +#define YY_TYPEDEF_YY_SIZE_T +typedef size_t yy_size_t; +#endif + +#define EOB_ACT_CONTINUE_SCAN 0 +#define EOB_ACT_END_OF_FILE 1 +#define EOB_ACT_LAST_MATCH 2 + + /* Note: We specifically omit the test for yy_rule_can_match_eol because it requires + * access to the local variable yy_act. 
Since yyless() is a macro, it would break + * existing scanners that call yyless() from OUTSIDE yylex. + * One obvious solution it to make yy_act a global. I tried that, and saw + * a 5% performance hit in a non-yylineno scanner, because yy_act is + * normally declared as a register variable-- so it is not worth it. + */ + #define YY_LESS_LINENO(n) \ + do { \ + int yyl;\ + for ( yyl = n; yyl < yyleng; ++yyl )\ + if ( yytext[yyl] == '\n' )\ + --yylineno;\ + }while(0) + +/* Return all but the first "n" matched characters back to the input stream. */ +#define yyless(n) \ + do \ + { \ + /* Undo effects of setting up yytext. */ \ + int yyless_macro_arg = (n); \ + YY_LESS_LINENO(yyless_macro_arg);\ + *yy_cp = yyg->yy_hold_char; \ + YY_RESTORE_YY_MORE_OFFSET \ + yyg->yy_c_buf_p = yy_cp = yy_bp + yyless_macro_arg - YY_MORE_ADJ; \ + YY_DO_BEFORE_ACTION; /* set up yytext again */ \ + } \ + while ( 0 ) + +#define unput(c) yyunput( c, yyg->yytext_ptr , yyscanner ) + +#ifndef YY_STRUCT_YY_BUFFER_STATE +#define YY_STRUCT_YY_BUFFER_STATE +struct yy_buffer_state + { + FILE *yy_input_file; + + char *yy_ch_buf; /* input buffer */ + char *yy_buf_pos; /* current position in input buffer */ + + /* Size of input buffer in bytes, not including room for EOB + * characters. + */ + yy_size_t yy_buf_size; + + /* Number of characters read into yy_ch_buf, not including EOB + * characters. + */ + yy_size_t yy_n_chars; + + /* Whether we "own" the buffer - i.e., we know we created it, + * and can realloc() it to grow it, and should free() it to + * delete it. + */ + int yy_is_our_buffer; + + /* Whether this is an "interactive" input source; if so, and + * if we're using stdio for input, then we want to use getc() + * instead of fread(), to make sure we stop fetching input after + * each newline. + */ + int yy_is_interactive; + + /* Whether we're considered to be at the beginning of a line. + * If so, '^' rules will be active on the next match, otherwise + * not. 
+ */ + int yy_at_bol; + + int yy_bs_lineno; /**< The line count. */ + int yy_bs_column; /**< The column count. */ + + /* Whether to try to fill the input buffer when we reach the + * end of it. + */ + int yy_fill_buffer; + + int yy_buffer_status; + +#define YY_BUFFER_NEW 0 +#define YY_BUFFER_NORMAL 1 + /* When an EOF's been seen but there's still some text to process + * then we mark the buffer as YY_EOF_PENDING, to indicate that we + * shouldn't try reading from the input source any more. We might + * still have a bunch of tokens to match, though, because of + * possible backing-up. + * + * When we actually see the EOF, we change the status to "new" + * (via yyrestart()), so that the user can continue scanning by + * just pointing yyin at a new input file. + */ +#define YY_BUFFER_EOF_PENDING 2 + + }; +#endif /* !YY_STRUCT_YY_BUFFER_STATE */ + +/* We provide macros for accessing buffer states in case in the + * future we want to put the buffer states in a more general + * "scanner state". + * + * Returns the top of the stack, or NULL. + */ +#define YY_CURRENT_BUFFER ( yyg->yy_buffer_stack \ + ? yyg->yy_buffer_stack[yyg->yy_buffer_stack_top] \ + : NULL) + +/* Same as previous macro, but useful when we know that the buffer stack is not + * NULL or when we need an lvalue. For internal use only. 
+ */ +#define YY_CURRENT_BUFFER_LVALUE yyg->yy_buffer_stack[yyg->yy_buffer_stack_top] + +void yyrestart (FILE *input_file ,yyscan_t yyscanner ); +void yy_switch_to_buffer (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner ); +YY_BUFFER_STATE yy_create_buffer (FILE *file,int size ,yyscan_t yyscanner ); +void yy_delete_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner ); +void yy_flush_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner ); +void yypush_buffer_state (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner ); +void yypop_buffer_state (yyscan_t yyscanner ); + +static void yyensure_buffer_stack (yyscan_t yyscanner ); +static void yy_load_buffer_state (yyscan_t yyscanner ); +static void yy_init_buffer (YY_BUFFER_STATE b,FILE *file ,yyscan_t yyscanner ); + +#define YY_FLUSH_BUFFER yy_flush_buffer(YY_CURRENT_BUFFER ,yyscanner) + +YY_BUFFER_STATE yy_scan_buffer (char *base,yy_size_t size ,yyscan_t yyscanner ); +YY_BUFFER_STATE yy_scan_string (yyconst char *yy_str ,yyscan_t yyscanner ); +YY_BUFFER_STATE yy_scan_bytes (yyconst char *bytes,yy_size_t len ,yyscan_t yyscanner ); + +void *yyalloc (yy_size_t ,yyscan_t yyscanner ); +void *yyrealloc (void *,yy_size_t ,yyscan_t yyscanner ); +void yyfree (void * ,yyscan_t yyscanner ); + +#define yy_new_buffer yy_create_buffer + +#define yy_set_interactive(is_interactive) \ + { \ + if ( ! YY_CURRENT_BUFFER ){ \ + yyensure_buffer_stack (yyscanner); \ + YY_CURRENT_BUFFER_LVALUE = \ + yy_create_buffer(yyin,YY_BUF_SIZE ,yyscanner); \ + } \ + YY_CURRENT_BUFFER_LVALUE->yy_is_interactive = is_interactive; \ + } + +#define yy_set_bol(at_bol) \ + { \ + if ( ! 
YY_CURRENT_BUFFER ){\ + yyensure_buffer_stack (yyscanner); \ + YY_CURRENT_BUFFER_LVALUE = \ + yy_create_buffer(yyin,YY_BUF_SIZE ,yyscanner); \ + } \ + YY_CURRENT_BUFFER_LVALUE->yy_at_bol = at_bol; \ + } + +#define YY_AT_BOL() (YY_CURRENT_BUFFER_LVALUE->yy_at_bol) + +/* Begin user sect3 */ + +#define yywrap(yyscanner) 1 +#define YY_SKIP_YYWRAP + +typedef unsigned char YY_CHAR; + +typedef int yy_state_type; + +#define yytext_ptr yytext_r + +static yy_state_type yy_get_previous_state (yyscan_t yyscanner ); +static yy_state_type yy_try_NUL_trans (yy_state_type current_state ,yyscan_t yyscanner); +static int yy_get_next_buffer (yyscan_t yyscanner ); +static void yy_fatal_error (yyconst char msg[] ,yyscan_t yyscanner ); + +/* Done after the current pattern has been matched and before the + * corresponding action - sets up yytext. + */ +#define YY_DO_BEFORE_ACTION \ + yyg->yytext_ptr = yy_bp; \ + yyleng = (size_t) (yy_cp - yy_bp); \ + yyg->yy_hold_char = *yy_cp; \ + *yy_cp = '\0'; \ + yyg->yy_c_buf_p = yy_cp; + +#define YY_NUM_RULES 22 +#define YY_END_OF_BUFFER 23 +/* This struct is not used in this scanner, + but its presence is necessary. 
*/ +struct yy_trans_info + { + flex_int32_t yy_verify; + flex_int32_t yy_nxt; + }; +static yyconst flex_int16_t yy_accept[98] = + { 0, + 0, 0, 0, 0, 0, 0, 0, 0, 23, 22, + 1, 22, 22, 22, 22, 22, 22, 22, 5, 1, + 5, 17, 1, 17, 21, 21, 18, 21, 21, 9, + 1, 9, 0, 3, 0, 0, 0, 0, 0, 0, + 4, 17, 17, 0, 17, 17, 7, 0, 20, 0, + 0, 0, 0, 0, 16, 8, 0, 0, 2, 14, + 0, 0, 0, 0, 19, 0, 17, 0, 17, 17, + 0, 0, 6, 20, 0, 15, 0, 0, 16, 0, + 0, 0, 0, 0, 19, 0, 0, 0, 10, 0, + 0, 0, 0, 12, 13, 11, 0 + + } ; + +static yyconst flex_int32_t yy_ec[256] = + { 0, + 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, + 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 2, 1, 4, 5, 1, 1, 1, 1, 6, + 6, 7, 6, 1, 8, 9, 10, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 1, 12, 13, + 6, 14, 1, 1, 1, 1, 1, 1, 1, 15, + 16, 1, 1, 17, 1, 1, 1, 1, 1, 1, + 1, 1, 18, 1, 1, 1, 1, 1, 1, 1, + 6, 19, 6, 1, 1, 1, 20, 21, 22, 1, + + 23, 1, 24, 1, 25, 1, 1, 26, 27, 1, + 28, 29, 1, 30, 1, 31, 32, 1, 1, 1, + 1, 1, 33, 6, 34, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 35, 1, 1, 1, + 36, 1, 1, 1, 1, 1, 1, 1, 1, 1, + + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 37, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1 + } ; + +static yyconst flex_int32_t yy_meta[38] = + { 0, + 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, + 1, 2, 3, 3, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 2, 2, 1, 1, 1 + } ; + +static yyconst flex_int16_t yy_base[113] = + { 0, + 0, 36, 4, 12, 72, 105, 14, 20, 135, 312, + 312, 117, 2, 0, 103, 105, 99, 95, 312, 312, + 119, 0, 312, 138, 312, 21, 312, 0, 1, 312, + 312, 118, 109, 312, 123, 111, 104, 94, 101, 85, + 312, 0, 171, 14, 0, 204, 312, 109, 113, 41, + 106, 96, 21, 23, 312, 312, 88, 98, 312, 312, + 73, 71, 70, 89, 312, 44, 0, 39, 0, 237, + 43, 90, 312, 312, 57, 312, 37, 69, 43, 77, + 
64, 57, 58, 64, 76, 94, 79, 59, 312, 39, + 14, 14, 4, 312, 312, 312, 312, 271, 274, 277, + + 280, 283, 0, 285, 288, 290, 293, 296, 299, 302, + 305, 308 + } ; + +static yyconst flex_int16_t yy_def[113] = + { 0, + 98, 98, 99, 99, 100, 100, 101, 101, 97, 97, + 97, 97, 97, 102, 97, 97, 97, 97, 97, 97, + 97, 103, 97, 104, 97, 97, 97, 105, 106, 97, + 97, 97, 97, 97, 107, 102, 97, 97, 97, 97, + 97, 103, 104, 108, 103, 109, 97, 97, 110, 97, + 97, 105, 106, 111, 97, 97, 97, 107, 97, 97, + 97, 97, 97, 97, 97, 112, 43, 108, 43, 109, + 97, 110, 97, 97, 97, 97, 106, 111, 106, 97, + 97, 97, 97, 97, 108, 112, 97, 97, 97, 97, + 97, 97, 97, 97, 97, 97, 0, 97, 97, 97, + + 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, + 97, 97 + } ; + +static yyconst flex_int16_t yy_nxt[350] = + { 0, + 42, 11, 11, 97, 12, 20, 11, 97, 34, 13, + 21, 35, 14, 20, 11, 31, 11, 65, 21, 54, + 32, 31, 11, 15, 16, 53, 32, 47, 17, 48, + 49, 50, 66, 96, 55, 95, 18, 11, 11, 54, + 12, 78, 65, 51, 94, 13, 44, 85, 14, 48, + 74, 50, 74, 87, 55, 54, 79, 66, 93, 15, + 16, 54, 86, 51, 17, 51, 74, 88, 74, 88, + 55, 53, 18, 23, 11, 24, 55, 25, 25, 65, + 33, 26, 92, 27, 28, 25, 91, 78, 74, 87, + 90, 89, 73, 84, 66, 83, 44, 85, 82, 81, + + 59, 51, 79, 80, 29, 25, 23, 11, 24, 76, + 25, 25, 86, 75, 26, 73, 27, 28, 25, 71, + 64, 63, 62, 61, 60, 59, 57, 56, 41, 40, + 39, 38, 37, 33, 97, 97, 97, 29, 25, 44, + 44, 45, 97, 44, 44, 97, 97, 44, 97, 44, + 44, 44, 97, 97, 97, 97, 46, 97, 97, 97, + 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, + 44, 44, 44, 44, 45, 97, 44, 44, 97, 97, + 44, 97, 44, 44, 44, 97, 97, 97, 97, 46, + 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, + + 97, 97, 97, 44, 44, 68, 44, 69, 97, 68, + 68, 97, 97, 68, 97, 68, 68, 68, 97, 97, + 97, 97, 70, 97, 97, 97, 97, 97, 97, 97, + 97, 97, 97, 97, 97, 97, 68, 68, 68, 44, + 69, 97, 68, 68, 97, 97, 68, 97, 68, 68, + 68, 97, 97, 97, 97, 70, 97, 97, 97, 97, + 97, 97, 97, 97, 97, 97, 97, 97, 97, 68, + 68, 10, 10, 10, 19, 19, 19, 22, 22, 22, + 30, 30, 30, 36, 36, 43, 43, 43, 52, 52, + 53, 
53, 53, 58, 58, 58, 44, 44, 44, 67, + + 67, 67, 72, 72, 72, 77, 77, 77, 68, 68, + 68, 9, 97, 97, 97, 97, 97, 97, 97, 97, + 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, + 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, + 97, 97, 97, 97, 97, 97, 97, 97, 97 + } ; + +static yyconst flex_int16_t yy_chk[350] = + { 0, + 103, 1, 1, 0, 1, 3, 3, 0, 13, 1, + 3, 13, 1, 4, 4, 7, 7, 44, 4, 29, + 7, 8, 8, 1, 1, 54, 8, 26, 1, 26, + 26, 26, 44, 93, 29, 92, 1, 2, 2, 53, + 2, 54, 68, 26, 91, 2, 66, 66, 2, 50, + 50, 50, 71, 71, 53, 77, 54, 68, 90, 2, + 2, 79, 66, 50, 2, 71, 75, 75, 88, 88, + 77, 78, 2, 5, 5, 5, 79, 5, 5, 85, + 84, 5, 83, 5, 5, 5, 82, 78, 87, 87, + 81, 80, 72, 64, 85, 63, 86, 86, 62, 61, + + 58, 87, 78, 57, 5, 5, 6, 6, 6, 52, + 6, 6, 86, 51, 6, 49, 6, 6, 6, 48, + 40, 39, 38, 37, 36, 35, 33, 32, 21, 18, + 17, 16, 15, 12, 9, 0, 0, 6, 6, 24, + 24, 24, 0, 24, 24, 0, 0, 24, 0, 24, + 24, 24, 0, 0, 0, 0, 24, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 24, 24, 43, 43, 43, 0, 43, 43, 0, 0, + 43, 0, 43, 43, 43, 0, 0, 0, 0, 43, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 43, 43, 46, 46, 46, 0, 46, + 46, 0, 0, 46, 0, 46, 46, 46, 0, 0, + 0, 0, 46, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 46, 46, 70, 70, + 70, 0, 70, 70, 0, 0, 70, 0, 70, 70, + 70, 0, 0, 0, 0, 70, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 70, + 70, 98, 98, 98, 99, 99, 99, 100, 100, 100, + 101, 101, 101, 102, 102, 104, 104, 104, 105, 105, + 106, 106, 106, 107, 107, 107, 108, 108, 108, 109, + + 109, 109, 110, 110, 110, 111, 111, 111, 112, 112, + 112, 97, 97, 97, 97, 97, 97, 97, 97, 97, + 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, + 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, + 97, 97, 97, 97, 97, 97, 97, 97, 97 + } ; + +/* Table of booleans, true if rule could match eol. */ +static yyconst flex_int32_t yy_rule_can_match_eol[23] = + { 0, +1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, + 0, 0, 0, }; + +/* The intent behind this definition is that it'll catch + * any uses of REJECT which flex missed. 
+ */ +#define REJECT reject_used_but_not_detected +#define yymore() yymore_used_but_not_detected +#define YY_MORE_ADJ 0 +#define YY_RESTORE_YY_MORE_OFFSET +#line 1 "_jsgf_scanner.l" +/* -*- mode: text -*- */ +/* ==================================================================== + * Copyright (c) 2007 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ *
+ * ====================================================================
+ *
+ */
+/* YOU MUST USE FLEX 2.5.37 OR NEWER TO PROCESS THIS FILE!!! */
+#line 39 "_jsgf_scanner.l"
+
+#include "jsgf_internal.h"
+#include "jsgf_parser.h"
+
+#define YY_NO_UNISTD_H 1
+
+
+
+#line 609 "jsgf_scanner.c"
+
+/* Start conditions selected with BEGIN() in the rule actions. */
+#define INITIAL 0
+#define COMMENT 1
+#define DECL 2
+#define DECLCOMMENT 3
+
+#ifndef YY_NO_UNISTD_H
+/* Special case for "unistd.h", since it is non-ANSI.  We include it way
+ * down here because we want the user's section 1 to have been scanned first.
+ * The user has a chance to override it with an option.
+ * (YY_NO_UNISTD_H is defined above, so this include is compiled out here.)
+ */
+#include <unistd.h>
+#endif
+
+#ifndef YY_EXTRA_TYPE
+#define YY_EXTRA_TYPE void *
+#endif
+
+/* Holds the entire state of the reentrant scanner. */
+struct yyguts_t
+    {
+
+    /* User-defined. Not touched by flex. */
+    YY_EXTRA_TYPE yyextra_r;
+
+    /* The rest are the same as the globals declared in the non-reentrant scanner. */
+    FILE *yyin_r, *yyout_r;
+    size_t yy_buffer_stack_top; /**< index of top of stack. */
+    size_t yy_buffer_stack_max; /**< capacity of stack. */
+    YY_BUFFER_STATE * yy_buffer_stack; /**< Stack as an array. */
+    char yy_hold_char;      /* character saved while a NUL terminates yytext */
+    yy_size_t yy_n_chars;   /* number of characters held in the current buffer */
+    yy_size_t yyleng_r;
+    char *yy_c_buf_p;       /* current position in the current buffer */
+    int yy_init;            /* set to 1 once yylex() has done its first-call setup */
+    int yy_start;           /* start state the matcher begins each token in */
+    int yy_did_buffer_switch_on_eof; /* set whenever the current buffer is replaced */
+    int yy_start_stack_ptr;
+    int yy_start_stack_depth;
+    int *yy_start_stack;
+    /* State and buffer position of the most recent accepting state; the
+     * matcher returns to them when it has to back up.
+     */
+    yy_state_type yy_last_accepting_state;
+    char* yy_last_accepting_cpos;
+
+    int yylineno_r;
+    int yy_flex_debug_r;
+
+    char *yytext_r;
+    int yy_more_flag;
+    int yy_more_len;
+
+    /* Semantic-value slot supplied by the caller of yylex(). */
+    YYSTYPE * yylval_r;
+
+    }; /* end struct yyguts_t */
+
+static int yy_init_globals (yyscan_t yyscanner );
+
+    /* This must go here because YYSTYPE and YYLTYPE are included
+     * from bison output in section 1.*/
+    #    define yylval yyg->yylval_r
+
+int yylex_init (yyscan_t* scanner);
+
+int yylex_init_extra (YY_EXTRA_TYPE user_defined,yyscan_t* scanner);
+
+/* Accessor methods to globals.
+ These are made visible to non-reentrant scanners for convenience. */ + +int yylex_destroy (yyscan_t yyscanner ); + +int yyget_debug (yyscan_t yyscanner ); + +void yyset_debug (int debug_flag ,yyscan_t yyscanner ); + +YY_EXTRA_TYPE yyget_extra (yyscan_t yyscanner ); + +void yyset_extra (YY_EXTRA_TYPE user_defined ,yyscan_t yyscanner ); + +FILE *yyget_in (yyscan_t yyscanner ); + +void yyset_in (FILE * in_str ,yyscan_t yyscanner ); + +FILE *yyget_out (yyscan_t yyscanner ); + +void yyset_out (FILE * out_str ,yyscan_t yyscanner ); + +yy_size_t yyget_leng (yyscan_t yyscanner ); + +char *yyget_text (yyscan_t yyscanner ); + +int yyget_lineno (yyscan_t yyscanner ); + +void yyset_lineno (int line_number ,yyscan_t yyscanner ); + +int yyget_column (yyscan_t yyscanner ); + +void yyset_column (int column_no ,yyscan_t yyscanner ); + +YYSTYPE * yyget_lval (yyscan_t yyscanner ); + +void yyset_lval (YYSTYPE * yylval_param ,yyscan_t yyscanner ); + +/* Macros after this point can all be overridden by user definitions in + * section 1. + */ + +#ifndef YY_SKIP_YYWRAP +#ifdef __cplusplus +extern "C" int yywrap (yyscan_t yyscanner ); +#else +extern int yywrap (yyscan_t yyscanner ); +#endif +#endif + +#ifndef yytext_ptr +static void yy_flex_strncpy (char *,yyconst char *,int ,yyscan_t yyscanner); +#endif + +#ifdef YY_NEED_STRLEN +static int yy_flex_strlen (yyconst char * ,yyscan_t yyscanner); +#endif + +#ifndef YY_NO_INPUT + +#ifdef __cplusplus +static int yyinput (yyscan_t yyscanner ); +#else +static int input (yyscan_t yyscanner ); +#endif + +#endif + +/* Amount of stuff to slurp up with each read. */ +#ifndef YY_READ_BUF_SIZE +#define YY_READ_BUF_SIZE 8192 +#endif + +/* Copy whatever the last rule matched to the standard output. */ +#ifndef ECHO +/* This used to be an fputs(), but since the string might contain NUL's, + * we now use fwrite(). + */ +#define ECHO do { if (fwrite( yytext, yyleng, 1, yyout )) {} } while (0) +#endif + +/* Gets input and stuffs it into "buf". 
number of characters read, or YY_NULL, + * is returned in "result". + */ +#ifndef YY_INPUT +#define YY_INPUT(buf,result,max_size) \ + if ( YY_CURRENT_BUFFER_LVALUE->yy_is_interactive ) \ + { \ + int c = '*'; \ + size_t n; \ + for ( n = 0; n < max_size && \ + (c = getc( yyin )) != EOF && c != '\n'; ++n ) \ + buf[n] = (char) c; \ + if ( c == '\n' ) \ + buf[n++] = (char) c; \ + if ( c == EOF && ferror( yyin ) ) \ + YY_FATAL_ERROR( "input in flex scanner failed" ); \ + result = n; \ + } \ + else \ + { \ + errno=0; \ + while ( (result = fread(buf, 1, max_size, yyin))==0 && ferror(yyin)) \ + { \ + if( errno != EINTR) \ + { \ + YY_FATAL_ERROR( "input in flex scanner failed" ); \ + break; \ + } \ + errno=0; \ + clearerr(yyin); \ + } \ + }\ +\ + +#endif + +/* No semi-colon after return; correct usage is to write "yyterminate();" - + * we don't want an extra ';' after the "return" because that will cause + * some compilers to complain about unreachable statements. + */ +#ifndef yyterminate +#define yyterminate() return YY_NULL +#endif + +/* Number of entries by which start-condition stack grows. */ +#ifndef YY_START_STACK_INCR +#define YY_START_STACK_INCR 25 +#endif + +/* Report a fatal error. */ +#ifndef YY_FATAL_ERROR +#define YY_FATAL_ERROR(msg) yy_fatal_error( msg , yyscanner) +#endif + +/* end tables serialization structures and prototypes */ + +/* Default declaration of generated scanner - a define so the user can + * easily add parameters. + */ +#ifndef YY_DECL +#define YY_DECL_IS_OURS 1 + +extern int yylex \ + (YYSTYPE * yylval_param ,yyscan_t yyscanner); + +#define YY_DECL int yylex \ + (YYSTYPE * yylval_param , yyscan_t yyscanner) +#endif /* !YY_DECL */ + +/* Code executed at the beginning of each rule, after yytext and yyleng + * have been set up. + */ +#ifndef YY_USER_ACTION +#define YY_USER_ACTION +#endif + +/* Code executed at the end of each rule. 
*/ +#ifndef YY_BREAK +#define YY_BREAK break; +#endif + +#define YY_RULE_SETUP \ + YY_USER_ACTION + +/** The main scanner function which does all the work. + */ +YY_DECL +{ + register yy_state_type yy_current_state; + register char *yy_cp, *yy_bp; + register int yy_act; + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + +#line 59 "_jsgf_scanner.l" + + +#line 850 "jsgf_scanner.c" + + yylval = yylval_param; + + if ( !yyg->yy_init ) + { + yyg->yy_init = 1; + +#ifdef YY_USER_INIT + YY_USER_INIT; +#endif + + if ( ! yyg->yy_start ) + yyg->yy_start = 1; /* first start state */ + + if ( ! yyin ) + yyin = stdin; + + if ( ! yyout ) + yyout = stdout; + + if ( ! YY_CURRENT_BUFFER ) { + yyensure_buffer_stack (yyscanner); + YY_CURRENT_BUFFER_LVALUE = + yy_create_buffer(yyin,YY_BUF_SIZE ,yyscanner); + } + + yy_load_buffer_state(yyscanner ); + } + + while ( 1 ) /* loops until end-of-file is reached */ + { + yy_cp = yyg->yy_c_buf_p; + + /* Support of yytext. */ + *yy_cp = yyg->yy_hold_char; + + /* yy_bp points to the position in yy_ch_buf of the start of + * the current run. 
+ */ + yy_bp = yy_cp; + + yy_current_state = yyg->yy_start; +yy_match: + do + { + register YY_CHAR yy_c = yy_ec[YY_SC_TO_UI(*yy_cp)]; + if ( yy_accept[yy_current_state] ) + { + yyg->yy_last_accepting_state = yy_current_state; + yyg->yy_last_accepting_cpos = yy_cp; + } + while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) + { + yy_current_state = (int) yy_def[yy_current_state]; + if ( yy_current_state >= 98 ) + yy_c = yy_meta[(unsigned int) yy_c]; + } + yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c]; + ++yy_cp; + } + while ( yy_current_state != 97 ); + yy_cp = yyg->yy_last_accepting_cpos; + yy_current_state = yyg->yy_last_accepting_state; + +yy_find_action: + yy_act = yy_accept[yy_current_state]; + + YY_DO_BEFORE_ACTION; + + if ( yy_act != YY_END_OF_BUFFER && yy_rule_can_match_eol[yy_act] ) + { + int yyl; + for ( yyl = 0; yyl < yyleng; ++yyl ) + if ( yytext[yyl] == '\n' ) + + do{ yylineno++; + yycolumn=0; + }while(0) +; + } + +do_action: /* This label is used only to access EOF actions. 
*/ + + switch ( yy_act ) + { /* beginning of action switch */ + case 0: /* must back up */ + /* undo the effects of YY_DO_BEFORE_ACTION */ + *yy_cp = yyg->yy_hold_char; + yy_cp = yyg->yy_last_accepting_cpos; + yy_current_state = yyg->yy_last_accepting_state; + goto yy_find_action; + +case 1: +/* rule 1 can match eol */ +YY_RULE_SETUP +#line 61 "_jsgf_scanner.l" +; /* ignore whitespace */ + YY_BREAK +case 2: +/* rule 2 can match eol */ +YY_RULE_SETUP +#line 62 "_jsgf_scanner.l" +; /* single-line comments */ + YY_BREAK +case 3: +YY_RULE_SETUP +#line 63 "_jsgf_scanner.l" +{ BEGIN(COMMENT); } /* C-style comments */ + YY_BREAK +case 4: +YY_RULE_SETUP +#line 64 "_jsgf_scanner.l" +{ BEGIN(INITIAL); } + YY_BREAK +case 5: +YY_RULE_SETUP +#line 65 "_jsgf_scanner.l" +; /* Ignore stuff in comment mode */ + YY_BREAK +case 6: +/* rule 6 can match eol */ +YY_RULE_SETUP +#line 67 "_jsgf_scanner.l" +; /* single-line comments inside decl */ + YY_BREAK +case 7: +YY_RULE_SETUP +#line 68 "_jsgf_scanner.l" +{ BEGIN(DECLCOMMENT); } /* C-style comments inside decl */ + YY_BREAK +case 8: +YY_RULE_SETUP +#line 69 "_jsgf_scanner.l" +{ BEGIN(DECL); } + YY_BREAK +case 9: +YY_RULE_SETUP +#line 70 "_jsgf_scanner.l" +; /* Ignore stuff in comment mode */ + YY_BREAK +case 10: +YY_RULE_SETUP +#line 72 "_jsgf_scanner.l" +{BEGIN(DECL); return HEADER;} + YY_BREAK +case 11: +YY_RULE_SETUP +#line 73 "_jsgf_scanner.l" +{BEGIN(DECL); return GRAMMAR;} + YY_BREAK +case 12: +YY_RULE_SETUP +#line 74 "_jsgf_scanner.l" +{BEGIN(DECL); return IMPORT;} + YY_BREAK +case 13: +YY_RULE_SETUP +#line 75 "_jsgf_scanner.l" +{BEGIN(DECL); return PUBLIC;} + YY_BREAK +case 14: +/* rule 14 can match eol */ +YY_RULE_SETUP +#line 77 "_jsgf_scanner.l" +{ BEGIN(DECL); yylval->name = strdup(yytext); return RULENAME; } + YY_BREAK +case 15: +/* rule 15 can match eol */ +YY_RULE_SETUP +#line 78 "_jsgf_scanner.l" +{ yylval->name = strdup(yytext); return RULENAME; } + YY_BREAK +case 16: +/* rule 16 can match eol */ +YY_RULE_SETUP +#line 
80 "_jsgf_scanner.l" +{ yylval->name = strdup(yytext); return TAG; } + YY_BREAK +case 17: +YY_RULE_SETUP +#line 81 "_jsgf_scanner.l" +{ yylval->name = strdup(yytext); return TOKEN; } + YY_BREAK +case 18: +YY_RULE_SETUP +#line 82 "_jsgf_scanner.l" +{ BEGIN(INITIAL); return yytext[0]; } + YY_BREAK +case 19: +/* rule 19 can match eol */ +YY_RULE_SETUP +#line 83 "_jsgf_scanner.l" +{ yylval->name = strdup(yytext); return TOKEN; } + YY_BREAK +case 20: +YY_RULE_SETUP +#line 84 "_jsgf_scanner.l" +{ yylval->weight = atof_c(yytext+1); return WEIGHT; } + YY_BREAK +case 21: +YY_RULE_SETUP +#line 85 "_jsgf_scanner.l" +return yytext[0]; /* Single-character tokens */ + YY_BREAK +case 22: +YY_RULE_SETUP +#line 87 "_jsgf_scanner.l" +ECHO; + YY_BREAK +#line 1060 "jsgf_scanner.c" +case YY_STATE_EOF(INITIAL): +case YY_STATE_EOF(COMMENT): +case YY_STATE_EOF(DECL): +case YY_STATE_EOF(DECLCOMMENT): + yyterminate(); + + case YY_END_OF_BUFFER: + { + /* Amount of text matched not including the EOB char. */ + int yy_amount_of_matched_text = (int) (yy_cp - yyg->yytext_ptr) - 1; + + /* Undo the effects of YY_DO_BEFORE_ACTION. */ + *yy_cp = yyg->yy_hold_char; + YY_RESTORE_YY_MORE_OFFSET + + if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_NEW ) + { + /* We're scanning a new file or input source. It's + * possible that this happened because the user + * just pointed yyin at a new source and called + * yylex(). If so, then we have to assure + * consistency between YY_CURRENT_BUFFER and our + * globals. Here is the right place to do so, because + * this is the first action (other than possibly a + * back-up) that will match for the new input source. 
+ */ + yyg->yy_n_chars = YY_CURRENT_BUFFER_LVALUE->yy_n_chars; + YY_CURRENT_BUFFER_LVALUE->yy_input_file = yyin; + YY_CURRENT_BUFFER_LVALUE->yy_buffer_status = YY_BUFFER_NORMAL; + } + + /* Note that here we test for yy_c_buf_p "<=" to the position + * of the first EOB in the buffer, since yy_c_buf_p will + * already have been incremented past the NUL character + * (since all states make transitions on EOB to the + * end-of-buffer state). Contrast this with the test + * in input(). + */ + if ( yyg->yy_c_buf_p <= &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars] ) + { /* This was really a NUL. */ + yy_state_type yy_next_state; + + yyg->yy_c_buf_p = yyg->yytext_ptr + yy_amount_of_matched_text; + + yy_current_state = yy_get_previous_state( yyscanner ); + + /* Okay, we're now positioned to make the NUL + * transition. We couldn't have + * yy_get_previous_state() go ahead and do it + * for us because it doesn't know how to deal + * with the possibility of jamming (and we don't + * want to build jamming into it because then it + * will run more slowly). + */ + + yy_next_state = yy_try_NUL_trans( yy_current_state , yyscanner); + + yy_bp = yyg->yytext_ptr + YY_MORE_ADJ; + + if ( yy_next_state ) + { + /* Consume the NUL. */ + yy_cp = ++yyg->yy_c_buf_p; + yy_current_state = yy_next_state; + goto yy_match; + } + + else + { + yy_cp = yyg->yy_last_accepting_cpos; + yy_current_state = yyg->yy_last_accepting_state; + goto yy_find_action; + } + } + + else switch ( yy_get_next_buffer( yyscanner ) ) + { + case EOB_ACT_END_OF_FILE: + { + yyg->yy_did_buffer_switch_on_eof = 0; + + if ( yywrap(yyscanner ) ) + { + /* Note: because we've taken care in + * yy_get_next_buffer() to have set up + * yytext, we can now set up + * yy_c_buf_p so that if some total + * hoser (like flex itself) wants to + * call the scanner after we return the + * YY_NULL, it'll still work - another + * YY_NULL will get returned. 
+ */ + yyg->yy_c_buf_p = yyg->yytext_ptr + YY_MORE_ADJ; + + yy_act = YY_STATE_EOF(YY_START); + goto do_action; + } + + else + { + if ( ! yyg->yy_did_buffer_switch_on_eof ) + YY_NEW_FILE; + } + break; + } + + case EOB_ACT_CONTINUE_SCAN: + yyg->yy_c_buf_p = + yyg->yytext_ptr + yy_amount_of_matched_text; + + yy_current_state = yy_get_previous_state( yyscanner ); + + yy_cp = yyg->yy_c_buf_p; + yy_bp = yyg->yytext_ptr + YY_MORE_ADJ; + goto yy_match; + + case EOB_ACT_LAST_MATCH: + yyg->yy_c_buf_p = + &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars]; + + yy_current_state = yy_get_previous_state( yyscanner ); + + yy_cp = yyg->yy_c_buf_p; + yy_bp = yyg->yytext_ptr + YY_MORE_ADJ; + goto yy_find_action; + } + break; + } + + default: + YY_FATAL_ERROR( + "fatal flex scanner internal error--no action found" ); + } /* end of action switch */ + } /* end of scanning one token */ +} /* end of yylex */ + +/* yy_get_next_buffer - try to read in a new buffer + * + * Returns a code representing an action: + * EOB_ACT_LAST_MATCH - + * EOB_ACT_CONTINUE_SCAN - continue scanning from current position + * EOB_ACT_END_OF_FILE - end of file + */ +static int yy_get_next_buffer (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + register char *dest = YY_CURRENT_BUFFER_LVALUE->yy_ch_buf; + register char *source = yyg->yytext_ptr; + register int number_to_move, i; + int ret_val; + + if ( yyg->yy_c_buf_p > &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars + 1] ) + YY_FATAL_ERROR( + "fatal flex scanner internal error--end of buffer missed" ); + + if ( YY_CURRENT_BUFFER_LVALUE->yy_fill_buffer == 0 ) + { /* Don't try to fill the buffer, so this is an EOF. */ + if ( yyg->yy_c_buf_p - yyg->yytext_ptr - YY_MORE_ADJ == 1 ) + { + /* We matched a single character, the EOB, so + * treat this as a final EOF. + */ + return EOB_ACT_END_OF_FILE; + } + + else + { + /* We matched some text prior to the EOB, first + * process it. 
+ */ + return EOB_ACT_LAST_MATCH; + } + } + + /* Try to read more data. */ + + /* First move last chars to start of buffer. */ + number_to_move = (int) (yyg->yy_c_buf_p - yyg->yytext_ptr) - 1; + + for ( i = 0; i < number_to_move; ++i ) + *(dest++) = *(source++); + + if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_EOF_PENDING ) + /* don't do the read, it's not guaranteed to return an EOF, + * just force an EOF + */ + YY_CURRENT_BUFFER_LVALUE->yy_n_chars = yyg->yy_n_chars = 0; + + else + { + int num_to_read = + YY_CURRENT_BUFFER_LVALUE->yy_buf_size - number_to_move - 1; + + while ( num_to_read <= 0 ) + { /* Not enough room in the buffer - grow it. */ + + /* just a shorter name for the current buffer */ + YY_BUFFER_STATE b = YY_CURRENT_BUFFER_LVALUE; + + int yy_c_buf_p_offset = + (int) (yyg->yy_c_buf_p - b->yy_ch_buf); + + if ( b->yy_is_our_buffer ) + { + yy_size_t new_size = b->yy_buf_size * 2; + + if ( new_size <= 0 ) + b->yy_buf_size += b->yy_buf_size / 8; + else + b->yy_buf_size *= 2; + + b->yy_ch_buf = (char *) + /* Include room in for 2 EOB chars. */ + yyrealloc((void *) b->yy_ch_buf,b->yy_buf_size + 2 ,yyscanner ); + } + else + /* Can't grow it, we don't own it. */ + b->yy_ch_buf = 0; + + if ( ! b->yy_ch_buf ) + YY_FATAL_ERROR( + "fatal error - scanner input buffer overflow" ); + + yyg->yy_c_buf_p = &b->yy_ch_buf[yy_c_buf_p_offset]; + + num_to_read = YY_CURRENT_BUFFER_LVALUE->yy_buf_size - + number_to_move - 1; + + } + + if ( num_to_read > YY_READ_BUF_SIZE ) + num_to_read = YY_READ_BUF_SIZE; + + /* Read in more data. 
*/ + YY_INPUT( (&YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[number_to_move]), + yyg->yy_n_chars, num_to_read ); + + YY_CURRENT_BUFFER_LVALUE->yy_n_chars = yyg->yy_n_chars; + } + + if ( yyg->yy_n_chars == 0 ) + { + if ( number_to_move == YY_MORE_ADJ ) + { + ret_val = EOB_ACT_END_OF_FILE; + yyrestart(yyin ,yyscanner); + } + + else + { + ret_val = EOB_ACT_LAST_MATCH; + YY_CURRENT_BUFFER_LVALUE->yy_buffer_status = + YY_BUFFER_EOF_PENDING; + } + } + + else + ret_val = EOB_ACT_CONTINUE_SCAN; + + if ((yy_size_t) (yyg->yy_n_chars + number_to_move) > YY_CURRENT_BUFFER_LVALUE->yy_buf_size) { + /* Extend the array by 50%, plus the number we really need. */ + yy_size_t new_size = yyg->yy_n_chars + number_to_move + (yyg->yy_n_chars >> 1); + YY_CURRENT_BUFFER_LVALUE->yy_ch_buf = (char *) yyrealloc((void *) YY_CURRENT_BUFFER_LVALUE->yy_ch_buf,new_size ,yyscanner ); + if ( ! YY_CURRENT_BUFFER_LVALUE->yy_ch_buf ) + YY_FATAL_ERROR( "out of dynamic memory in yy_get_next_buffer()" ); + } + + yyg->yy_n_chars += number_to_move; + YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars] = YY_END_OF_BUFFER_CHAR; + YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars + 1] = YY_END_OF_BUFFER_CHAR; + + yyg->yytext_ptr = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[0]; + + return ret_val; +} + +/* yy_get_previous_state - get the state just before the EOB char was reached */ + + static yy_state_type yy_get_previous_state (yyscan_t yyscanner) +{ + register yy_state_type yy_current_state; + register char *yy_cp; + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + + yy_current_state = yyg->yy_start; + + for ( yy_cp = yyg->yytext_ptr + YY_MORE_ADJ; yy_cp < yyg->yy_c_buf_p; ++yy_cp ) + { + register YY_CHAR yy_c = (*yy_cp ? 
yy_ec[YY_SC_TO_UI(*yy_cp)] : 1); + if ( yy_accept[yy_current_state] ) + { + yyg->yy_last_accepting_state = yy_current_state; + yyg->yy_last_accepting_cpos = yy_cp; + } + while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) + { + yy_current_state = (int) yy_def[yy_current_state]; + if ( yy_current_state >= 98 ) + yy_c = yy_meta[(unsigned int) yy_c]; + } + yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c]; + } + + return yy_current_state; +} + +/* yy_try_NUL_trans - try to make a transition on the NUL character + * + * synopsis + * next_state = yy_try_NUL_trans( current_state ); + */ + static yy_state_type yy_try_NUL_trans (yy_state_type yy_current_state , yyscan_t yyscanner) +{ + register int yy_is_jam; + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; /* This var may be unused depending upon options. */ + register char *yy_cp = yyg->yy_c_buf_p; + + register YY_CHAR yy_c = 1; + if ( yy_accept[yy_current_state] ) + { + yyg->yy_last_accepting_state = yy_current_state; + yyg->yy_last_accepting_cpos = yy_cp; + } + while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) + { + yy_current_state = (int) yy_def[yy_current_state]; + if ( yy_current_state >= 98 ) + yy_c = yy_meta[(unsigned int) yy_c]; + } + yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c]; + yy_is_jam = (yy_current_state == 97); + + (void)yyg; + return yy_is_jam ? 0 : yy_current_state; +} + +#ifndef YY_NO_INPUT +#ifdef __cplusplus + static int yyinput (yyscan_t yyscanner) +#else + static int input (yyscan_t yyscanner) +#endif + +{ + int c; + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + + *yyg->yy_c_buf_p = yyg->yy_hold_char; + + if ( *yyg->yy_c_buf_p == YY_END_OF_BUFFER_CHAR ) + { + /* yy_c_buf_p now points to the character we want to return. + * If this occurs *before* the EOB characters, then it's a + * valid NUL; if not, then we've hit the end of the buffer. 
+ */ + if ( yyg->yy_c_buf_p < &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars] ) + /* This was really a NUL. */ + *yyg->yy_c_buf_p = '\0'; + + else + { /* need more input */ + yy_size_t offset = yyg->yy_c_buf_p - yyg->yytext_ptr; + ++yyg->yy_c_buf_p; + + switch ( yy_get_next_buffer( yyscanner ) ) + { + case EOB_ACT_LAST_MATCH: + /* This happens because yy_g_n_b() + * sees that we've accumulated a + * token and flags that we need to + * try matching the token before + * proceeding. But for input(), + * there's no matching to consider. + * So convert the EOB_ACT_LAST_MATCH + * to EOB_ACT_END_OF_FILE. + */ + + /* Reset buffer status. */ + yyrestart(yyin ,yyscanner); + + /*FALLTHROUGH*/ + + case EOB_ACT_END_OF_FILE: + { + if ( yywrap(yyscanner ) ) + return EOF; + + if ( ! yyg->yy_did_buffer_switch_on_eof ) + YY_NEW_FILE; +#ifdef __cplusplus + return yyinput(yyscanner); +#else + return input(yyscanner); +#endif + } + + case EOB_ACT_CONTINUE_SCAN: + yyg->yy_c_buf_p = yyg->yytext_ptr + offset; + break; + } + } + } + + c = *(unsigned char *) yyg->yy_c_buf_p; /* cast for 8-bit char's */ + *yyg->yy_c_buf_p = '\0'; /* preserve yytext */ + yyg->yy_hold_char = *++yyg->yy_c_buf_p; + + if ( c == '\n' ) + + do{ yylineno++; + yycolumn=0; + }while(0) +; + + return c; +} +#endif /* ifndef YY_NO_INPUT */ + +/** Immediately switch to a different input stream. + * @param input_file A readable stream. + * @param yyscanner The scanner object. + * @note This function does not reset the start condition to @c INITIAL . + */ + void yyrestart (FILE * input_file , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + + if ( ! YY_CURRENT_BUFFER ){ + yyensure_buffer_stack (yyscanner); + YY_CURRENT_BUFFER_LVALUE = + yy_create_buffer(yyin,YY_BUF_SIZE ,yyscanner); + } + + yy_init_buffer(YY_CURRENT_BUFFER,input_file ,yyscanner); + yy_load_buffer_state(yyscanner ); +} + +/** Switch to a different input buffer. + * @param new_buffer The new input buffer. 
+ * @param yyscanner The scanner object.
+ */
+    void yy_switch_to_buffer (YY_BUFFER_STATE new_buffer , yyscan_t yyscanner)
+{
+    struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+
+    /* TODO. We should be able to replace this entire function body
+     * with
+     *        yypop_buffer_state();
+     *        yypush_buffer_state(new_buffer);
+     */
+    yyensure_buffer_stack (yyscanner);
+    /* Nothing to do when the requested buffer is already the current one. */
+    if ( YY_CURRENT_BUFFER == new_buffer )
+        return;
+
+    if ( YY_CURRENT_BUFFER )
+        {
+        /* Flush out information for old buffer: put back the held
+         * character and record the scan position and character count so
+         * the buffer can be resumed later.
+         */
+        *yyg->yy_c_buf_p = yyg->yy_hold_char;
+        YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = yyg->yy_c_buf_p;
+        YY_CURRENT_BUFFER_LVALUE->yy_n_chars = yyg->yy_n_chars;
+        }
+
+    /* The new buffer replaces the current stack slot; nothing is pushed. */
+    YY_CURRENT_BUFFER_LVALUE = new_buffer;
+    yy_load_buffer_state(yyscanner );
+
+    /* We don't actually know whether we did this switch during
+     * EOF (yywrap()) processing, but the only time this flag
+     * is looked at is after yywrap() is called, so it's safe
+     * to go ahead and always set it.
+     */
+    yyg->yy_did_buffer_switch_on_eof = 1;
+}
+
+/* Copies the current buffer's saved character count, scan position and
+ * input file into the scanner state, and saves the character found at the
+ * scan position as the hold character.
+ */
+static void yy_load_buffer_state (yyscan_t yyscanner)
+{
+    struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+    yyg->yy_n_chars = YY_CURRENT_BUFFER_LVALUE->yy_n_chars;
+    yyg->yytext_ptr = yyg->yy_c_buf_p = YY_CURRENT_BUFFER_LVALUE->yy_buf_pos;
+    yyin = YY_CURRENT_BUFFER_LVALUE->yy_input_file;
+    yyg->yy_hold_char = *yyg->yy_c_buf_p;
+}
+
+/** Allocate and initialize an input buffer state.
+ * Allocation failures are reported through YY_FATAL_ERROR.
+ * @param file A readable stream.
+ * @param size The character buffer size in bytes. When in doubt, use @c YY_BUF_SIZE.
+ * @param yyscanner The scanner object.
+ * @return the allocated buffer state.
+ */
+    YY_BUFFER_STATE yy_create_buffer (FILE * file, int size , yyscan_t yyscanner)
+{
+    YY_BUFFER_STATE b;
+
+    b = (YY_BUFFER_STATE) yyalloc(sizeof( struct yy_buffer_state ) ,yyscanner );
+    if ( ! b )
+        YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" );
+
+    b->yy_buf_size = size;
+
+    /* yy_ch_buf has to be 2 characters longer than the size given because
+     * we need to put in 2 end-of-buffer characters.
+     */
+    b->yy_ch_buf = (char *) yyalloc(b->yy_buf_size + 2 ,yyscanner );
+    if ( ! b->yy_ch_buf )
+        YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" );
+
+    /* The character storage was allocated here, so yy_delete_buffer()
+     * is allowed to free it.
+     */
+    b->yy_is_our_buffer = 1;
+
+    yy_init_buffer(b,file ,yyscanner);
+
+    return b;
+}
+
+/** Destroy the buffer.
+ * A NULL @a b is ignored. The character storage is released only when the
+ * buffer state is flagged as owning it.
+ * @param b a buffer created with yy_create_buffer()
+ * @param yyscanner The scanner object.
+ */
+    void yy_delete_buffer (YY_BUFFER_STATE b , yyscan_t yyscanner)
+{
+    struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+
+    if ( ! b )
+        return;
+
+    /* Clear the current-buffer slot so it does not dangle; the stack top
+     * itself is left where it is.
+     */
+    if ( b == YY_CURRENT_BUFFER ) /* Not sure if we should pop here. */
+        YY_CURRENT_BUFFER_LVALUE = (YY_BUFFER_STATE) 0;
+
+    if ( b->yy_is_our_buffer )
+        yyfree((void *) b->yy_ch_buf ,yyscanner );
+
+    yyfree((void *) b ,yyscanner );
+}
+
+/* Initializes or reinitializes a buffer.
+ * This function is sometimes called more than once on the same buffer,
+ * such as during a yyrestart() or at EOF.
+ * The buffer is flushed, attached to @a file and marked as refillable and
+ * non-interactive. errno is saved on entry and restored on exit.
+ */
+    static void yy_init_buffer (YY_BUFFER_STATE b, FILE * file , yyscan_t yyscanner)
+
+{
+    int oerrno = errno;
+    struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+
+    yy_flush_buffer(b ,yyscanner);
+
+    b->yy_input_file = file;
+    b->yy_fill_buffer = 1;
+
+    /* If b is the current buffer, then yy_init_buffer was _probably_
+     * called from yyrestart() or through yy_get_next_buffer.
+     * In that case, we don't want to reset the lineno or column.
+     */
+    if (b != YY_CURRENT_BUFFER){
+        b->yy_bs_lineno = 1;
+        b->yy_bs_column = 0;
+    }
+
+    b->yy_is_interactive = 0;
+
+    errno = oerrno;
+}
+
+/** Discard all buffered characters. On the next scan, YY_INPUT will be called.
+ * @param b the buffer state to be flushed, usually @c YY_CURRENT_BUFFER.
+ * @param yyscanner The scanner object.
+ */
+    void yy_flush_buffer (YY_BUFFER_STATE b , yyscan_t yyscanner)
+{
+    struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+
+    if ( b )
+        {
+        /* An emptied buffer holds just the two end-of-buffer sentinels:
+         * the first moves the matcher into the end-of-buffer state, the
+         * second jams it there.
+         */
+        b->yy_ch_buf[0] = YY_END_OF_BUFFER_CHAR;
+        b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR;
+        b->yy_n_chars = 0;
+        b->yy_buf_pos = b->yy_ch_buf;
+
+        b->yy_buffer_status = YY_BUFFER_NEW;
+        b->yy_at_bol = 1;
+
+        /* When the active buffer was flushed, reload the scanner's copy
+         * of its position and character count.
+         */
+        if ( YY_CURRENT_BUFFER == b )
+            yy_load_buffer_state(yyscanner );
+        }
+}
+
+/** Makes @a new_buffer the current buffer by placing it on the buffer
+ *  stack, allocating or growing the stack when needed. A NULL
+ *  @a new_buffer is ignored.
+ *  @param new_buffer The buffer state that becomes current.
+ *  @param yyscanner The scanner object.
+ */
+void yypush_buffer_state (YY_BUFFER_STATE new_buffer , yyscan_t yyscanner)
+{
+    struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+
+    if ( ! new_buffer )
+        return;
+
+    yyensure_buffer_stack(yyscanner);
+
+    if ( YY_CURRENT_BUFFER )
+        {
+        /* Same bookkeeping as yy_switch_to_buffer(): put back the held
+         * character and remember where scanning of the old buffer stopped.
+         */
+        *yyg->yy_c_buf_p = yyg->yy_hold_char;
+        YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = yyg->yy_c_buf_p;
+        YY_CURRENT_BUFFER_LVALUE->yy_n_chars = yyg->yy_n_chars;
+
+        /* The top slot is occupied, so advance to the next one. With an
+         * empty top slot the new buffer simply fills it.
+         */
+        ++yyg->yy_buffer_stack_top;
+        }
+
+    YY_CURRENT_BUFFER_LVALUE = new_buffer;
+
+    /* As in yy_switch_to_buffer(): load the new buffer and flag the switch. */
+    yy_load_buffer_state(yyscanner );
+    yyg->yy_did_buffer_switch_on_eof = 1;
+}
+
+/** Removes and deletes the top of the stack, if present.
+ *  The next element becomes the new top.
+ *  @param yyscanner The scanner object.
+ */
+void yypop_buffer_state (yyscan_t yyscanner)
+{
+    struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+    if (!YY_CURRENT_BUFFER)
+        return;
+
+    /* Free the top buffer and clear its slot before stepping down. */
+    yy_delete_buffer(YY_CURRENT_BUFFER ,yyscanner);
+    YY_CURRENT_BUFFER_LVALUE = NULL;
+    if (yyg->yy_buffer_stack_top > 0)
+        --yyg->yy_buffer_stack_top;
+
+    /* If a buffer remains underneath, make it the active one. */
+    if (YY_CURRENT_BUFFER) {
+        yy_load_buffer_state(yyscanner );
+        yyg->yy_did_buffer_switch_on_eof = 1;
+    }
+}
+
+/* Allocates the stack if it does not exist.
+ * Guarantees space for at least one push.
+ * Allocation failures are reported through YY_FATAL_ERROR.
+ */
+static void yyensure_buffer_stack (yyscan_t yyscanner)
+{
+    yy_size_t num_to_alloc;
+    struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+
+    if (!yyg->yy_buffer_stack) {
+
+        /* First allocation is a single, zeroed slot, since we don't know
+         * if this scanner will even need a stack. (Earlier wording of this
+         * comment spoke of 2 elements, but the code allocates 1; a later
+         * call grows the stack through the branch below.)
+         */
+        num_to_alloc = 1;
+        yyg->yy_buffer_stack = (struct yy_buffer_state**)yyalloc
+                                (num_to_alloc * sizeof(struct yy_buffer_state*)
+                                , yyscanner);
+        if ( ! yyg->yy_buffer_stack )
+            YY_FATAL_ERROR( "out of dynamic memory in yyensure_buffer_stack()" );
+
+        memset(yyg->yy_buffer_stack, 0, num_to_alloc * sizeof(struct yy_buffer_state*));
+
+        yyg->yy_buffer_stack_max = num_to_alloc;
+        yyg->yy_buffer_stack_top = 0;
+        return;
+    }
+
+    /* The top index has reached the last slot: a push would overflow. */
+    if (yyg->yy_buffer_stack_top >= (yyg->yy_buffer_stack_max) - 1){
+
+        /* Increase the buffer to prepare for a possible push. */
+        int grow_size = 8 /* arbitrary grow size */;
+
+        num_to_alloc = yyg->yy_buffer_stack_max + grow_size;
+        yyg->yy_buffer_stack = (struct yy_buffer_state**)yyrealloc
+                                (yyg->yy_buffer_stack,
+                                num_to_alloc * sizeof(struct yy_buffer_state*)
+                                , yyscanner);
+        if ( ! yyg->yy_buffer_stack )
+            YY_FATAL_ERROR( "out of dynamic memory in yyensure_buffer_stack()" );
+
+        /* zero only the new slots.*/
+        memset(yyg->yy_buffer_stack + yyg->yy_buffer_stack_max, 0, grow_size * sizeof(struct yy_buffer_state*));
+        yyg->yy_buffer_stack_max = num_to_alloc;
+    }
+}
+
+/** Setup the input buffer state to scan directly from a user-specified character buffer.
+ * The buffer is used in place (not copied) and becomes the current buffer.
+ * @param base the character buffer; its last two bytes must be
+ *        YY_END_OF_BUFFER_CHAR
+ * @param size the size in bytes of the character buffer, including the two
+ *        end-of-buffer bytes
+ * @param yyscanner The scanner object.
+ * @return the newly allocated buffer state object, or 0 when @a size is
+ *         less than 2 or the two trailing end-of-buffer bytes are missing.
+ */
+YY_BUFFER_STATE yy_scan_buffer (char * base, yy_size_t size , yyscan_t yyscanner)
+{
+    YY_BUFFER_STATE b;
+
+    if ( size < 2 ||
+         base[size-2] != YY_END_OF_BUFFER_CHAR ||
+         base[size-1] != YY_END_OF_BUFFER_CHAR )
+        /* They forgot to leave room for the EOB's. */
+        return 0;
+
+    b = (YY_BUFFER_STATE) yyalloc(sizeof( struct yy_buffer_state ) ,yyscanner );
+    if ( ! b )
+        YY_FATAL_ERROR( "out of dynamic memory in yy_scan_buffer()" );
+
+    b->yy_buf_size = size - 2; /* "- 2" to take care of EOB's */
+    b->yy_buf_pos = b->yy_ch_buf = base;
+    b->yy_is_our_buffer = 0;    /* caller's memory: yy_delete_buffer() must not free it */
+    b->yy_input_file = 0;
+    b->yy_n_chars = b->yy_buf_size;
+    b->yy_is_interactive = 0;
+    b->yy_at_bol = 1;
+    b->yy_fill_buffer = 0;      /* never refilled; reaching the end means EOF */
+    b->yy_buffer_status = YY_BUFFER_NEW;
+
+    yy_switch_to_buffer(b ,yyscanner );
+
+    return b;
+}
+
+/** Setup the input buffer state to scan a string. The next call to yylex() will
+ * scan from a @e copy of @a yystr.
+ * @param yystr a NUL-terminated string to scan
+ * @param yyscanner The scanner object.
+ * @return the newly allocated buffer state object.
+ * @note If you want to scan bytes that may contain NUL values, then use
+ *       yy_scan_bytes() instead.
+ */
+YY_BUFFER_STATE yy_scan_string (yyconst char * yystr , yyscan_t yyscanner)
+{
+
+    return yy_scan_bytes(yystr,strlen(yystr) ,yyscanner);
+}
+
+/** Setup the input buffer state to scan the given bytes.
The next call to yylex() will
+ * scan from a @e copy of @a yybytes.
+ * The copy is given two trailing end-of-buffer characters and handed to
+ * yy_scan_buffer(); the resulting buffer state owns the copy. Allocation
+ * failure or a rejected buffer is reported through YY_FATAL_ERROR.
+ * @param yybytes the byte buffer to scan
+ * @param _yybytes_len the number of bytes in the buffer pointed to by @a yybytes.
+ * @param yyscanner The scanner object.
+ * @return the newly allocated buffer state object.
+ */
+YY_BUFFER_STATE yy_scan_bytes (yyconst char * yybytes, yy_size_t _yybytes_len , yyscan_t yyscanner)
+{
+    YY_BUFFER_STATE b;
+    char *buf;
+    yy_size_t n;
+    /* Same type as the length it is compared against: an int index would
+     * be a signed/unsigned comparison and would overflow for lengths above
+     * INT_MAX.
+     */
+    yy_size_t i;
+
+    /* Get memory for full buffer, including space for trailing EOB's. */
+    n = _yybytes_len + 2;
+    buf = (char *) yyalloc(n ,yyscanner );
+    if ( ! buf )
+        YY_FATAL_ERROR( "out of dynamic memory in yy_scan_bytes()" );
+
+    for ( i = 0; i < _yybytes_len; ++i )
+        buf[i] = yybytes[i];
+
+    buf[_yybytes_len] = buf[_yybytes_len+1] = YY_END_OF_BUFFER_CHAR;
+
+    b = yy_scan_buffer(buf,n ,yyscanner);
+    if ( ! b )
+        YY_FATAL_ERROR( "bad buffer in yy_scan_bytes()" );
+
+    /* It's okay to grow etc. this buffer, and we should throw it
+     * away when we're done.
+     */
+    b->yy_is_our_buffer = 1;
+
+    return b;
+}
+
+#ifndef YY_EXIT_FAILURE
+#define YY_EXIT_FAILURE 2
+#endif
+
+/* Default fatal-error handler: prints @a msg and a newline to stderr, then
+ * terminates the process with status YY_EXIT_FAILURE. The scanner argument
+ * is not used.
+ */
+static void yy_fatal_error (yyconst char* msg , yyscan_t yyscanner)
+{
+    (void) fprintf( stderr, "%s\n", msg );
+    exit( YY_EXIT_FAILURE );
+}
+
+/* Redefine yyless() so it works in section 3 code. */
+
+#undef yyless
+#define yyless(n) \
+    do \
+        { \
+        /* Undo effects of setting up yytext. */ \
+        int yyless_macro_arg = (n); \
+        YY_LESS_LINENO(yyless_macro_arg);\
+        yytext[yyleng] = yyg->yy_hold_char; \
+        yyg->yy_c_buf_p = yytext + yyless_macro_arg; \
+        yyg->yy_hold_char = *yyg->yy_c_buf_p; \
+        *yyg->yy_c_buf_p = '\0'; \
+        yyleng = yyless_macro_arg; \
+        } \
+    while ( 0 )
+
+/* Accessor methods (get/set functions) to struct members. */
+
+/** Get the user-defined data for this scanner.
+ * @param yyscanner The scanner object.
+ */ +YY_EXTRA_TYPE yyget_extra (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + return yyextra; +} + +/** Get the current line number. + * @param yyscanner The scanner object. + */ +int yyget_lineno (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + + if (! YY_CURRENT_BUFFER) + return 0; + + return yylineno; +} + +/** Get the current column number. + * @param yyscanner The scanner object. + */ +int yyget_column (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + + if (! YY_CURRENT_BUFFER) + return 0; + + return yycolumn; +} + +/** Get the input stream. + * @param yyscanner The scanner object. + */ +FILE *yyget_in (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + return yyin; +} + +/** Get the output stream. + * @param yyscanner The scanner object. + */ +FILE *yyget_out (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + return yyout; +} + +/** Get the length of the current token. + * @param yyscanner The scanner object. + */ +yy_size_t yyget_leng (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + return yyleng; +} + +/** Get the current token. + * @param yyscanner The scanner object. + */ + +char *yyget_text (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + return yytext; +} + +/** Set the user-defined data. This data is never touched by the scanner. + * @param user_defined The data to be associated with this scanner. + * @param yyscanner The scanner object. + */ +void yyset_extra (YY_EXTRA_TYPE user_defined , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + yyextra = user_defined ; +} + +/** Set the current line number. + * @param line_number + * @param yyscanner The scanner object. 
+ */ +void yyset_lineno (int line_number , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + + /* lineno is only valid if an input buffer exists. */ + if (! YY_CURRENT_BUFFER ) + YY_FATAL_ERROR( "yyset_lineno called with no buffer" ); + + yylineno = line_number; +} + +/** Set the current column. + * @param line_number + * @param yyscanner The scanner object. + */ +void yyset_column (int column_no , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + + /* column is only valid if an input buffer exists. */ + if (! YY_CURRENT_BUFFER ) + YY_FATAL_ERROR( "yyset_column called with no buffer" ); + + yycolumn = column_no; +} + +/** Set the input stream. This does not discard the current + * input buffer. + * @param in_str A readable stream. + * @param yyscanner The scanner object. + * @see yy_switch_to_buffer + */ +void yyset_in (FILE * in_str , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + yyin = in_str ; +} + +void yyset_out (FILE * out_str , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + yyout = out_str ; +} + +int yyget_debug (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + return yy_flex_debug; +} + +void yyset_debug (int bdebug , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + yy_flex_debug = bdebug ; +} + +/* Accessor methods for yylval and yylloc */ + +YYSTYPE * yyget_lval (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + return yylval; +} + +void yyset_lval (YYSTYPE * yylval_param , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + yylval = yylval_param; +} + +/* User-visible API */ + +/* yylex_init is special because it creates the scanner itself, so it is + * the ONLY reentrant function that doesn't take the scanner as the last argument. 
+ * That's why we explicitly handle the declaration, instead of using our macros. + */ + +int yylex_init(yyscan_t* ptr_yy_globals) + +{ + if (ptr_yy_globals == NULL){ + errno = EINVAL; + return 1; + } + + *ptr_yy_globals = (yyscan_t) yyalloc ( sizeof( struct yyguts_t ), NULL ); + + if (*ptr_yy_globals == NULL){ + errno = ENOMEM; + return 1; + } + + /* By setting to 0xAA, we expose bugs in yy_init_globals. Leave at 0x00 for releases. */ + memset(*ptr_yy_globals,0x00,sizeof(struct yyguts_t)); + + return yy_init_globals ( *ptr_yy_globals ); +} + +/* yylex_init_extra has the same functionality as yylex_init, but follows the + * convention of taking the scanner as the last argument. Note however, that + * this is a *pointer* to a scanner, as it will be allocated by this call (and + * is the reason, too, why this function also must handle its own declaration). + * The user defined value in the first argument will be available to yyalloc in + * the yyextra field. + */ + +int yylex_init_extra(YY_EXTRA_TYPE yy_user_defined,yyscan_t* ptr_yy_globals ) + +{ + struct yyguts_t dummy_yyguts; + + yyset_extra (yy_user_defined, &dummy_yyguts); + + if (ptr_yy_globals == NULL){ + errno = EINVAL; + return 1; + } + + *ptr_yy_globals = (yyscan_t) yyalloc ( sizeof( struct yyguts_t ), &dummy_yyguts ); + + if (*ptr_yy_globals == NULL){ + errno = ENOMEM; + return 1; + } + + /* By setting to 0xAA, we expose bugs in + yy_init_globals. Leave at 0x00 for releases. */ + memset(*ptr_yy_globals,0x00,sizeof(struct yyguts_t)); + + yyset_extra (yy_user_defined, *ptr_yy_globals); + + return yy_init_globals ( *ptr_yy_globals ); +} + +static int yy_init_globals (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + /* Initialization is the same as for the non-reentrant scanner. + * This function is called from yylex_destroy(), so don't allocate here. 
+ */ + + yyg->yy_buffer_stack = 0; + yyg->yy_buffer_stack_top = 0; + yyg->yy_buffer_stack_max = 0; + yyg->yy_c_buf_p = (char *) 0; + yyg->yy_init = 0; + yyg->yy_start = 0; + + yyg->yy_start_stack_ptr = 0; + yyg->yy_start_stack_depth = 0; + yyg->yy_start_stack = NULL; + +/* Defined in main.c */ +#ifdef YY_STDINIT + yyin = stdin; + yyout = stdout; +#else + yyin = (FILE *) 0; + yyout = (FILE *) 0; +#endif + + /* For future reference: Set errno on error, since we are called by + * yylex_init() + */ + return 0; +} + +/* yylex_destroy is for both reentrant and non-reentrant scanners. */ +int yylex_destroy (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + + /* Pop the buffer stack, destroying each element. */ + while(YY_CURRENT_BUFFER){ + yy_delete_buffer(YY_CURRENT_BUFFER ,yyscanner ); + YY_CURRENT_BUFFER_LVALUE = NULL; + yypop_buffer_state(yyscanner); + } + + /* Destroy the stack itself. */ + yyfree(yyg->yy_buffer_stack ,yyscanner); + yyg->yy_buffer_stack = NULL; + + /* Destroy the start condition stack. */ + yyfree(yyg->yy_start_stack ,yyscanner ); + yyg->yy_start_stack = NULL; + + /* Reset the globals. This is important in a non-reentrant scanner so the next time + * yylex() is called, initialization will occur. */ + yy_init_globals( yyscanner); + + /* Destroy the main struct (reentrant only). */ + yyfree ( yyscanner , yyscanner ); + yyscanner = NULL; + return 0; +} + +/* + * Internal utility routines. 
+ */ + +#ifndef yytext_ptr +static void yy_flex_strncpy (char* s1, yyconst char * s2, int n , yyscan_t yyscanner) +{ + register int i; + for ( i = 0; i < n; ++i ) + s1[i] = s2[i]; +} +#endif + +#ifdef YY_NEED_STRLEN +static int yy_flex_strlen (yyconst char * s , yyscan_t yyscanner) +{ + register int n; + for ( n = 0; s[n]; ++n ) + ; + + return n; +} +#endif + +void *yyalloc (yy_size_t size , yyscan_t yyscanner) +{ + return (void *) malloc( size ); +} + +void *yyrealloc (void * ptr, yy_size_t size , yyscan_t yyscanner) +{ + /* The cast to (char *) in the following accommodates both + * implementations that use char* generic pointers, and those + * that use void* generic pointers. It works with the latter + * because both ANSI C and C++ allow castless assignment from + * any pointer type to void*, and deal with argument conversions + * as though doing an assignment. + */ + return (void *) realloc( (char *) ptr, size ); +} + +void yyfree (void * ptr , yyscan_t yyscanner) +{ + free( (char *) ptr ); /* see yyrealloc() for (char *) cast */ +} + +#define YYTABLES_NAME "yytables" + +#line 87 "_jsgf_scanner.l" + + + diff --git a/media/sphinxbase/src/libsphinxbase/lm/jsgf_scanner.h b/media/sphinxbase/src/libsphinxbase/lm/jsgf_scanner.h new file mode 100644 index 000000000..72abefb88 --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/lm/jsgf_scanner.h @@ -0,0 +1,352 @@ +#ifndef yyHEADER_H +#define yyHEADER_H 1 +#define yyIN_HEADER 1 + +#line 6 "jsgf_scanner.h" + +#line 8 "jsgf_scanner.h" + +#define YY_INT_ALIGNED short int + +/* A lexical scanner generated by flex */ + +#define FLEX_SCANNER +#define YY_FLEX_MAJOR_VERSION 2 +#define YY_FLEX_MINOR_VERSION 5 +#define YY_FLEX_SUBMINOR_VERSION 37 +#if YY_FLEX_SUBMINOR_VERSION > 0 +#define FLEX_BETA +#endif + +/* First, we deal with platform-specific or compiler-specific issues. */ + +/* begin standard C headers. */ +#include +#include +#include +#include + +/* end standard C headers. 
*/ + +/* flex integer type definitions */ + +#ifndef FLEXINT_H +#define FLEXINT_H + +/* C99 systems have . Non-C99 systems may or may not. */ + +#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + +/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h, + * if you want the limit (max/min) macros for int types. + */ +#ifndef __STDC_LIMIT_MACROS +#define __STDC_LIMIT_MACROS 1 +#endif + +#include +typedef int8_t flex_int8_t; +typedef uint8_t flex_uint8_t; +typedef int16_t flex_int16_t; +typedef uint16_t flex_uint16_t; +typedef int32_t flex_int32_t; +typedef uint32_t flex_uint32_t; +#else +typedef signed char flex_int8_t; +typedef short int flex_int16_t; +typedef int flex_int32_t; +typedef unsigned char flex_uint8_t; +typedef unsigned short int flex_uint16_t; +typedef unsigned int flex_uint32_t; + +/* Limits of integral types. */ +#ifndef INT8_MIN +#define INT8_MIN (-128) +#endif +#ifndef INT16_MIN +#define INT16_MIN (-32767-1) +#endif +#ifndef INT32_MIN +#define INT32_MIN (-2147483647-1) +#endif +#ifndef INT8_MAX +#define INT8_MAX (127) +#endif +#ifndef INT16_MAX +#define INT16_MAX (32767) +#endif +#ifndef INT32_MAX +#define INT32_MAX (2147483647) +#endif +#ifndef UINT8_MAX +#define UINT8_MAX (255U) +#endif +#ifndef UINT16_MAX +#define UINT16_MAX (65535U) +#endif +#ifndef UINT32_MAX +#define UINT32_MAX (4294967295U) +#endif + +#endif /* ! C99 */ + +#endif /* ! FLEXINT_H */ + +#ifdef __cplusplus + +/* The "const" storage-class-modifier is valid. */ +#define YY_USE_CONST + +#else /* ! __cplusplus */ + +/* C99 requires __STDC__ to be defined as 1. */ +#if defined (__STDC__) + +#define YY_USE_CONST + +#endif /* defined (__STDC__) */ +#endif /* ! __cplusplus */ + +#ifdef YY_USE_CONST +#define yyconst const +#else +#define yyconst +#endif + +/* An opaque pointer. 
*/ +#ifndef YY_TYPEDEF_YY_SCANNER_T +#define YY_TYPEDEF_YY_SCANNER_T +typedef void* yyscan_t; +#endif + +/* For convenience, these vars (plus the bison vars far below) + are macros in the reentrant scanner. */ +#define yyin yyg->yyin_r +#define yyout yyg->yyout_r +#define yyextra yyg->yyextra_r +#define yyleng yyg->yyleng_r +#define yytext yyg->yytext_r +#define yylineno (YY_CURRENT_BUFFER_LVALUE->yy_bs_lineno) +#define yycolumn (YY_CURRENT_BUFFER_LVALUE->yy_bs_column) +#define yy_flex_debug yyg->yy_flex_debug_r + +/* Size of default input buffer. */ +#ifndef YY_BUF_SIZE +#define YY_BUF_SIZE 16384 +#endif + +#ifndef YY_TYPEDEF_YY_BUFFER_STATE +#define YY_TYPEDEF_YY_BUFFER_STATE +typedef struct yy_buffer_state *YY_BUFFER_STATE; +#endif + +#ifndef YY_TYPEDEF_YY_SIZE_T +#define YY_TYPEDEF_YY_SIZE_T +typedef size_t yy_size_t; +#endif + +#ifndef YY_STRUCT_YY_BUFFER_STATE +#define YY_STRUCT_YY_BUFFER_STATE +struct yy_buffer_state + { + FILE *yy_input_file; + + char *yy_ch_buf; /* input buffer */ + char *yy_buf_pos; /* current position in input buffer */ + + /* Size of input buffer in bytes, not including room for EOB + * characters. + */ + yy_size_t yy_buf_size; + + /* Number of characters read into yy_ch_buf, not including EOB + * characters. + */ + yy_size_t yy_n_chars; + + /* Whether we "own" the buffer - i.e., we know we created it, + * and can realloc() it to grow it, and should free() it to + * delete it. + */ + int yy_is_our_buffer; + + /* Whether this is an "interactive" input source; if so, and + * if we're using stdio for input, then we want to use getc() + * instead of fread(), to make sure we stop fetching input after + * each newline. + */ + int yy_is_interactive; + + /* Whether we're considered to be at the beginning of a line. + * If so, '^' rules will be active on the next match, otherwise + * not. + */ + int yy_at_bol; + + int yy_bs_lineno; /**< The line count. */ + int yy_bs_column; /**< The column count. 
*/ + + /* Whether to try to fill the input buffer when we reach the + * end of it. + */ + int yy_fill_buffer; + + int yy_buffer_status; + + }; +#endif /* !YY_STRUCT_YY_BUFFER_STATE */ + +void yyrestart (FILE *input_file ,yyscan_t yyscanner ); +void yy_switch_to_buffer (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner ); +YY_BUFFER_STATE yy_create_buffer (FILE *file,int size ,yyscan_t yyscanner ); +void yy_delete_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner ); +void yy_flush_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner ); +void yypush_buffer_state (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner ); +void yypop_buffer_state (yyscan_t yyscanner ); + +YY_BUFFER_STATE yy_scan_buffer (char *base,yy_size_t size ,yyscan_t yyscanner ); +YY_BUFFER_STATE yy_scan_string (yyconst char *yy_str ,yyscan_t yyscanner ); +YY_BUFFER_STATE yy_scan_bytes (yyconst char *bytes,yy_size_t len ,yyscan_t yyscanner ); + +void *yyalloc (yy_size_t ,yyscan_t yyscanner ); +void *yyrealloc (void *,yy_size_t ,yyscan_t yyscanner ); +void yyfree (void * ,yyscan_t yyscanner ); + +/* Begin user sect3 */ + +#define yywrap(yyscanner) 1 +#define YY_SKIP_YYWRAP + +#define yytext_ptr yytext_r + +#ifdef YY_HEADER_EXPORT_START_CONDITIONS +#define INITIAL 0 +#define COMMENT 1 +#define DECL 2 +#define DECLCOMMENT 3 + +#endif + + +#ifdef HAVE_UNISTD_H +#ifndef YY_NO_UNISTD_H +/* Special case for "unistd.h", since it is non-ANSI. We include it way + * down here because we want the user's section 1 to have been scanned first. + * The user has a chance to override it with an option. + */ +#include +#endif +#endif + +#ifndef YY_EXTRA_TYPE +#define YY_EXTRA_TYPE void * +#endif + +int yylex_init (yyscan_t* scanner); + +int yylex_init_extra (YY_EXTRA_TYPE user_defined,yyscan_t* scanner); + +/* Accessor methods to globals. + These are made visible to non-reentrant scanners for convenience. 
*/ + +int yylex_destroy (yyscan_t yyscanner ); + +int yyget_debug (yyscan_t yyscanner ); + +void yyset_debug (int debug_flag ,yyscan_t yyscanner ); + +YY_EXTRA_TYPE yyget_extra (yyscan_t yyscanner ); + +void yyset_extra (YY_EXTRA_TYPE user_defined ,yyscan_t yyscanner ); + +FILE *yyget_in (yyscan_t yyscanner ); + +void yyset_in (FILE * in_str ,yyscan_t yyscanner ); + +FILE *yyget_out (yyscan_t yyscanner ); + +void yyset_out (FILE * out_str ,yyscan_t yyscanner ); + +yy_size_t yyget_leng (yyscan_t yyscanner ); + +char *yyget_text (yyscan_t yyscanner ); + +int yyget_lineno (yyscan_t yyscanner ); + +void yyset_lineno (int line_number ,yyscan_t yyscanner ); + +int yyget_column (yyscan_t yyscanner ); + +void yyset_column (int column_no ,yyscan_t yyscanner ); + +YYSTYPE * yyget_lval (yyscan_t yyscanner ); + +void yyset_lval (YYSTYPE * yylval_param ,yyscan_t yyscanner ); + +/* Macros after this point can all be overridden by user definitions in + * section 1. + */ + +#ifndef YY_SKIP_YYWRAP +#ifdef __cplusplus +extern "C" int yywrap (yyscan_t yyscanner ); +#else +extern int yywrap (yyscan_t yyscanner ); +#endif +#endif + +#ifndef yytext_ptr +static void yy_flex_strncpy (char *,yyconst char *,int ,yyscan_t yyscanner); +#endif + +#ifdef YY_NEED_STRLEN +static int yy_flex_strlen (yyconst char * ,yyscan_t yyscanner); +#endif + +#ifndef YY_NO_INPUT + +#endif + +/* Amount of stuff to slurp up with each read. */ +#ifndef YY_READ_BUF_SIZE +#define YY_READ_BUF_SIZE 8192 +#endif + +/* Number of entries by which start-condition stack grows. */ +#ifndef YY_START_STACK_INCR +#define YY_START_STACK_INCR 25 +#endif + +/* Default declaration of generated scanner - a define so the user can + * easily add parameters. 
+ */ +#ifndef YY_DECL +#define YY_DECL_IS_OURS 1 + +extern int yylex \ + (YYSTYPE * yylval_param ,yyscan_t yyscanner); + +#define YY_DECL int yylex \ + (YYSTYPE * yylval_param , yyscan_t yyscanner) +#endif /* !YY_DECL */ + +/* yy_get_previous_state - get the state just before the EOB char was reached */ + +#undef YY_NEW_FILE +#undef YY_FLUSH_BUFFER +#undef yy_set_bol +#undef yy_new_buffer +#undef yy_set_interactive +#undef YY_DO_BEFORE_ACTION + +#ifdef YY_DECL_IS_OURS +#undef YY_DECL_IS_OURS +#undef YY_DECL +#endif + +#line 87 "_jsgf_scanner.l" + + +#line 348 "jsgf_scanner.h" +#undef yyIN_HEADER +#endif /* yyHEADER_H */ diff --git a/media/sphinxbase/src/libsphinxbase/lm/lm3g_model.c b/media/sphinxbase/src/libsphinxbase/lm/lm3g_model.c new file mode 100644 index 000000000..e9943001e --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/lm/lm3g_model.c @@ -0,0 +1,258 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2007 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. 
+ * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * \file lm3g_model.c Core Sphinx 3-gram code used in + * DMP/DMP32/ARPA (for now) model code. + * + * Author: A cast of thousands, probably. + */ +#include +#include +#include + +#include "sphinxbase/listelem_alloc.h" +#include "sphinxbase/ckd_alloc.h" +#include "sphinxbase/err.h" + +#include "lm3g_model.h" + +void +lm3g_tginfo_free(ngram_model_t *base, lm3g_model_t *lm3g) +{ + if (lm3g->tginfo == NULL) + return; + listelem_alloc_free(lm3g->le); + ckd_free(lm3g->tginfo); +} + +void +lm3g_tginfo_reset(ngram_model_t *base, lm3g_model_t *lm3g) +{ + if (lm3g->tginfo == NULL) + return; + listelem_alloc_free(lm3g->le); + memset(lm3g->tginfo, 0, base->n_counts[0] * sizeof(tginfo_t *)); + lm3g->le = listelem_alloc_init(sizeof(tginfo_t)); +} + +void +lm3g_apply_weights(ngram_model_t *base, + lm3g_model_t *lm3g, + float32 lw, float32 wip, float32 uw) +{ + int32 log_wip, log_uw, log_uniform_weight; + int i; + + /* Precalculate some log values we will like. 
*/ + log_wip = logmath_log(base->lmath, wip); + log_uw = logmath_log(base->lmath, uw); + log_uniform_weight = logmath_log(base->lmath, 1.0 - uw); + + for (i = 0; i < base->n_counts[0]; ++i) { + int32 prob1, bo_wt, n_used; + + /* Backoff weights just get scaled by the lw. */ + bo_wt = (int32)(lm3g->unigrams[i].bo_wt1.l / base->lw); + /* Unscaling unigram probs is a bit more complicated, so punt + * it back to the general code. */ + prob1 = ngram_ng_prob(base, i, NULL, 0, &n_used); + /* Now compute the new scaled probabilities. */ + lm3g->unigrams[i].bo_wt1.l = (int32)(bo_wt * lw); + if (strcmp(base->word_str[i], "") == 0) { /* FIXME: configurable start_sym */ + /* Apply language weight and WIP */ + lm3g->unigrams[i].prob1.l = (int32)(prob1 * lw) + log_wip; + } + else { + /* Interpolate unigram probability with uniform. */ + prob1 += log_uw; + prob1 = logmath_add(base->lmath, prob1, base->log_uniform + log_uniform_weight); + /* Apply language weight and WIP */ + lm3g->unigrams[i].prob1.l = (int32)(prob1 * lw) + log_wip; + } + } + + for (i = 0; i < lm3g->n_prob2; ++i) { + int32 prob2; + /* Can't just punt this back to general code since it is quantized. */ + prob2 = (int32)((lm3g->prob2[i].l - base->log_wip) / base->lw); + lm3g->prob2[i].l = (int32)(prob2 * lw) + log_wip; + } + + if (base->n > 2) { + for (i = 0; i < lm3g->n_bo_wt2; ++i) { + lm3g->bo_wt2[i].l = (int32)(lm3g->bo_wt2[i].l / base->lw * lw); + } + for (i = 0; i < lm3g->n_prob3; i++) { + int32 prob3; + /* Can't just punt this back to general code since it is quantized. */ + prob3 = (int32)((lm3g->prob3[i].l - base->log_wip) / base->lw); + lm3g->prob3[i].l = (int32)(prob3 * lw) + log_wip; + } + } + + /* Store updated values in the model. 
*/ + base->log_wip = log_wip; + base->log_uw = log_uw; + base->log_uniform_weight = log_uniform_weight; + base->lw = lw; +} + +int32 +lm3g_add_ug(ngram_model_t *base, + lm3g_model_t *lm3g, int32 wid, int32 lweight) +{ + int32 score; + + /* This would be very bad if this happened! */ + assert(!NGRAM_IS_CLASSWID(wid)); + + /* Reallocate unigram array. */ + lm3g->unigrams = ckd_realloc(lm3g->unigrams, + sizeof(*lm3g->unigrams) * base->n_1g_alloc); + memset(lm3g->unigrams + base->n_counts[0], 0, + (base->n_1g_alloc - base->n_counts[0]) * sizeof(*lm3g->unigrams)); + /* Reallocate tginfo array. */ + lm3g->tginfo = ckd_realloc(lm3g->tginfo, + sizeof(*lm3g->tginfo) * base->n_1g_alloc); + memset(lm3g->tginfo + base->n_counts[0], 0, + (base->n_1g_alloc - base->n_counts[0]) * sizeof(*lm3g->tginfo)); + /* FIXME: we really ought to update base->log_uniform *and* + * renormalize all the other unigrams. This is really slow, so I + * will probably just provide a function to renormalize after + * adding unigrams, for anyone who really cares. */ + /* This could be simplified but then we couldn't do it in logmath */ + score = lweight + base->log_uniform + base->log_uw; + score = logmath_add(base->lmath, score, + base->log_uniform + base->log_uniform_weight); + lm3g->unigrams[wid].prob1.l = score; + /* This unigram by definition doesn't participate in any bigrams, + * so its backoff weight and bigram pointer are both undefined. */ + lm3g->unigrams[wid].bo_wt1.l = 0; + lm3g->unigrams[wid].bigrams = 0; + /* Finally, increase the unigram count */ + ++base->n_counts[0]; + /* FIXME: Note that this can actually be quite bogus due to the + * presence of class words. 
If wid falls outside the unigram + * count, increase it to compensate, at the cost of no longer + * really knowing how many unigrams we have :( */ + if (wid >= base->n_counts[0]) + base->n_counts[0] = wid + 1; + + return score; +} + +#define INITIAL_SORTED_ENTRIES MAX_UINT16 + +void +init_sorted_list(sorted_list_t * l) +{ + l->list = ckd_calloc(INITIAL_SORTED_ENTRIES, sizeof(sorted_entry_t)); + l->list[0].val.l = INT_MIN; + l->list[0].lower = 0; + l->list[0].higher = 0; + l->free = 1; + l->size = INITIAL_SORTED_ENTRIES; +} + +void +free_sorted_list(sorted_list_t * l) +{ + free(l->list); +} + +lmprob_t * +vals_in_sorted_list(sorted_list_t * l) +{ + lmprob_t *vals; + int32 i; + + vals = ckd_calloc(l->free, sizeof(lmprob_t)); + for (i = 0; i < l->free; i++) + vals[i] = l->list[i].val; + return (vals); +} + +int32 +sorted_id(sorted_list_t * l, int32 *val) +{ + int32 i = 0; + + for (;;) { + if (*val == l->list[i].val.l) + return (i); + if (*val < l->list[i].val.l) { + if (l->list[i].lower == 0) { + + if (l->free >= l->size) { + int newsize = l->size + INITIAL_SORTED_ENTRIES; + l->list = ckd_realloc(l->list, sizeof(sorted_entry_t) * newsize); + memset(l->list + l->size, + 0, INITIAL_SORTED_ENTRIES * sizeof(sorted_entry_t)); + l->size = newsize; + } + + l->list[i].lower = l->free; + (l->free)++; + i = l->list[i].lower; + l->list[i].val.l = *val; + return (i); + } + else + i = l->list[i].lower; + } + else { + if (l->list[i].higher == 0) { + + if (l->free >= l->size) { + int newsize = l->size + INITIAL_SORTED_ENTRIES; + l->list = ckd_realloc(l->list, sizeof(sorted_entry_t) * newsize); + memset(l->list + l->size, + 0, INITIAL_SORTED_ENTRIES * sizeof(sorted_entry_t)); + l->size = newsize; + } + + l->list[i].higher = l->free; + (l->free)++; + i = l->list[i].higher; + l->list[i].val.l = *val; + return (i); + } + else + i = l->list[i].higher; + } + } +} diff --git a/media/sphinxbase/src/libsphinxbase/lm/lm3g_model.h b/media/sphinxbase/src/libsphinxbase/lm/lm3g_model.h new file 
mode 100644 index 000000000..698ed81f5 --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/lm/lm3g_model.h @@ -0,0 +1,177 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2007 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * ==================================================================== + * + */ +/* + * \file lm3g_model.h Core Sphinx 3-gram code used in + * DMP/DMP32/ARPA (for now) model code. + * + * Author: A cast of thousands, probably. + */ + +#ifndef __NGRAM_MODEL_LM3G_H__ +#define __NGRAM_MODEL_LM3G_H__ + +#include "sphinxbase/listelem_alloc.h" + +#include "ngram_model_internal.h" + +/** + * Type used to store language model probabilities + */ +typedef union { + float32 f; + int32 l; +} lmprob_t; + +/** + * Bigram probs and bo-wts, and trigram probs are kept in separate + * tables rather than within the bigram_t and trigram_t structures. + * These tables hold unique prob and bo-wt values. The following tree + * structure is used to construct these tables of unique values. + * Whenever a new value is read from the LM file, the sorted tree + * structure is searched to see if the value already exists, and + * inserted if not found. + */ +typedef struct sorted_entry_s { + lmprob_t val; /**< value being kept in this node */ + uint32 lower; /**< index of another entry. All descendants down + this path have their val < this node's val. + 0 => no son exists (0 is root index) */ + uint32 higher; /**< index of another entry. All descendants down + this path have their val > this node's val + 0 => no son exists (0 is root index) */ +} sorted_entry_t; + +/** + * The sorted list. list is a (64K long) array. The first entry is the + * root of the tree and is created during initialization. + */ +typedef struct { + sorted_entry_t *list; + int32 free; /**< first free element in list */ + int32 size; +} sorted_list_t; + +/** + * Unigram structure (common among all lm3g implementations) + */ +typedef struct unigram_s { + lmprob_t prob1; /**< Unigram probability. */ + lmprob_t bo_wt1; /**< Unigram backoff weight. 
*/ + int32 bigrams; /**< Index of 1st entry in lm_t.bigrams[] */ +} unigram_t; + +/** + * Bigram structure (might be implemented differently) + */ +typedef struct bigram_s bigram_t; +/** + * Trigram structure (might be implemented differently) + */ +typedef struct trigram_s trigram_t; + + +/* + * To conserve space, bigram info is kept in many tables. Since the number + * of distinct values << #bigrams, these table indices can be 16-bit values. + * prob2 and bo_wt2 are such indices, but keeping trigram index is less easy. + * It is supposed to be the index of the first trigram entry for each bigram. + * But such an index cannot be represented in 16-bits, hence the following + * segmentation scheme: Partition bigrams into segments of BG_SEG_SZ + * consecutive entries, such that #trigrams in each segment <= 2**16 (the + * corresponding trigram segment). The bigram_t.trigrams value is then a + * 16-bit relative index within the trigram segment. A separate table-- + * lm_t.tseg_base--has the index of the 1st trigram for each bigram segment. + */ +#define BG_SEG_SZ 512 /* chosen so that #trigram/segment <= 2**16 */ +#define LOG_BG_SEG_SZ 9 + +/** + * Trigram information cache. + * + * The following trigram information cache eliminates most traversals of 1g->2g->3g + * tree to locate trigrams for a given bigram (lw1,lw2). The organization is optimized + * for locality of access (to the same lw1), given lw2. + */ +typedef struct tginfo_s { + int32 w1; /**< lw1 component of bigram lw1,lw2. All bigrams with + same lw2 linked together (see lm_t.tginfo). */ + int32 n_tg; /**< number tg for parent bigram lw1,lw2 */ + int32 bowt; /**< tg bowt for lw1,lw2 */ + int32 used; /**< whether used since last lm_reset */ + trigram_t *tg; /**< Trigrams for lw1,lw2 */ + struct tginfo_s *next; /**< Next lw1 with same parent lw2; NULL if none. */ +} tginfo_t; + +/** + * Common internal structure for Sphinx 3-gram models. 
+ */ +typedef struct lm3g_model_s { + unigram_t *unigrams; + bigram_t *bigrams; + trigram_t *trigrams; + lmprob_t *prob2; /**< Table of actual bigram probs */ + int32 n_prob2; /**< prob2 size */ + lmprob_t *bo_wt2; /**< Table of actual bigram backoff weights */ + int32 n_bo_wt2; /**< bo_wt2 size */ + lmprob_t *prob3; /**< Table of actual trigram probs */ + int32 n_prob3; /**< prob3 size */ + int32 *tseg_base; /**< tseg_base[i>>LOG_BG_SEG_SZ] = index of 1st + trigram for bigram segment (i>>LOG_BG_SEG_SZ) */ + tginfo_t **tginfo; /**< tginfo[lw2] is head of linked list of trigram information for + some cached subset of bigrams (*,lw2). */ + listelem_alloc_t *le; /**< List element allocator for tginfo. */ +} lm3g_model_t; + +void lm3g_tginfo_free(ngram_model_t *base, lm3g_model_t *lm3g); +void lm3g_tginfo_reset(ngram_model_t *base, lm3g_model_t *lm3g); +void lm3g_apply_weights(ngram_model_t *base, + lm3g_model_t *lm3g, + float32 lw, float32 wip, float32 uw); +int32 lm3g_add_ug(ngram_model_t *base, + lm3g_model_t *lm3g, int32 wid, int32 lweight); + + +/** + * Initialize sorted list with the 0-th entry = MIN_PROB_F, which may be needed + * to replace spurious values in the Darpa LM file. + */ +void init_sorted_list(sorted_list_t *l); +void free_sorted_list(sorted_list_t *l); +lmprob_t *vals_in_sorted_list(sorted_list_t *l); +int32 sorted_id(sorted_list_t * l, int32 *val); + +#endif /* __NGRAM_MODEL_LM3G_H__ */ diff --git a/media/sphinxbase/src/libsphinxbase/lm/lm3g_templates.c b/media/sphinxbase/src/libsphinxbase/lm/lm3g_templates.c new file mode 100644 index 000000000..080cfa8e6 --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/lm/lm3g_templates.c @@ -0,0 +1,560 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2007 Carnegie Mellon University. All rights + * reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * \file lm3g_templates.c Core Sphinx 3-gram code used in + * DMP/DMP32/ARPA (for now) model code. 
+ */ + +#include + +/* Locate a specific bigram within a bigram list */ +#define BINARY_SEARCH_THRESH 16 +static int32 +find_bg(bigram_t * bg, int32 n, int32 w) +{ + int32 i, b, e; + + /* Binary search until segment size < threshold */ + b = 0; + e = n; + while (e - b > BINARY_SEARCH_THRESH) { + i = (b + e) >> 1; + if (bg[i].wid < w) + b = i + 1; + else if (bg[i].wid > w) + e = i; + else + return i; + } + + /* Linear search within narrowed segment */ + for (i = b; (i < e) && (bg[i].wid != w); i++); + return ((i < e) ? i : -1); +} + +static int32 +lm3g_bg_score(NGRAM_MODEL_TYPE *model, + int32 lw1, int32 lw2, int32 *n_used) +{ + int32 i, n, b, score; + bigram_t *bg; + + if (lw1 < 0 || model->base.n < 2) { + *n_used = 1; + return model->lm3g.unigrams[lw2].prob1.l; + } + + b = FIRST_BG(model, lw1); + n = FIRST_BG(model, lw1 + 1) - b; + bg = model->lm3g.bigrams + b; + + if ((i = find_bg(bg, n, lw2)) >= 0) { + /* Access mode = bigram */ + *n_used = 2; + score = model->lm3g.prob2[bg[i].prob2].l; + } + else { + /* Access mode = unigram */ + *n_used = 1; + score = model->lm3g.unigrams[lw1].bo_wt1.l + model->lm3g.unigrams[lw2].prob1.l; + } + + return (score); +} + +static void +load_tginfo(NGRAM_MODEL_TYPE *model, int32 lw1, int32 lw2) +{ + int32 i, n, b, t; + bigram_t *bg; + tginfo_t *tginfo; + + /* First allocate space for tg information for bg lw1,lw2 */ + tginfo = (tginfo_t *) listelem_malloc(model->lm3g.le); + tginfo->w1 = lw1; + tginfo->tg = NULL; + tginfo->next = model->lm3g.tginfo[lw2]; + model->lm3g.tginfo[lw2] = tginfo; + + /* Locate bigram lw1,lw2 */ + b = model->lm3g.unigrams[lw1].bigrams; + n = model->lm3g.unigrams[lw1 + 1].bigrams - b; + bg = model->lm3g.bigrams + b; + + if ((n > 0) && ((i = find_bg(bg, n, lw2)) >= 0)) { + tginfo->bowt = model->lm3g.bo_wt2[bg[i].bo_wt2].l; + + /* Find t = Absolute first trigram index for bigram lw1,lw2 */ + b += i; /* b = Absolute index of bigram lw1,lw2 on disk */ + t = FIRST_TG(model, b); + + tginfo->tg = 
model->lm3g.trigrams + t; + + /* Find #tg for bigram w1,w2 */ + tginfo->n_tg = FIRST_TG(model, b + 1) - t; + } + else { /* No bigram w1,w2 */ + tginfo->bowt = 0; + tginfo->n_tg = 0; + } +} + +/* Similar to find_bg */ +static int32 +find_tg(trigram_t * tg, int32 n, uint32 w) +{ + int32 i, b, e; + + b = 0; + e = n; + while (e - b > BINARY_SEARCH_THRESH) { + i = (b + e) >> 1; + if (tg[i].wid < w) + b = i + 1; + else if (tg[i].wid > w) + e = i; + else + return i; + } + + for (i = b; (i < e) && (tg[i].wid != w); i++); + return ((i < e) ? i : -1); +} + +static int32 +lm3g_tg_score(NGRAM_MODEL_TYPE *model, int32 lw1, + int32 lw2, int32 lw3, int32 *n_used) +{ + ngram_model_t *base = &model->base; + int32 i, n, score; + trigram_t *tg; + tginfo_t *tginfo, *prev_tginfo; + + if ((base->n < 3) || (lw1 < 0) || (lw2 < 0)) + return (lm3g_bg_score(model, lw2, lw3, n_used)); + + prev_tginfo = NULL; + for (tginfo = model->lm3g.tginfo[lw2]; tginfo; tginfo = tginfo->next) { + if (tginfo->w1 == lw1) + break; + prev_tginfo = tginfo; + } + + if (!tginfo) { + load_tginfo(model, lw1, lw2); + tginfo = model->lm3g.tginfo[lw2]; + } + else if (prev_tginfo) { + prev_tginfo->next = tginfo->next; + tginfo->next = model->lm3g.tginfo[lw2]; + model->lm3g.tginfo[lw2] = tginfo; + } + + tginfo->used = 1; + + /* Trigrams for w1,w2 now pointed to by tginfo */ + n = tginfo->n_tg; + tg = tginfo->tg; + if ((i = find_tg(tg, n, lw3)) >= 0) { + /* Access mode = trigram */ + *n_used = 3; + score = model->lm3g.prob3[tg[i].prob3].l; + } + else { + score = tginfo->bowt + lm3g_bg_score(model, lw2, lw3, n_used); + } + + return (score); +} + +static int32 +lm3g_template_score(ngram_model_t *base, int32 wid, + int32 *history, int32 n_hist, + int32 *n_used) +{ + NGRAM_MODEL_TYPE *model = (NGRAM_MODEL_TYPE *)base; + switch (n_hist) { + case 0: + /* Access mode: unigram */ + *n_used = 1; + return model->lm3g.unigrams[wid].prob1.l; + case 1: + return lm3g_bg_score(model, history[0], wid, n_used); + case 2: + default: + /* 
Anything greater than 2 is the same as a trigram for now. */ + return lm3g_tg_score(model, history[1], history[0], wid, n_used); + } +} + +static int32 +lm3g_template_raw_score(ngram_model_t *base, int32 wid, + int32 *history, int32 n_hist, + int32 *n_used) +{ + NGRAM_MODEL_TYPE *model = (NGRAM_MODEL_TYPE *)base; + int32 score; + + switch (n_hist) { + case 0: + /* Access mode: unigram */ + *n_used = 1; + /* Undo insertion penalty. */ + score = model->lm3g.unigrams[wid].prob1.l - base->log_wip; + /* Undo language weight. */ + score = (int32)(score / base->lw); + /* Undo unigram interpolation */ + if (strcmp(base->word_str[wid], "") != 0) { /* FIXME: configurable start_sym */ + /* This operation is numerically unstable, so try to avoid it + * as possible */ + if (base->log_uniform + base->log_uniform_weight > logmath_get_zero(base->lmath)) { + score = logmath_log(base->lmath, + logmath_exp(base->lmath, score) + - logmath_exp(base->lmath, + base->log_uniform + base->log_uniform_weight)); + } + } + return score; + case 1: + score = lm3g_bg_score(model, history[0], wid, n_used); + break; + case 2: + default: + /* Anything greater than 2 is the same as a trigram for now. */ + score = lm3g_tg_score(model, history[1], history[0], wid, n_used); + break; + } + /* FIXME (maybe): This doesn't undo unigram weighting in backoff cases. 
*/ + return (int32)((score - base->log_wip) / base->lw); +} + +static int32 +lm3g_template_add_ug(ngram_model_t *base, + int32 wid, int32 lweight) +{ + NGRAM_MODEL_TYPE *model = (NGRAM_MODEL_TYPE *)base; + return lm3g_add_ug(base, &model->lm3g, wid, lweight); +} + +static void +lm3g_template_flush(ngram_model_t *base) +{ + NGRAM_MODEL_TYPE *model = (NGRAM_MODEL_TYPE *)base; + lm3g_tginfo_reset(base, &model->lm3g); +} + +typedef struct lm3g_iter_s { + ngram_iter_t base; + unigram_t *ug; + bigram_t *bg; + trigram_t *tg; +} lm3g_iter_t; + +static ngram_iter_t * +lm3g_template_iter(ngram_model_t *base, int32 wid, + int32 *history, int32 n_hist) +{ + NGRAM_MODEL_TYPE *model = (NGRAM_MODEL_TYPE *)base; + lm3g_iter_t *itor = (lm3g_iter_t *)ckd_calloc(1, sizeof(*itor)); + + ngram_iter_init((ngram_iter_t *)itor, base, n_hist, FALSE); + + if (n_hist == 0) { + /* Unigram is the easiest. */ + itor->ug = model->lm3g.unigrams + wid; + return (ngram_iter_t *)itor; + } + else if (n_hist == 1) { + int32 i, n, b; + /* Find the bigram, as in bg_score above (duplicate code...) */ + itor->ug = model->lm3g.unigrams + history[0]; + b = FIRST_BG(model, history[0]); + n = FIRST_BG(model, history[0] + 1) - b; + itor->bg = model->lm3g.bigrams + b; + /* If no such bigram exists then fail. */ + if ((i = find_bg(itor->bg, n, wid)) < 0) { + ngram_iter_free((ngram_iter_t *)itor); + return NULL; + } + itor->bg += i; + return (ngram_iter_t *)itor; + } + else if (n_hist == 2) { + int32 i, n; + tginfo_t *tginfo, *prev_tginfo; + /* Find the trigram, as in tg_score above (duplicate code...) 
*/ + itor->ug = model->lm3g.unigrams + history[1]; + prev_tginfo = NULL; + for (tginfo = model->lm3g.tginfo[history[0]]; + tginfo; tginfo = tginfo->next) { + if (tginfo->w1 == history[1]) + break; + prev_tginfo = tginfo; + } + + if (!tginfo) { + load_tginfo(model, history[1], history[0]); + tginfo = model->lm3g.tginfo[history[0]]; + } + else if (prev_tginfo) { + prev_tginfo->next = tginfo->next; + tginfo->next = model->lm3g.tginfo[history[0]]; + model->lm3g.tginfo[history[0]] = tginfo; + } + + tginfo->used = 1; + + /* Trigrams for w1,w2 now pointed to by tginfo */ + n = tginfo->n_tg; + itor->tg = tginfo->tg; + if ((i = find_tg(itor->tg, n, wid)) >= 0) { + itor->tg += i; + /* Now advance the bigram pointer accordingly. FIXME: + * Note that we actually already found the relevant bigram + * in load_tginfo. */ + itor->bg = model->lm3g.bigrams; + while (FIRST_TG(model, (itor->bg - model->lm3g.bigrams + 1)) + <= (itor->tg - model->lm3g.trigrams)) + ++itor->bg; + return (ngram_iter_t *)itor; + } + else { + ngram_iter_free((ngram_iter_t *)itor); + return (ngram_iter_t *)NULL; + } + } + else { + /* Should not happen. */ + assert(n_hist == 0); /* Guaranteed to fail. */ + ngram_iter_free((ngram_iter_t *)itor); + return NULL; + } +} + +static ngram_iter_t * +lm3g_template_mgrams(ngram_model_t *base, int m) +{ + NGRAM_MODEL_TYPE *model = (NGRAM_MODEL_TYPE *)base; + lm3g_iter_t *itor = (lm3g_iter_t *)ckd_calloc(1, sizeof(*itor)); + ngram_iter_init((ngram_iter_t *)itor, base, m, FALSE); + + itor->ug = model->lm3g.unigrams; + itor->bg = model->lm3g.bigrams; + itor->tg = model->lm3g.trigrams; + + /* Advance bigram pointer to match first trigram. */ + if (m > 1 && base->n_counts[1] > 1) { + while (FIRST_TG(model, (itor->bg - model->lm3g.bigrams + 1)) + <= (itor->tg - model->lm3g.trigrams)) + ++itor->bg; + } + + /* Advance unigram pointer to match first bigram. 
*/ + if (m > 0 && base->n_counts[0] > 1) { + while (itor->ug[1].bigrams <= (itor->bg - model->lm3g.bigrams)) + ++itor->ug; + } + + return (ngram_iter_t *)itor; +} + +static ngram_iter_t * +lm3g_template_successors(ngram_iter_t *bitor) +{ + NGRAM_MODEL_TYPE *model = (NGRAM_MODEL_TYPE *)bitor->model; + lm3g_iter_t *from = (lm3g_iter_t *)bitor; + lm3g_iter_t *itor = (lm3g_iter_t *)ckd_calloc(1, sizeof(*itor)); + + itor->ug = from->ug; + switch (bitor->m) { + case 0: + /* Next itor bigrams is the same as this itor bigram or + itor bigrams is more than total count. This means no successors */ + if (((itor->ug + 1) - model->lm3g.unigrams < bitor->model->n_counts[0] && + itor->ug->bigrams == (itor->ug + 1)->bigrams) || + itor->ug->bigrams == bitor->model->n_counts[1]) + goto done; + + /* Start iterating from first bigram successor of from->ug. */ + itor->bg = model->lm3g.bigrams + itor->ug->bigrams; + break; + case 1: + itor->bg = from->bg; + + /* This indicates no successors */ + if (((itor->bg + 1) - model->lm3g.bigrams < bitor->model->n_counts[1] && + FIRST_TG (model, itor->bg - model->lm3g.bigrams) == + FIRST_TG (model, (itor->bg + 1) - model->lm3g.bigrams)) || + FIRST_TG (model, itor->bg - model->lm3g.bigrams) == bitor->model->n_counts[2]) + goto done; + + /* Start iterating from first trigram successor of from->bg. */ + itor->tg = (model->lm3g.trigrams + + FIRST_TG(model, (itor->bg - model->lm3g.bigrams))); +#if 0 + printf("%s %s => %d (%s)\n", + model->base.word_str[itor->ug - model->lm3g.unigrams], + model->base.word_str[itor->bg->wid], + FIRST_TG(model, (itor->bg - model->lm3g.bigrams)), + model->base.word_str[itor->tg->wid]); +#endif + break; + case 2: + default: + /* All invalid! 
*/ + goto done; + } + + ngram_iter_init((ngram_iter_t *)itor, bitor->model, bitor->m + 1, TRUE); + return (ngram_iter_t *)itor; + done: + ckd_free(itor); + return NULL; +} + +static int32 const * +lm3g_template_iter_get(ngram_iter_t *base, + int32 *out_score, int32 *out_bowt) +{ + NGRAM_MODEL_TYPE *model = (NGRAM_MODEL_TYPE *)base->model; + lm3g_iter_t *itor = (lm3g_iter_t *)base; + + base->wids[0] = itor->ug - model->lm3g.unigrams; + if (itor->bg) base->wids[1] = itor->bg->wid; + if (itor->tg) base->wids[2] = itor->tg->wid; +#if 0 + printf("itor_get: %d %d %d\n", base->wids[0], base->wids[1], base->wids[2]); +#endif + + switch (base->m) { + case 0: + *out_score = itor->ug->prob1.l; + *out_bowt = itor->ug->bo_wt1.l; + break; + case 1: + *out_score = model->lm3g.prob2[itor->bg->prob2].l; + if (model->lm3g.bo_wt2) + *out_bowt = model->lm3g.bo_wt2[itor->bg->bo_wt2].l; + else + *out_bowt = 0; + break; + case 2: + *out_score = model->lm3g.prob3[itor->tg->prob3].l; + *out_bowt = 0; + break; + default: /* Should not happen. */ + return NULL; + } + return base->wids; +} + +static ngram_iter_t * +lm3g_template_iter_next(ngram_iter_t *base) +{ + NGRAM_MODEL_TYPE *model = (NGRAM_MODEL_TYPE *)base->model; + lm3g_iter_t *itor = (lm3g_iter_t *)base; + + switch (base->m) { + case 0: + ++itor->ug; + /* Check for end condition. */ + if (itor->ug - model->lm3g.unigrams >= base->model->n_counts[0]) + goto done; + break; + case 1: + ++itor->bg; + /* Check for end condition. */ + if (itor->bg - model->lm3g.bigrams >= base->model->n_counts[1]) + goto done; + /* Advance unigram pointer if necessary in order to get one + * that points to this bigram. */ + while (itor->bg - model->lm3g.bigrams >= itor->ug[1].bigrams) { + /* Stop if this is a successor iterator, since we don't + * want a new unigram. 
*/ + if (base->successor) + goto done; + ++itor->ug; + if (itor->ug == model->lm3g.unigrams + base->model->n_counts[0]) { + E_ERROR("Bigram %d has no valid unigram parent\n", + itor->bg - model->lm3g.bigrams); + goto done; + } + } + break; + case 2: + ++itor->tg; + /* Check for end condition. */ + if (itor->tg - model->lm3g.trigrams >= base->model->n_counts[2]) + goto done; + /* Advance bigram pointer if necessary. */ + while (itor->tg - model->lm3g.trigrams >= + FIRST_TG(model, (itor->bg - model->lm3g.bigrams + 1))) { + if (base->successor) + goto done; + ++itor->bg; + if (itor->bg == model->lm3g.bigrams + base->model->n_counts[1]) { + E_ERROR("Trigram %d has no valid bigram parent\n", + itor->tg - model->lm3g.trigrams); + + goto done; + } + } + /* Advance unigram pointer if necessary. */ + while (itor->bg - model->lm3g.bigrams >= itor->ug[1].bigrams) { + ++itor->ug; + if (itor->ug == model->lm3g.unigrams + base->model->n_counts[0]) { + E_ERROR("Trigram %d has no valid unigram parent\n", + itor->tg - model->lm3g.trigrams); + goto done; + } + } + break; + default: /* Should not happen. */ + goto done; + } + + return (ngram_iter_t *)itor; +done: + ngram_iter_free(base); + return NULL; +} + +static void +lm3g_template_iter_free(ngram_iter_t *base) +{ + ckd_free(base); +} diff --git a/media/sphinxbase/src/libsphinxbase/lm/ngram_model.c b/media/sphinxbase/src/libsphinxbase/lm/ngram_model.c new file mode 100644 index 000000000..02af4151b --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/lm/ngram_model.c @@ -0,0 +1,1129 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2007 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * \file ngram_model.c N-Gram language models. 
+ * + * Author: David Huggins-Daines, much code taken from sphinx3/src/libs3decoder/liblm + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include + +#include "sphinxbase/ngram_model.h" +#include "sphinxbase/ckd_alloc.h" +#include "sphinxbase/filename.h" +#include "sphinxbase/pio.h" +#include "sphinxbase/err.h" +#include "sphinxbase/logmath.h" +#include "sphinxbase/strfuncs.h" +#include "sphinxbase/case.h" + +#include "ngram_model_internal.h" + +ngram_file_type_t +ngram_file_name_to_type(const char *file_name) +{ + const char *ext; + + ext = strrchr(file_name, '.'); + if (ext == NULL) { + return NGRAM_INVALID; + } + if (0 == strcmp_nocase(ext, ".gz")) { + while (--ext >= file_name) { + if (*ext == '.') break; + } + if (ext < file_name) { + return NGRAM_INVALID; + } + } + else if (0 == strcmp_nocase(ext, ".bz2")) { + while (--ext >= file_name) { + if (*ext == '.') break; + } + if (ext < file_name) { + return NGRAM_INVALID; + } + } + /* We use strncmp because there might be a .gz on the end. 
*/ + if (0 == strncmp_nocase(ext, ".ARPA", 5)) + return NGRAM_ARPA; + if (0 == strncmp_nocase(ext, ".DMP", 4)) + return NGRAM_DMP; + return NGRAM_INVALID; + } + +ngram_file_type_t +ngram_str_to_type(const char *str_name) +{ + if (0 == strcmp_nocase(str_name, "arpa")) + return NGRAM_ARPA; + if (0 == strcmp_nocase(str_name, "dmp")) + return NGRAM_DMP; + return NGRAM_INVALID; +} + +char const * +ngram_type_to_str(int type) +{ + switch (type) { + case NGRAM_ARPA: + return "arpa"; + case NGRAM_DMP: + return "dmp"; + default: + return NULL; + } +} + + + ngram_model_t * + ngram_model_read(cmd_ln_t *config, + const char *file_name, + ngram_file_type_t file_type, + logmath_t *lmath) + { + ngram_model_t *model = NULL; + + switch (file_type) { + case NGRAM_AUTO: { + if ((model = ngram_model_arpa_read(config, file_name, lmath)) != NULL) + break; + if ((model = ngram_model_dmp_read(config, file_name, lmath)) != NULL) + break; + return NULL; + } + case NGRAM_ARPA: + model = ngram_model_arpa_read(config, file_name, lmath); + break; + case NGRAM_DMP: + model = ngram_model_dmp_read(config, file_name, lmath); + break; + default: + E_ERROR("language model file type not supported\n"); + return NULL; + } + + /* Now set weights based on config if present. 
*/ + if (config) { + float32 lw = 1.0; + float32 wip = 1.0; + float32 uw = 1.0; + + if (cmd_ln_exists_r(config, "-lw")) + lw = cmd_ln_float32_r(config, "-lw"); + if (cmd_ln_exists_r(config, "-wip")) + wip = cmd_ln_float32_r(config, "-wip"); + if (cmd_ln_exists_r(config, "-uw")) + uw = cmd_ln_float32_r(config, "-uw"); + + ngram_model_apply_weights(model, lw, wip, uw); + } + + return model; + } + + int + ngram_model_write(ngram_model_t *model, const char *file_name, + ngram_file_type_t file_type) + { + switch (file_type) { + case NGRAM_AUTO: { + file_type = ngram_file_name_to_type(file_name); + /* Default to ARPA (catches .lm and other things) */ + if (file_type == NGRAM_INVALID) + file_type = NGRAM_ARPA; + return ngram_model_write(model, file_name, file_type); + } + case NGRAM_ARPA: + return ngram_model_arpa_write(model, file_name); + case NGRAM_DMP: + return ngram_model_dmp_write(model, file_name); + default: + E_ERROR("language model file type not supported\n"); + return -1; + } + E_ERROR("language model file type not supported\n"); + return -1; + } + + int32 + ngram_model_init(ngram_model_t *base, + ngram_funcs_t *funcs, + logmath_t *lmath, + int32 n, int32 n_unigram) + { + base->refcount = 1; + base->funcs = funcs; + base->n = n; + /* If this was previously initialized... */ + if (base->n_counts == NULL) + base->n_counts = ckd_calloc(3, sizeof(*base->n_counts)); + /* Don't reset weights if logmath object hasn't changed. */ + if (base->lmath != lmath) { + /* Set default values for weights. */ + base->lw = 1.0; + base->log_wip = 0; /* i.e. 1.0 */ + base->log_uw = 0; /* i.e. 1.0 */ + base->log_uniform = logmath_log(lmath, 1.0 / n_unigram); + base->log_uniform_weight = logmath_get_zero(lmath); + base->log_zero = logmath_get_zero(lmath); + base->lmath = lmath; + } + /* Allocate or reallocate space for word strings. */ + if (base->word_str) { + /* Free all previous word strings if they were allocated. 
*/ + if (base->writable) { + int32 i; + for (i = 0; i < base->n_words; ++i) { + ckd_free(base->word_str[i]); + base->word_str[i] = NULL; + } + } + base->word_str = ckd_realloc(base->word_str, n_unigram * sizeof(char *)); + } + else + base->word_str = ckd_calloc(n_unigram, sizeof(char *)); + /* NOTE: They are no longer case-insensitive since we are allowing + * other encodings for word strings. Beware. */ + if (base->wid) + hash_table_empty(base->wid); + else + base->wid = hash_table_new(n_unigram, FALSE); + base->n_counts[0] = base->n_1g_alloc = base->n_words = n_unigram; + + return 0; +} + +ngram_model_t * +ngram_model_retain(ngram_model_t *model) +{ + ++model->refcount; + return model; +} + + +void +ngram_model_flush(ngram_model_t *model) +{ + if (model->funcs && model->funcs->flush) + (*model->funcs->flush)(model); +} + +int +ngram_model_free(ngram_model_t *model) +{ + int i; + + if (model == NULL) + return 0; + if (--model->refcount > 0) + return model->refcount; + if (model->funcs && model->funcs->free) + (*model->funcs->free)(model); + if (model->writable) { + /* Free all words. */ + for (i = 0; i < model->n_words; ++i) { + ckd_free(model->word_str[i]); + } + } + else { + /* Free all class words. */ + for (i = 0; i < model->n_classes; ++i) { + ngram_class_t *lmclass; + int32 j; + + lmclass = model->classes[i]; + for (j = 0; j < lmclass->n_words; ++j) { + ckd_free(model->word_str[lmclass->start_wid + j]); + } + for (j = 0; j < lmclass->n_hash; ++j) { + if (lmclass->nword_hash[j].wid != -1) { + ckd_free(model->word_str[lmclass->nword_hash[j].wid]); + } + } + } + } + for (i = 0; i < model->n_classes; ++i) { + ngram_class_free(model->classes[i]); + } + ckd_free(model->classes); + hash_table_free(model->wid); + ckd_free(model->word_str); + ckd_free(model->n_counts); + ckd_free(model); + return 0; +} + +int +ngram_model_casefold(ngram_model_t *model, int kase) +{ + int writable, i; + hash_table_t *new_wid; + + /* Were word strings already allocated? 
*/ + writable = model->writable; + /* Either way, we are going to allocate some word strings. */ + model->writable = TRUE; + + /* And, don't forget, we need to rebuild the word to unigram ID + * mapping. */ + new_wid = hash_table_new(model->n_words, FALSE); + for (i = 0; i < model->n_words; ++i) { + char *outstr; + if (writable) { + outstr = model->word_str[i]; + } + else { + outstr = ckd_salloc(model->word_str[i]); + } + /* Don't case-fold or [classes] */ + if (outstr[0] == '<' || outstr[0] == '[') { + } + else { + switch (kase) { + case NGRAM_UPPER: + ucase(outstr); + break; + case NGRAM_LOWER: + lcase(outstr); + break; + default: + ; + } + } + model->word_str[i] = outstr; + + /* Now update the hash table. We might have terrible + * collisions here, so warn about them. */ + if (hash_table_enter_int32(new_wid, model->word_str[i], i) != i) { + E_WARN("Duplicate word in dictionary after conversion: %s\n", + model->word_str[i]); + } + } + /* Swap out the hash table. */ + hash_table_free(model->wid); + model->wid = new_wid; + return 0; +} + +int +ngram_model_apply_weights(ngram_model_t *model, + float32 lw, float32 wip, float32 uw) +{ + return (*model->funcs->apply_weights)(model, lw, wip, uw); +} + +float32 +ngram_model_get_weights(ngram_model_t *model, int32 *out_log_wip, + int32 *out_log_uw) +{ + if (out_log_wip) *out_log_wip = model->log_wip; + if (out_log_uw) *out_log_uw = model->log_uw; + return model->lw; +} + + +int32 +ngram_ng_score(ngram_model_t *model, int32 wid, int32 *history, + int32 n_hist, int32 *n_used) +{ + int32 score, class_weight = 0; + int i; + + /* Closed vocabulary, OOV word probability is zero */ + if (wid == NGRAM_INVALID_WID) + return model->log_zero; + + /* "Declassify" wid and history */ + if (NGRAM_IS_CLASSWID(wid)) { + ngram_class_t *lmclass = model->classes[NGRAM_CLASSID(wid)]; + + class_weight = ngram_class_prob(lmclass, wid); + if (class_weight == 1) /* Meaning, not found in class. 
*/ + return model->log_zero; + wid = lmclass->tag_wid; + } + for (i = 0; i < n_hist; ++i) { + if (history[i] != NGRAM_INVALID_WID && NGRAM_IS_CLASSWID(history[i])) + history[i] = model->classes[NGRAM_CLASSID(history[i])]->tag_wid; + } + score = (*model->funcs->score)(model, wid, history, n_hist, n_used); + + /* Multiply by unigram in-class weight. */ + return score + class_weight; +} + +int32 +ngram_score(ngram_model_t *model, const char *word, ...) +{ + va_list history; + const char *hword; + int32 *histid; + int32 n_hist; + int32 n_used; + int32 prob; + + va_start(history, word); + n_hist = 0; + while ((hword = va_arg(history, const char *)) != NULL) + ++n_hist; + va_end(history); + + histid = ckd_calloc(n_hist, sizeof(*histid)); + va_start(history, word); + n_hist = 0; + while ((hword = va_arg(history, const char *)) != NULL) { + histid[n_hist] = ngram_wid(model, hword); + ++n_hist; + } + va_end(history); + + prob = ngram_ng_score(model, ngram_wid(model, word), + histid, n_hist, &n_used); + ckd_free(histid); + return prob; +} + +int32 +ngram_tg_score(ngram_model_t *model, int32 w3, int32 w2, int32 w1, int32 *n_used) +{ + int32 hist[2]; + hist[0] = w2; + hist[1] = w1; + return ngram_ng_score(model, w3, hist, 2, n_used); +} + +int32 +ngram_bg_score(ngram_model_t *model, int32 w2, int32 w1, int32 *n_used) +{ + return ngram_ng_score(model, w2, &w1, 1, n_used); +} + +int32 +ngram_ng_prob(ngram_model_t *model, int32 wid, int32 *history, + int32 n_hist, int32 *n_used) +{ + int32 prob, class_weight = 0; + int i; + + /* Closed vocabulary, OOV word probability is zero */ + if (wid == NGRAM_INVALID_WID) + return model->log_zero; + + /* "Declassify" wid and history */ + if (NGRAM_IS_CLASSWID(wid)) { + ngram_class_t *lmclass = model->classes[NGRAM_CLASSID(wid)]; + + class_weight = ngram_class_prob(lmclass, wid); + if (class_weight == 1) /* Meaning, not found in class. 
*/ + return class_weight; + wid = lmclass->tag_wid; + } + for (i = 0; i < n_hist; ++i) { + if (history[i] != NGRAM_INVALID_WID && NGRAM_IS_CLASSWID(history[i])) + history[i] = model->classes[NGRAM_CLASSID(history[i])]->tag_wid; + } + prob = (*model->funcs->raw_score)(model, wid, history, + n_hist, n_used); + /* Multiply by unigram in-class weight. */ + return prob + class_weight; +} + +int32 +ngram_probv(ngram_model_t *model, const char *word, ...) +{ + va_list history; + const char *hword; + int32 *histid; + int32 n_hist; + int32 n_used; + int32 prob; + + va_start(history, word); + n_hist = 0; + while ((hword = va_arg(history, const char *)) != NULL) + ++n_hist; + va_end(history); + + histid = ckd_calloc(n_hist, sizeof(*histid)); + va_start(history, word); + n_hist = 0; + while ((hword = va_arg(history, const char *)) != NULL) { + histid[n_hist] = ngram_wid(model, hword); + ++n_hist; + } + va_end(history); + + prob = ngram_ng_prob(model, ngram_wid(model, word), + histid, n_hist, &n_used); + ckd_free(histid); + return prob; +} + +int32 +ngram_prob(ngram_model_t *model, const char *const *words, int32 n) +{ + int32 *ctx_id; + int32 nused; + int32 prob; + int32 wid; + uint32 i; + + ctx_id = (int32 *)ckd_calloc(n - 1, sizeof(*ctx_id)); + for (i = 1; i < n; ++i) + ctx_id[i - 1] = ngram_wid(model, words[i]); + + wid = ngram_wid(model, *words); + prob = ngram_ng_prob(model, wid, ctx_id, n - 1, &nused); + ckd_free(ctx_id); + + return prob; +} + +int32 +ngram_score_to_prob(ngram_model_t *base, int32 score) +{ + int32 prob; + + /* Undo insertion penalty. */ + prob = score - base->log_wip; + /* Undo language weight. */ + prob = (int32)(prob / base->lw); + + return prob; +} + +int32 +ngram_unknown_wid(ngram_model_t *model) +{ + int32 val; + + /* FIXME: This could be memoized for speed if necessary. */ + /* Look up , if not found return NGRAM_INVALID_WID. 
*/ + if (hash_table_lookup_int32(model->wid, "", &val) == -1) + return NGRAM_INVALID_WID; + else + return val; +} + +int32 +ngram_zero(ngram_model_t *model) +{ + return model->log_zero; +} + +int32 +ngram_model_get_size(ngram_model_t *model) +{ + if (model != NULL) + return model->n; + return 0; +} + +int32 const * +ngram_model_get_counts(ngram_model_t *model) +{ + if (model != NULL) + return model->n_counts; + return NULL; +} + +void +ngram_iter_init(ngram_iter_t *itor, ngram_model_t *model, + int m, int successor) +{ + itor->model = model; + itor->wids = ckd_calloc(model->n, sizeof(*itor->wids)); + itor->m = m; + itor->successor = successor; +} + +ngram_iter_t * +ngram_model_mgrams(ngram_model_t *model, int m) +{ + ngram_iter_t *itor; + /* The fact that m=n-1 is not exactly obvious. Prevent accidents. */ + if (m >= model->n) + return NULL; + if (model->funcs->mgrams == NULL) + return NULL; + itor = (*model->funcs->mgrams)(model, m); + return itor; +} + +ngram_iter_t * +ngram_iter(ngram_model_t *model, const char *word, ...) +{ + va_list history; + const char *hword; + int32 *histid; + int32 n_hist; + ngram_iter_t *itor; + + va_start(history, word); + n_hist = 0; + while ((hword = va_arg(history, const char *)) != NULL) + ++n_hist; + va_end(history); + + histid = ckd_calloc(n_hist, sizeof(*histid)); + va_start(history, word); + n_hist = 0; + while ((hword = va_arg(history, const char *)) != NULL) { + histid[n_hist] = ngram_wid(model, hword); + ++n_hist; + } + va_end(history); + + itor = ngram_ng_iter(model, ngram_wid(model, word), histid, n_hist); + ckd_free(histid); + return itor; +} + +ngram_iter_t * +ngram_ng_iter(ngram_model_t *model, int32 wid, int32 *history, int32 n_hist) +{ + if (n_hist >= model->n) + return NULL; + if (model->funcs->iter == NULL) + return NULL; + return (*model->funcs->iter)(model, wid, history, n_hist); +} + +ngram_iter_t * +ngram_iter_successors(ngram_iter_t *itor) +{ + /* Stop when we are at the highest order N-Gram. 
*/ + if (itor->m == itor->model->n - 1) + return NULL; + return (*itor->model->funcs->successors)(itor); +} + +int32 const * +ngram_iter_get(ngram_iter_t *itor, + int32 *out_score, + int32 *out_bowt) +{ + return (*itor->model->funcs->iter_get)(itor, out_score, out_bowt); +} + +ngram_iter_t * +ngram_iter_next(ngram_iter_t *itor) +{ + return (*itor->model->funcs->iter_next)(itor); +} + +void +ngram_iter_free(ngram_iter_t *itor) +{ + ckd_free(itor->wids); + (*itor->model->funcs->iter_free)(itor); +} + +int32 +ngram_wid(ngram_model_t *model, const char *word) +{ + int32 val; + + if (hash_table_lookup_int32(model->wid, word, &val) == -1) + return ngram_unknown_wid(model); + else + return val; +} + +const char * +ngram_word(ngram_model_t *model, int32 wid) +{ + /* Remove any class tag */ + wid = NGRAM_BASEWID(wid); + if (wid >= model->n_words) + return NULL; + return model->word_str[wid]; +} + +/** + * Add a word to the word string and ID mapping. + */ +int32 +ngram_add_word_internal(ngram_model_t *model, + const char *word, + int32 classid) +{ + + /* Check for hash collisions. */ + int32 wid; + if (hash_table_lookup_int32(model->wid, word, &wid) == 0) { + E_WARN("Omit duplicate word '%s'\n", word); + return wid; + } + + /* Take the next available word ID */ + wid = model->n_words; + if (classid >= 0) { + wid = NGRAM_CLASSWID(wid, classid); + } + + /* Reallocate word_str if necessary. */ + if (model->n_words >= model->n_1g_alloc) { + model->n_1g_alloc += UG_ALLOC_STEP; + model->word_str = ckd_realloc(model->word_str, + sizeof(*model->word_str) * model->n_1g_alloc); + } + /* Add the word string in the appropriate manner. */ + /* Class words are always dynamically allocated. */ + model->word_str[model->n_words] = ckd_salloc(word); + /* Now enter it into the hash table. 
*/ + if (hash_table_enter_int32(model->wid, model->word_str[model->n_words], wid) != wid) { + E_ERROR("Hash insertion failed for word %s => %p (should not happen)\n", + model->word_str[model->n_words], (void *)(long)(wid)); + } + /* Increment number of words. */ + ++model->n_words; + return wid; +} + +int32 +ngram_model_add_word(ngram_model_t *model, + const char *word, float32 weight) +{ + int32 wid, prob = model->log_zero; + + /* If we add word to unwritable model, we need to make it writable */ + if (!model->writable) { + E_WARN("Can't add word '%s' to read-only language model. " + "Disable mmap with '-mmap no' to make it writable\n", word); + return -1; + } + + wid = ngram_add_word_internal(model, word, -1); + if (wid == NGRAM_INVALID_WID) + return wid; + + /* Do what needs to be done to add the word to the unigram. */ + if (model->funcs && model->funcs->add_ug) + prob = (*model->funcs->add_ug)(model, wid, logmath_log(model->lmath, weight)); + if (prob == 0) + return -1; + + return wid; +} + +ngram_class_t * +ngram_class_new(ngram_model_t *model, int32 tag_wid, int32 start_wid, glist_t classwords) +{ + ngram_class_t *lmclass; + gnode_t *gn; + float32 tprob; + int i; + + lmclass = ckd_calloc(1, sizeof(*lmclass)); + lmclass->tag_wid = tag_wid; + /* wid_base is the wid (minus class tag) of the first word in the list. 
*/ + lmclass->start_wid = start_wid; + lmclass->n_words = glist_count(classwords); + lmclass->prob1 = ckd_calloc(lmclass->n_words, sizeof(*lmclass->prob1)); + lmclass->nword_hash = NULL; + lmclass->n_hash = 0; + tprob = 0.0; + for (gn = classwords; gn; gn = gnode_next(gn)) { + tprob += gnode_float32(gn); + } + if (tprob > 1.1 || tprob < 0.9) { + E_INFO("Total class probability is %f, will normalize\n", tprob); + for (gn = classwords; gn; gn = gnode_next(gn)) { + gn->data.fl /= tprob; + } + } + for (i = 0, gn = classwords; gn; ++i, gn = gnode_next(gn)) { + lmclass->prob1[i] = logmath_log(model->lmath, gnode_float32(gn)); + } + + return lmclass; +} + +int32 +ngram_class_add_word(ngram_class_t *lmclass, int32 wid, int32 lweight) +{ + int32 hash; + + if (lmclass->nword_hash == NULL) { + /* Initialize everything in it to -1 */ + lmclass->nword_hash = ckd_malloc(NGRAM_HASH_SIZE * sizeof(*lmclass->nword_hash)); + memset(lmclass->nword_hash, 0xff, NGRAM_HASH_SIZE * sizeof(*lmclass->nword_hash)); + lmclass->n_hash = NGRAM_HASH_SIZE; + lmclass->n_hash_inuse = 0; + } + /* Stupidest possible hash function. This will work pretty well + * when this function is called repeatedly with contiguous word + * IDs, though... */ + hash = wid & (lmclass->n_hash - 1); + if (lmclass->nword_hash[hash].wid == -1) { + /* Good, no collision. */ + lmclass->nword_hash[hash].wid = wid; + lmclass->nword_hash[hash].prob1 = lweight; + ++lmclass->n_hash_inuse; + return hash; + } + else { + int32 next; /**< Next available bucket. */ + /* Collision... Find the end of the hash chain. */ + while (lmclass->nword_hash[hash].next != -1) + hash = lmclass->nword_hash[hash].next; + assert(hash != -1); + /* Does we has any more bukkit? */ + if (lmclass->n_hash_inuse == lmclass->n_hash) { + /* Oh noes! Ok, we makes more. 
*/ + lmclass->nword_hash = ckd_realloc(lmclass->nword_hash, + lmclass->n_hash * 2 * sizeof(*lmclass->nword_hash)); + memset(lmclass->nword_hash + lmclass->n_hash, + 0xff, lmclass->n_hash * sizeof(*lmclass->nword_hash)); + /* Just use the next allocated one (easy) */ + next = lmclass->n_hash; + lmclass->n_hash *= 2; + } + else { + /* Look for any available bucket. We hope this doesn't happen. */ + for (next = 0; next < lmclass->n_hash; ++next) + if (lmclass->nword_hash[next].wid == -1) + break; + /* This should absolutely not happen. */ + assert(next != lmclass->n_hash); + } + lmclass->nword_hash[next].wid = wid; + lmclass->nword_hash[next].prob1 = lweight; + lmclass->nword_hash[hash].next = next; + ++lmclass->n_hash_inuse; + return next; + } +} + +void +ngram_class_free(ngram_class_t *lmclass) +{ + ckd_free(lmclass->nword_hash); + ckd_free(lmclass->prob1); + ckd_free(lmclass); +} + +int32 +ngram_model_add_class_word(ngram_model_t *model, + const char *classname, + const char *word, + float32 weight) +{ + ngram_class_t *lmclass; + int32 classid, tag_wid, wid, i, scale; + float32 fprob; + + /* Find the class corresponding to classname. Linear search + * probably okay here since there won't be very many classes, and + * this doesn't have to be fast. */ + tag_wid = ngram_wid(model, classname); + if (tag_wid == NGRAM_INVALID_WID) { + E_ERROR("No such word or class tag: %s\n", classname); + return tag_wid; + } + for (classid = 0; classid < model->n_classes; ++classid) { + if (model->classes[classid]->tag_wid == tag_wid) + break; + } + /* Hmm, no such class. It's probably not a good idea to create one. */ + if (classid == model->n_classes) { + E_ERROR("Word %s is not a class tag (call ngram_model_add_class() first)\n", classname); + return NGRAM_INVALID_WID; + } + lmclass = model->classes[classid]; + + /* Add this word to the model's set of words. 
*/ + wid = ngram_add_word_internal(model, word, classid); + if (wid == NGRAM_INVALID_WID) + return wid; + + /* This is the fixed probability of the new word. */ + fprob = weight * 1.0f / (lmclass->n_words + lmclass->n_hash_inuse + 1); + /* Now normalize everything else to fit it in. This is + * accomplished by simply scaling all the other probabilities + * by (1-fprob). */ + scale = logmath_log(model->lmath, 1.0 - fprob); + for (i = 0; i < lmclass->n_words; ++i) + lmclass->prob1[i] += scale; + for (i = 0; i < lmclass->n_hash; ++i) + if (lmclass->nword_hash[i].wid != -1) + lmclass->nword_hash[i].prob1 += scale; + + /* Now add it to the class hash table. */ + return ngram_class_add_word(lmclass, wid, logmath_log(model->lmath, fprob)); +} + +int32 +ngram_model_add_class(ngram_model_t *model, + const char *classname, + float32 classweight, + char **words, + const float32 *weights, + int32 n_words) +{ + ngram_class_t *lmclass; + glist_t classwords = NULL; + int32 i, start_wid = -1; + int32 classid, tag_wid; + + /* Check if classname already exists in model. 
If not, add it.*/ + if ((tag_wid = ngram_wid(model, classname)) == ngram_unknown_wid(model)) { + tag_wid = ngram_model_add_word(model, classname, classweight); + if (tag_wid == NGRAM_INVALID_WID) + return -1; + } + + if (model->n_classes == 128) { + E_ERROR("Number of classes cannot exceed 128 (sorry)\n"); + return -1; + } + classid = model->n_classes; + for (i = 0; i < n_words; ++i) { + int32 wid; + + wid = ngram_add_word_internal(model, words[i], classid); + if (wid == NGRAM_INVALID_WID) + return -1; + if (start_wid == -1) + start_wid = NGRAM_BASEWID(wid); + classwords = glist_add_float32(classwords, weights[i]); + } + classwords = glist_reverse(classwords); + lmclass = ngram_class_new(model, tag_wid, start_wid, classwords); + glist_free(classwords); + if (lmclass == NULL) + return -1; + + ++model->n_classes; + if (model->classes == NULL) + model->classes = ckd_calloc(1, sizeof(*model->classes)); + else + model->classes = ckd_realloc(model->classes, + model->n_classes * sizeof(*model->classes)); + model->classes[classid] = lmclass; + return classid; +} + +int32 +ngram_class_prob(ngram_class_t *lmclass, int32 wid) +{ + int32 base_wid = NGRAM_BASEWID(wid); + + if (base_wid < lmclass->start_wid + || base_wid > lmclass->start_wid + lmclass->n_words) { + int32 hash; + + /* Look it up in the hash table. */ + hash = wid & (lmclass->n_hash - 1); + while (hash != -1 && lmclass->nword_hash[hash].wid != wid) + hash = lmclass->nword_hash[hash].next; + if (hash == -1) + return 1; + return lmclass->nword_hash[hash].prob1; + } + else { + return lmclass->prob1[base_wid - lmclass->start_wid]; + } +} + +int32 +read_classdef_file(hash_table_t *classes, const char *file_name) +{ + FILE *fp; + int32 is_pipe; + int inclass; /**< Are we currently reading a list of class words? 
*/ + int32 rv = -1; + gnode_t *gn; + glist_t classwords = NULL; + glist_t classprobs = NULL; + char *classname = NULL; + + if ((fp = fopen_comp(file_name, "r", &is_pipe)) == NULL) { + E_ERROR("File %s not found\n", file_name); + return -1; + } + + inclass = FALSE; + while (!feof(fp)) { + char line[512]; + char *wptr[2]; + int n_words; + + if (fgets(line, sizeof(line), fp) == NULL) + break; + + n_words = str2words(line, wptr, 2); + if (n_words <= 0) + continue; + + if (inclass) { + /* Look for an end of class marker. */ + if (n_words == 2 && 0 == strcmp(wptr[0], "END")) { + classdef_t *classdef; + gnode_t *word, *weight; + int32 i; + + if (classname == NULL || 0 != strcmp(wptr[1], classname)) + goto error_out; + inclass = FALSE; + + /* Construct a class from the list of words collected. */ + classdef = ckd_calloc(1, sizeof(*classdef)); + classwords = glist_reverse(classwords); + classprobs = glist_reverse(classprobs); + classdef->n_words = glist_count(classwords); + classdef->words = ckd_calloc(classdef->n_words, + sizeof(*classdef->words)); + classdef->weights = ckd_calloc(classdef->n_words, + sizeof(*classdef->weights)); + word = classwords; + weight = classprobs; + for (i = 0; i < classdef->n_words; ++i) { + classdef->words[i] = gnode_ptr(word); + classdef->weights[i] = gnode_float32(weight); + word = gnode_next(word); + weight = gnode_next(weight); + } + + /* Add this class to the hash table. */ + if (hash_table_enter(classes, classname, classdef) != classdef) { + classdef_free(classdef); + goto error_out; + } + + /* Reset everything. */ + glist_free(classwords); + glist_free(classprobs); + classwords = NULL; + classprobs = NULL; + classname = NULL; + } + else { + float32 fprob; + + if (n_words == 2) + fprob = (float32)atof_c(wptr[1]); + else + fprob = 1.0f; + /* Add it to the list of words for this class. 
*/ + classwords = glist_add_ptr(classwords, ckd_salloc(wptr[0])); + classprobs = glist_add_float32(classprobs, fprob); + } + } + else { + /* Start a new LM class if the LMCLASS marker is seen */ + if (n_words == 2 && 0 == strcmp(wptr[0], "LMCLASS")) { + if (inclass) + goto error_out; + inclass = TRUE; + classname = ckd_salloc(wptr[1]); + } + /* Otherwise, just ignore whatever junk we got */ + } + } + rv = 0; /* Success. */ + +error_out: + /* Free all the stuff we might have allocated. */ + fclose_comp(fp, is_pipe); + for (gn = classwords; gn; gn = gnode_next(gn)) + ckd_free(gnode_ptr(gn)); + glist_free(classwords); + glist_free(classprobs); + ckd_free(classname); + + return rv; +} + +void +classdef_free(classdef_t *classdef) +{ + int32 i; + for (i = 0; i < classdef->n_words; ++i) + ckd_free(classdef->words[i]); + ckd_free(classdef->words); + ckd_free(classdef->weights); + ckd_free(classdef); +} + + +int32 +ngram_model_read_classdef(ngram_model_t *model, + const char *file_name) +{ + hash_table_t *classes; + glist_t hl = NULL; + gnode_t *gn; + int32 rv = -1; + + classes = hash_table_new(0, FALSE); + if (read_classdef_file(classes, file_name) < 0) { + hash_table_free(classes); + return -1; + } + + /* Create a new class in the language model for each classdef. 
*/ + hl = hash_table_tolist(classes, NULL); + for (gn = hl; gn; gn = gnode_next(gn)) { + hash_entry_t *he = gnode_ptr(gn); + classdef_t *classdef = he->val; + + if (ngram_model_add_class(model, he->key, 1.0, + classdef->words, + classdef->weights, + classdef->n_words) < 0) + goto error_out; + } + rv = 0; + +error_out: + for (gn = hl; gn; gn = gnode_next(gn)) { + hash_entry_t *he = gnode_ptr(gn); + ckd_free((char *)he->key); + classdef_free(he->val); + } + glist_free(hl); + hash_table_free(classes); + return rv; +} diff --git a/media/sphinxbase/src/libsphinxbase/lm/ngram_model_arpa.c b/media/sphinxbase/src/libsphinxbase/lm/ngram_model_arpa.c new file mode 100644 index 000000000..a4b72cb00 --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/lm/ngram_model_arpa.c @@ -0,0 +1,660 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2007 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * \file ngram_model_arpa.c ARPA format language models + * + * Author: David Huggins-Daines + */ + +#include "sphinxbase/ckd_alloc.h" +#include +#include +#include + +#include "sphinxbase/err.h" +#include "sphinxbase/pio.h" +#include "sphinxbase/listelem_alloc.h" +#include "sphinxbase/strfuncs.h" + +#include "ngram_model_arpa.h" + +static ngram_funcs_t ngram_model_arpa_funcs; + +#define TSEG_BASE(m,b) ((m)->lm3g.tseg_base[(b)>>LOG_BG_SEG_SZ]) +#define FIRST_BG(m,u) ((m)->lm3g.unigrams[u].bigrams) +#define FIRST_TG(m,b) (TSEG_BASE((m),(b))+((m)->lm3g.bigrams[b].trigrams)) + +/* + * Read and return #unigrams, #bigrams, #trigrams as stated in input file. 
+ */ +static int +ReadNgramCounts(lineiter_t **li, int32 * n_ug, int32 * n_bg, int32 * n_tg) +{ + int32 ngram, ngram_cnt; + + /* skip file until past the '\data\' marker */ + while (*li) { + string_trim((*li)->buf, STRING_BOTH); + if (strcmp((*li)->buf, "\\data\\") == 0) + break; + *li = lineiter_next(*li); + } + if (*li == NULL || strcmp((*li)->buf, "\\data\\") != 0) { + E_INFO("No \\data\\ mark in LM file\n"); + return -1; + } + + *n_ug = *n_bg = *n_tg = 0; + while ((*li = lineiter_next(*li))) { + if (sscanf((*li)->buf, "ngram %d=%d", &ngram, &ngram_cnt) != 2) + break; + switch (ngram) { + case 1: + *n_ug = ngram_cnt; + break; + case 2: + *n_bg = ngram_cnt; + break; + case 3: + *n_tg = ngram_cnt; + break; + default: + E_ERROR("Unknown ngram (%d)\n", ngram); + return -1; + } + } + if (*li == NULL) { + E_ERROR("EOF while reading ngram counts\n"); + return -1; + } + + /* Position iterator to the unigrams header '\1-grams:\' */ + while ((*li = lineiter_next(*li))) { + string_trim((*li)->buf, STRING_BOTH); + if (strcmp((*li)->buf, "\\1-grams:") == 0) + break; + } + if (*li == NULL) { + E_ERROR_SYSTEM("Failed to read \\1-grams: mark"); + return -1; + } + + if ((*n_ug <= 0) || (*n_bg < 0) || (*n_tg < 0)) { + E_ERROR("Bad or missing ngram count\n"); + return -1; + } + return 0; +} + +/* + * Read in the unigrams from given file into the LM structure model. + * On entry to this procedure, the iterator is positioned to the + * header line '\1-grams:'. 
+ */ +static int +ReadUnigrams(lineiter_t **li, ngram_model_arpa_t * model) +{ + ngram_model_t *base = &model->base; + int32 wcnt; + float p1; + + E_INFO("Reading unigrams\n"); + + wcnt = 0; + while ((*li = lineiter_next(*li))) { + char *wptr[3], *name; + float32 bo_wt = 0.0f; + int n; + + string_trim((*li)->buf, STRING_BOTH); + if (strcmp((*li)->buf, "\\2-grams:") == 0 + || strcmp((*li)->buf, "\\end\\") == 0) + break; + + if ((n = str2words((*li)->buf, wptr, 3)) < 2) { + if ((*li)->buf[0] != '\0') + E_WARN("Format error; unigram ignored: %s\n", (*li)->buf); + continue; + } + else { + p1 = (float)atof_c(wptr[0]); + name = wptr[1]; + if (n == 3) + bo_wt = (float)atof_c(wptr[2]); + } + + if (wcnt >= base->n_counts[0]) { + E_ERROR("Too many unigrams\n"); + return -1; + } + + /* Associate name with word id */ + base->word_str[wcnt] = ckd_salloc(name); + if ((hash_table_enter(base->wid, base->word_str[wcnt], (void *)(long)wcnt)) + != (void *)(long)wcnt) { + E_WARN("Duplicate word in dictionary: %s\n", base->word_str[wcnt]); + } + model->lm3g.unigrams[wcnt].prob1.l = logmath_log10_to_log(base->lmath, p1); + model->lm3g.unigrams[wcnt].bo_wt1.l = logmath_log10_to_log(base->lmath, bo_wt); + wcnt++; + } + + if (base->n_counts[0] != wcnt) { + E_WARN("lm_t.ucount(%d) != #unigrams read(%d)\n", + base->n_counts[0], wcnt); + base->n_counts[0] = wcnt; + base->n_words = wcnt; + } + return 0; +} + +/* + * Read bigrams from given file into given model structure. 
+ */ +static int +ReadBigrams(lineiter_t **li, ngram_model_arpa_t * model) +{ + ngram_model_t *base = &model->base; + int32 w1, w2, prev_w1, bgcount; + bigram_t *bgptr; + + E_INFO("Reading bigrams\n"); + + bgcount = 0; + bgptr = model->lm3g.bigrams; + prev_w1 = -1; + + while ((*li = lineiter_next(*li))) { + float32 p, bo_wt = 0.0f; + int32 p2, bo_wt2; + char *wptr[4], *word1, *word2; + int n; + + string_trim((*li)->buf, STRING_BOTH); + wptr[3] = NULL; + if ((n = str2words((*li)->buf, wptr, 4)) < 3) { + if ((*li)->buf[0] != '\0') + break; + continue; + } + else { + p = (float32)atof_c(wptr[0]); + word1 = wptr[1]; + word2 = wptr[2]; + if (wptr[3]) + bo_wt = (float32)atof_c(wptr[3]); + } + + if ((w1 = ngram_wid(base, word1)) == NGRAM_INVALID_WID) { + E_ERROR("Unknown word: %s, skipping bigram (%s %s)\n", + word1, word1, word2); + continue; + } + if ((w2 = ngram_wid(base, word2)) == NGRAM_INVALID_WID) { + E_ERROR("Unknown word: %s, skipping bigram (%s %s)\n", + word2, word1, word2); + continue; + } + + /* FIXME: Should use logmath_t quantization here. */ + /* HACK!! 
to quantize probs to 4 decimal digits */ + p = (float32)((int32)(p * 10000)) / 10000; + bo_wt = (float32)((int32)(bo_wt * 10000)) / 10000; + + p2 = logmath_log10_to_log(base->lmath, p); + bo_wt2 = logmath_log10_to_log(base->lmath, bo_wt); + + if (bgcount >= base->n_counts[1]) { + E_ERROR("Too many bigrams\n"); + return -1; + } + + bgptr->wid = w2; + bgptr->prob2 = sorted_id(&model->sorted_prob2, &p2); + if (base->n_counts[2] > 0) + bgptr->bo_wt2 = sorted_id(&model->sorted_bo_wt2, &bo_wt2); + + if (w1 != prev_w1) { + if (w1 < prev_w1) { + E_ERROR("Bigram %s %s not in unigram order word id: %d prev word id: %d\n", word1, word2, w1, prev_w1); + return -1; + } + + for (prev_w1++; prev_w1 <= w1; prev_w1++) + model->lm3g.unigrams[prev_w1].bigrams = bgcount; + prev_w1 = w1; + } + bgcount++; + bgptr++; + + if ((bgcount & 0x0000ffff) == 0) { + E_INFOCONT("."); + } + } + if (*li == NULL || ((strcmp((*li)->buf, "\\end\\") != 0) + && (strcmp((*li)->buf, "\\3-grams:") != 0))) { + E_ERROR("Bad bigram: %s\n", (*li)->buf); + return -1; + } + + for (prev_w1++; prev_w1 <= base->n_counts[0]; prev_w1++) + model->lm3g.unigrams[prev_w1].bigrams = bgcount; + + return 0; +} + +/* + * Very similar to ReadBigrams. 
+ */ +static int +ReadTrigrams(lineiter_t **li, ngram_model_arpa_t * model) +{ + ngram_model_t *base = &model->base; + int32 i, w1, w2, w3, prev_w1, prev_w2, tgcount, prev_bg, bg, endbg; + int32 seg, prev_seg, prev_seg_lastbg; + trigram_t *tgptr; + bigram_t *bgptr; + + E_INFO("Reading trigrams\n"); + + tgcount = 0; + tgptr = model->lm3g.trigrams; + prev_w1 = -1; + prev_w2 = -1; + prev_bg = -1; + prev_seg = -1; + + while ((*li = lineiter_next(*li))) { + float32 p; + int32 p3; + char *wptr[4], *word1, *word2, *word3; + + string_trim((*li)->buf, STRING_BOTH); + if (str2words((*li)->buf, wptr, 4) != 4) { + if ((*li)->buf[0] != '\0') + break; + continue; + } + else { + p = (float32)atof_c(wptr[0]); + word1 = wptr[1]; + word2 = wptr[2]; + word3 = wptr[3]; + } + + if ((w1 = ngram_wid(base, word1)) == NGRAM_INVALID_WID) { + E_ERROR("Unknown word: %s, skipping trigram (%s %s %s)\n", + word1, word1, word2, word3); + continue; + } + if ((w2 = ngram_wid(base, word2)) == NGRAM_INVALID_WID) { + E_ERROR("Unknown word: %s, skipping trigram (%s %s %s)\n", + word2, word1, word2, word3); + continue; + } + if ((w3 = ngram_wid(base, word3)) == NGRAM_INVALID_WID) { + E_ERROR("Unknown word: %s, skipping trigram (%s %s %s)\n", + word3, word1, word2, word3); + continue; + } + + /* FIXME: Should use logmath_t quantization here. */ + /* HACK!! to quantize probs to 4 decimal digits */ + p = (float32)((int32)(p * 10000)) / 10000; + p3 = logmath_log10_to_log(base->lmath, p); + + if (tgcount >= base->n_counts[2]) { + E_ERROR("Too many trigrams\n"); + return -1; + } + + tgptr->wid = w3; + tgptr->prob3 = sorted_id(&model->sorted_prob3, &p3); + + if ((w1 != prev_w1) || (w2 != prev_w2)) { + /* Trigram for a new bigram; update tg info for all previous bigrams */ + if ((w1 < prev_w1) || ((w1 == prev_w1) && (w2 < prev_w2))) { + E_ERROR("Trigrams not in bigram order\n"); + return -1; + } + + bg = (w1 != + prev_w1) ? 
model->lm3g.unigrams[w1].bigrams : prev_bg + 1; + endbg = model->lm3g.unigrams[w1 + 1].bigrams; + bgptr = model->lm3g.bigrams + bg; + for (; (bg < endbg) && (bgptr->wid != w2); bg++, bgptr++); + if (bg >= endbg) { + E_ERROR("Missing bigram for trigram: %s", (*li)->buf); + return -1; + } + + /* bg = bigram entry index for . Update tseg_base */ + seg = bg >> LOG_BG_SEG_SZ; + for (i = prev_seg + 1; i <= seg; i++) + model->lm3g.tseg_base[i] = tgcount; + + /* Update trigrams pointers for all bigrams until bg */ + if (prev_seg < seg) { + int32 tgoff = 0; + + if (prev_seg >= 0) { + tgoff = tgcount - model->lm3g.tseg_base[prev_seg]; + if (tgoff > 65535) { + E_ERROR("Size of trigram segment is bigger than 65535, such a big language models are not supported, use smaller vocabulary\n"); + return -1; + } + } + + prev_seg_lastbg = ((prev_seg + 1) << LOG_BG_SEG_SZ) - 1; + bgptr = model->lm3g.bigrams + prev_bg; + for (++prev_bg, ++bgptr; prev_bg <= prev_seg_lastbg; + prev_bg++, bgptr++) + bgptr->trigrams = tgoff; + + for (; prev_bg <= bg; prev_bg++, bgptr++) + bgptr->trigrams = 0; + } + else { + int32 tgoff; + + tgoff = tgcount - model->lm3g.tseg_base[prev_seg]; + if (tgoff > 65535) { + E_ERROR("Size of trigram segment is bigger than 65535, such a big language models are not supported, use smaller vocabulary\n"); + return -1; + } + + bgptr = model->lm3g.bigrams + prev_bg; + for (++prev_bg, ++bgptr; prev_bg <= bg; prev_bg++, bgptr++) + bgptr->trigrams = tgoff; + } + + prev_w1 = w1; + prev_w2 = w2; + prev_bg = bg; + prev_seg = seg; + } + + tgcount++; + tgptr++; + + if ((tgcount & 0x0000ffff) == 0) { + E_INFOCONT("."); + } + } + if (*li == NULL || strcmp((*li)->buf, "\\end\\") != 0) { + E_ERROR("Bad trigram: %s\n", (*li)->buf); + return -1; + } + + for (prev_bg++; prev_bg <= base->n_counts[1]; prev_bg++) { + if ((prev_bg & (BG_SEG_SZ - 1)) == 0) + model->lm3g.tseg_base[prev_bg >> LOG_BG_SEG_SZ] = tgcount; + if ((tgcount - model->lm3g.tseg_base[prev_bg >> LOG_BG_SEG_SZ]) > 65535) { + 
E_ERROR("Size of trigram segment is bigger than 65535, such a big language models are not supported, use smaller vocabulary\n"); + return -1; + } + model->lm3g.bigrams[prev_bg].trigrams = + tgcount - model->lm3g.tseg_base[prev_bg >> LOG_BG_SEG_SZ]; + } + return 0; +} + +static unigram_t * +new_unigram_table(int32 n_ug) +{ + unigram_t *table; + int32 i; + + table = ckd_calloc(n_ug, sizeof(unigram_t)); + for (i = 0; i < n_ug; i++) { + table[i].prob1.l = INT_MIN; + table[i].bo_wt1.l = INT_MIN; + } + return table; +} + +ngram_model_t * +ngram_model_arpa_read(cmd_ln_t *config, + const char *file_name, + logmath_t *lmath) +{ + lineiter_t *li; + FILE *fp; + int32 is_pipe; + int32 n_unigram; + int32 n_bigram; + int32 n_trigram; + int32 n; + ngram_model_arpa_t *model; + ngram_model_t *base; + + if ((fp = fopen_comp(file_name, "r", &is_pipe)) == NULL) { + E_ERROR("File %s not found\n", file_name); + return NULL; + } + li = lineiter_start(fp); + + /* Read #unigrams, #bigrams, #trigrams from file */ + if (ReadNgramCounts(&li, &n_unigram, &n_bigram, &n_trigram) == -1) { + lineiter_free(li); + fclose_comp(fp, is_pipe); + return NULL; + } + E_INFO("ngrams 1=%d, 2=%d, 3=%d\n", n_unigram, n_bigram, n_trigram); + + /* Allocate space for LM, including initial OOVs and placeholders; initialize it */ + model = ckd_calloc(1, sizeof(*model)); + base = &model->base; + if (n_trigram > 0) + n = 3; + else if (n_bigram > 0) + n = 2; + else + n = 1; + /* Initialize base model. */ + ngram_model_init(base, &ngram_model_arpa_funcs, lmath, n, n_unigram); + base->n_counts[0] = n_unigram; + base->n_counts[1] = n_bigram; + base->n_counts[2] = n_trigram; + base->writable = TRUE; + + /* + * Allocate one extra unigram and bigram entry: sentinels to terminate + * followers (bigrams and trigrams, respectively) of previous entry. 
+ */ + model->lm3g.unigrams = new_unigram_table(n_unigram + 1); + model->lm3g.bigrams = + ckd_calloc(n_bigram + 1, sizeof(bigram_t)); + if (n_trigram > 0) + model->lm3g.trigrams = + ckd_calloc(n_trigram, sizeof(trigram_t)); + + if (n_trigram > 0) { + model->lm3g.tseg_base = + ckd_calloc((n_bigram + 1) / BG_SEG_SZ + 1, + sizeof(int32)); + } + if (ReadUnigrams(&li, model) == -1) { + fclose_comp(fp, is_pipe); + ngram_model_free(base); + return NULL; + } + E_INFO("%8d = #unigrams created\n", base->n_counts[0]); + + if (base->n_counts[2] > 0) + init_sorted_list(&model->sorted_bo_wt2); + + if (base->n_counts[1] > 0) { + init_sorted_list(&model->sorted_prob2); + + if (ReadBigrams(&li, model) == -1) { + fclose_comp(fp, is_pipe); + ngram_model_free(base); + return NULL; + } + + base->n_counts[1] = FIRST_BG(model, base->n_counts[0]); + model->lm3g.n_prob2 = model->sorted_prob2.free; + model->lm3g.prob2 = vals_in_sorted_list(&model->sorted_prob2); + free_sorted_list(&model->sorted_prob2); + E_INFO("%8d = #bigrams created\n", base->n_counts[1]); + E_INFO("%8d = #prob2 entries\n", model->lm3g.n_prob2); + } + + if (base->n_counts[2] > 0) { + /* Create trigram bo-wts array */ + model->lm3g.n_bo_wt2 = model->sorted_bo_wt2.free; + model->lm3g.bo_wt2 = vals_in_sorted_list(&model->sorted_bo_wt2); + free_sorted_list(&model->sorted_bo_wt2); + E_INFO("%8d = #bo_wt2 entries\n", model->lm3g.n_bo_wt2); + + init_sorted_list(&model->sorted_prob3); + + if (ReadTrigrams(&li, model) == -1) { + fclose_comp(fp, is_pipe); + ngram_model_free(base); + return NULL; + } + + base->n_counts[2] = FIRST_TG(model, base->n_counts[1]); + model->lm3g.n_prob3 = model->sorted_prob3.free; + model->lm3g.prob3 = vals_in_sorted_list(&model->sorted_prob3); + E_INFO("%8d = #trigrams created\n", base->n_counts[2]); + E_INFO("%8d = #prob3 entries\n", model->lm3g.n_prob3); + + free_sorted_list(&model->sorted_prob3); + + /* Initialize tginfo */ + model->lm3g.tginfo = ckd_calloc(n_unigram, sizeof(tginfo_t *)); + 
model->lm3g.le = listelem_alloc_init(sizeof(tginfo_t)); + } + + lineiter_free(li); + fclose_comp(fp, is_pipe); + return base; +} + +int +ngram_model_arpa_write(ngram_model_t *model, + const char *file_name) +{ + ngram_iter_t *itor; + FILE *fh; + int i; + + if ((fh = fopen(file_name, "w")) == NULL) { + E_ERROR_SYSTEM("Failed to open %s for writing", file_name); + return -1; + } + fprintf(fh, "This is an ARPA-format language model file, generated by CMU Sphinx\n"); + + /* The ARPA format doesn't require any extra information that + * N-Gram iterators can't give us, so this is very + * straightforward compared with DMP writing. */ + + /* Write N-gram counts. */ + fprintf(fh, "\\data\\\n"); + for (i = 0; i < model->n; ++i) { + fprintf(fh, "ngram %d=%d\n", i+1, model->n_counts[i]); + } + + /* Write N-grams */ + for (i = 0; i < model->n; ++i) { + fprintf(fh, "\n\\%d-grams:\n", i + 1); + for (itor = ngram_model_mgrams(model, i); itor; itor = ngram_iter_next(itor)) { + int32 const *wids; + int32 score, bowt; + int j; + + wids = ngram_iter_get(itor, &score, &bowt); + fprintf(fh, "%.4f ", logmath_log_to_log10(model->lmath, score)); + for (j = 0; j <= i; ++j) { + assert(wids[j] < model->n_counts[0]); + fprintf(fh, "%s ", model->word_str[wids[j]]); + } + if (i < model->n-1) + fprintf(fh, "%.4f", logmath_log_to_log10(model->lmath, bowt)); + fprintf(fh, "\n"); + } + } + fprintf(fh, "\n\\end\\\n"); + return fclose(fh); +} + +static int +ngram_model_arpa_apply_weights(ngram_model_t *base, float32 lw, + float32 wip, float32 uw) +{ + ngram_model_arpa_t *model = (ngram_model_arpa_t *)base; + lm3g_apply_weights(base, &model->lm3g, lw, wip, uw); + return 0; +} + +/* Lousy "templating" for things that are largely the same in DMP and + * ARPA models, except for the bigram and trigram types and some + * names. 
*/ +#define NGRAM_MODEL_TYPE ngram_model_arpa_t +#include "lm3g_templates.c" + +static void +ngram_model_arpa_free(ngram_model_t *base) +{ + ngram_model_arpa_t *model = (ngram_model_arpa_t *)base; + ckd_free(model->lm3g.unigrams); + ckd_free(model->lm3g.bigrams); + ckd_free(model->lm3g.trigrams); + ckd_free(model->lm3g.prob2); + ckd_free(model->lm3g.bo_wt2); + ckd_free(model->lm3g.prob3); + lm3g_tginfo_free(base, &model->lm3g); + ckd_free(model->lm3g.tseg_base); +} + +static ngram_funcs_t ngram_model_arpa_funcs = { + ngram_model_arpa_free, /* free */ + ngram_model_arpa_apply_weights, /* apply_weights */ + lm3g_template_score, /* score */ + lm3g_template_raw_score, /* raw_score */ + lm3g_template_add_ug, /* add_ug */ + lm3g_template_flush, /* flush */ + lm3g_template_iter, /* iter */ + lm3g_template_mgrams, /* mgrams */ + lm3g_template_successors, /* successors */ + lm3g_template_iter_get, /* iter_get */ + lm3g_template_iter_next, /* iter_next */ + lm3g_template_iter_free /* iter_free */ +}; diff --git a/media/sphinxbase/src/libsphinxbase/lm/ngram_model_arpa.h b/media/sphinxbase/src/libsphinxbase/lm/ngram_model_arpa.h new file mode 100644 index 000000000..2fd9e427d --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/lm/ngram_model_arpa.h @@ -0,0 +1,86 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2007 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * \file ngram_model_arpa.h ARPABO text format for N-Gram models + * + * Author: David Huggins-Daines + */ + +#ifndef __NGRAM_MODEL_ARPA_H__ +#define __NGRAM_MODEL_ARPA_H__ + +#include "ngram_model_internal.h" +#include "lm3g_model.h" + +/** + * Bigram structure. + */ +struct bigram_s { + uint32 wid; /**< Index of unigram entry for this. (NOT dictionary id.) */ + uint16 prob2; /**< Index into array of actual bigram probs */ + uint16 bo_wt2; /**< Index into array of actual bigram backoff wts */ + uint16 trigrams; /**< Index of 1st entry in lm_t.trigrams[], + RELATIVE TO its segment base (see above) */ +}; + +/** + * Trigram structure. 
+ * + * As with bigrams, trigram prob info kept in a separate table for conserving + * memory space. + */ +struct trigram_s { + uint32 wid; /**< Index of unigram entry for this. (NOT dictionary id.) */ + uint16 prob3; /**< Index into array of actual trigram probs */ +}; + + +/** + * Subclass of ngram_model for ARPA file reading. + */ +typedef struct ngram_model_arpa_s { + ngram_model_t base; /**< Base ngram_model_t structure */ + lm3g_model_t lm3g; /**< Shared lm3g structure */ + + /* Arrays of unique bigram probs and bo-wts, and trigram probs + * (these are temporary, actually) */ + sorted_list_t sorted_prob2; + sorted_list_t sorted_bo_wt2; + sorted_list_t sorted_prob3; +} ngram_model_arpa_t; + +#endif /* __NGRAM_MODEL_ARPA_H__ */ diff --git a/media/sphinxbase/src/libsphinxbase/lm/ngram_model_dmp.c b/media/sphinxbase/src/libsphinxbase/lm/ngram_model_dmp.c new file mode 100644 index 000000000..c6a2d8b85 --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/lm/ngram_model_dmp.c @@ -0,0 +1,969 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2007 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. 
+ * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * \file ngram_model_dmp.c DMP format language models + * + * Author: David Huggins-Daines + */ + +#include +#include +#include +#include +#include + +#include "sphinxbase/ckd_alloc.h" +#include "sphinxbase/pio.h" +#include "sphinxbase/err.h" +#include "sphinxbase/byteorder.h" +#include "sphinxbase/listelem_alloc.h" + +#include "ngram_model_dmp.h" + +static const char darpa_hdr[] = "Darpa Trigram LM"; +static ngram_funcs_t ngram_model_dmp_funcs; + +#define TSEG_BASE(m,b) ((m)->lm3g.tseg_base[(b)>>LOG_BG_SEG_SZ]) +#define FIRST_BG(m,u) ((m)->lm3g.unigrams[u].bigrams) +#define FIRST_TG(m,b) (TSEG_BASE((m),(b))+((m)->lm3g.bigrams[b].trigrams)) + +static unigram_t * +new_unigram_table(int32 n_ug) +{ + unigram_t *table; + int32 i; + + table = ckd_calloc(n_ug, sizeof(unigram_t)); + for (i = 0; i < n_ug; i++) { + table[i].prob1.f = -99.0; + table[i].bo_wt1.f = -99.0; + } + return table; +} + +ngram_model_t * +ngram_model_dmp_read(cmd_ln_t *config, + const char *file_name, + logmath_t *lmath) +{ + ngram_model_t *base; + ngram_model_dmp_t *model; + FILE *fp; + int do_mmap, do_swap; + int32 
is_pipe; + int32 i, j, k, vn, n, ts; + int32 n_unigram; + int32 n_bigram; + int32 n_trigram; + char str[1024]; + unigram_t *ugptr; + bigram_t *bgptr; + trigram_t *tgptr; + char *tmp_word_str; + char *map_base = NULL; + size_t offset = 0; + + base = NULL; + do_mmap = FALSE; + if (config) + do_mmap = cmd_ln_boolean_r(config, "-mmap"); + + if ((fp = fopen_comp(file_name, "rb", &is_pipe)) == NULL) { + E_ERROR("Dump file %s not found\n", file_name); + goto error_out; + } + + if (is_pipe && do_mmap) { + E_WARN("Dump file is compressed, will not use memory-mapped I/O\n"); + do_mmap = 0; + } + + /* The first int32 is the length of the magic string; if it only + * matches after byte-swapping, the file has the other endianness. */ + do_swap = FALSE; + if (fread(&k, sizeof(k), 1, fp) != 1) + goto error_out; + if (k != strlen(darpa_hdr)+1) { + SWAP_INT32(&k); + if (k != strlen(darpa_hdr)+1) { + E_ERROR("Wrong magic header size number %x: %s is not a dump file\n", k, file_name); + goto error_out; + } + do_swap = 1; + } + if (fread(str, 1, k, fp) != (size_t) k) { + E_ERROR("Cannot read header\n"); + goto error_out; + } + if (strncmp(str, darpa_hdr, k) != 0) { + /* Report the expected magic string and the offending file; every + * %s conversion must have a matching argument. */ + E_ERROR("Wrong header (expected %s): %s is not a dump file\n", darpa_hdr, file_name); + goto error_out; + } + + if (do_mmap) { + if (do_swap) { + E_INFO + ("Byteswapping required, will not use memory-mapped I/O for LM file\n"); + do_mmap = 0; + } + else { + E_INFO("Will use memory-mapped I/O for LM file\n"); +#ifdef __ADSPBLACKFIN__ /* This is true for both VisualDSP++ and uClinux. 
*/ + E_FATAL("memory mapping is not supported at the moment."); +#else +#endif + } + } + + /* Original LM filename: length-prefixed string, read and discarded. */ + if (fread(&k, sizeof(k), 1, fp) != 1) + goto error_out; + if (do_swap) SWAP_INT32(&k); + /* k comes straight from the file; refuse lengths that do not fit str[]. */ + if (k < 0 || (size_t) k > sizeof(str)) { + E_ERROR("Bad LM filename length %d in header\n", k); + goto error_out; + } + if (fread(str, 1, k, fp) != (size_t) k) { + E_ERROR("Cannot read LM filename in header\n"); + goto error_out; + } + + /* read version#, if present (must be <= 0) */ + if (fread(&vn, sizeof(vn), 1, fp) != 1) + goto error_out; + if (do_swap) SWAP_INT32(&vn); + if (vn <= 0) { + /* read and don't compare timestamps (we don't care) */ + if (fread(&ts, sizeof(ts), 1, fp) != 1) + goto error_out; + if (do_swap) SWAP_INT32(&ts); + + /* read and skip format description */ + for (;;) { + if (fread(&k, sizeof(k), 1, fp) != 1) + goto error_out; + if (do_swap) SWAP_INT32(&k); + if (k == 0) + break; + /* Same bound as above: each description string is read into str[]. */ + if (k < 0 || (size_t) k > sizeof(str)) { + E_ERROR("Bad format description length %d\n", k); + goto error_out; + } + if (fread(str, 1, k, fp) != (size_t) k) { + E_ERROR("Failed to read word\n"); + goto error_out; + } + } + /* read model->ucount */ + if (fread(&n_unigram, sizeof(n_unigram), 1, fp) != 1) + goto error_out; + if (do_swap) SWAP_INT32(&n_unigram); + } + else { + /* A positive value is not a version number but the unigram count. */ + n_unigram = vn; + } + + /* read model->bcount, tcount */ + if (fread(&n_bigram, sizeof(n_bigram), 1, fp) != 1) + goto error_out; + if (do_swap) SWAP_INT32(&n_bigram); + if (fread(&n_trigram, sizeof(n_trigram), 1, fp) != 1) + goto error_out; + if (do_swap) SWAP_INT32(&n_trigram); + E_INFO("ngrams 1=%d, 2=%d, 3=%d\n", n_unigram, n_bigram, n_trigram); + /* The counts size every allocation below; negative values are corrupt. */ + if (n_unigram < 0 || n_bigram < 0 || n_trigram < 0) { + E_ERROR("Bad n-gram counts in %s\n", file_name); + goto error_out; + } + + /* Allocate space for LM, including initial OOVs and placeholders; initialize it */ + model = ckd_calloc(1, sizeof(*model)); + base = &model->base; + if (n_trigram > 0) + n = 3; + else if (n_bigram > 0) + n = 2; + else + n = 1; + ngram_model_init(base, &ngram_model_dmp_funcs, lmath, n, n_unigram); + base->n_counts[0] = n_unigram; + base->n_counts[1] = n_bigram; + base->n_counts[2] = n_trigram; + + /* read unigrams (always in memory, as they contain dictionary + * mappings that can't be precomputed, and also could have OOVs added) */ + model->lm3g.unigrams = new_unigram_table(n_unigram + 1); + ugptr = 
model->lm3g.unigrams; + for (i = 0; i <= n_unigram; ++i) { + /* Skip over the mapping ID, we don't care about it. */ + if (fread(ugptr, sizeof(int32), 1, fp) != 1) { + E_ERROR("Failed to read maping id %d\n", i); + goto error_out; + } + /* Read the actual unigram structure. */ + if (fread(ugptr, sizeof(unigram_t), 1, fp) != 1) { + E_ERROR("Failed to read unigrams data\n"); + ngram_model_free(base); + fclose_comp(fp, is_pipe); + return NULL; + } + /* Byte swap if necessary. */ + if (do_swap) { + SWAP_INT32(&ugptr->prob1.l); + SWAP_INT32(&ugptr->bo_wt1.l); + SWAP_INT32(&ugptr->bigrams); + } + /* Convert values to log. */ + ugptr->prob1.l = logmath_log10_to_log(lmath, ugptr->prob1.f); + ugptr->bo_wt1.l = logmath_log10_to_log(lmath, ugptr->bo_wt1.f); + E_DEBUG(2, ("ug %d: prob %d bo %d bigrams %d\n", + i, ugptr->prob1.l, ugptr->bo_wt1.l, ugptr->bigrams)); + ++ugptr; + } + E_INFO("%8d = LM.unigrams(+trailer) read\n", n_unigram); + + /* Now mmap() the file and read in the rest of the (read-only) stuff. */ + if (do_mmap) { + offset = ftell(fp); + + /* Check for improper word alignment. */ + if (offset & 0x3) { + E_WARN("-mmap specified, but trigram index is not word-aligned. 
Will not memory-map.\n"); + do_mmap = FALSE; + } + else { + model->dump_mmap = mmio_file_read(file_name); + if (model->dump_mmap == NULL) { + do_mmap = FALSE; + } + else { + map_base = mmio_file_ptr(model->dump_mmap); + } + } + } + + if (n_bigram > 0) { + /* read bigrams */ + if (do_mmap) { + model->lm3g.bigrams = (bigram_t *) (map_base + offset); + offset += (n_bigram + 1) * sizeof(bigram_t); + } + else { + model->lm3g.bigrams = + ckd_calloc(n_bigram + 1, sizeof(bigram_t)); + if (fread(model->lm3g.bigrams, sizeof(bigram_t), n_bigram + 1, fp) + != (size_t) n_bigram + 1) { + E_ERROR("Failed to read bigrams data\n"); + goto error_out; + } + if (do_swap) { + for (i = 0, bgptr = model->lm3g.bigrams; i <= n_bigram; + i++, bgptr++) { + SWAP_INT16(&bgptr->wid); + SWAP_INT16(&bgptr->prob2); + SWAP_INT16(&bgptr->bo_wt2); + SWAP_INT16(&bgptr->trigrams); + } + } + } + E_INFO("%8d = LM.bigrams(+trailer) read\n", n_bigram); + } + + /* read trigrams */ + if (n_trigram > 0) { + if (do_mmap) { + model->lm3g.trigrams = (trigram_t *) (map_base + offset); + offset += n_trigram * sizeof(trigram_t); + } + else { + model->lm3g.trigrams = + ckd_calloc(n_trigram, sizeof(trigram_t)); + if (fread + (model->lm3g.trigrams, sizeof(trigram_t), n_trigram, fp) + != (size_t) n_trigram) { + E_ERROR("Failed to read trigrams data\n"); + goto error_out; + } + if (do_swap) { + for (i = 0, tgptr = model->lm3g.trigrams; i < n_trigram; + i++, tgptr++) { + SWAP_INT16(&tgptr->wid); + SWAP_INT16(&tgptr->prob3); + } + } + } + E_INFO("%8d = LM.trigrams read\n", n_trigram); + /* Initialize tginfo */ + model->lm3g.tginfo = ckd_calloc(n_unigram, sizeof(tginfo_t *)); + model->lm3g.le = listelem_alloc_init(sizeof(tginfo_t)); + } + + if (n_bigram > 0) { + /* read n_prob2 and prob2 array (in memory) */ + if (do_mmap) + fseek(fp, offset, SEEK_SET); + if (fread(&k, sizeof(k), 1, fp) != 1) + goto error_out; + if (do_swap) SWAP_INT32(&k); + model->lm3g.n_prob2 = k; + model->lm3g.prob2 = ckd_calloc(k, 
sizeof(*model->lm3g.prob2)); + if (fread(model->lm3g.prob2, sizeof(*model->lm3g.prob2), k, fp) != (size_t) k) { + E_ERROR("fread(prob2) failed\n"); + goto error_out; + } + for (i = 0; i < k; i++) { + if (do_swap) + SWAP_INT32(&model->lm3g.prob2[i].l); + /* Convert values to log. */ + model->lm3g.prob2[i].l = logmath_log10_to_log(lmath, model->lm3g.prob2[i].f); + } + E_INFO("%8d = LM.prob2 entries read\n", k); + } + + /* read n_bo_wt2 and bo_wt2 array (in memory) */ + if (base->n > 2) { + if (fread(&k, sizeof(k), 1, fp) != 1) + goto error_out; + if (do_swap) SWAP_INT32(&k); + model->lm3g.n_bo_wt2 = k; + model->lm3g.bo_wt2 = ckd_calloc(k, sizeof(*model->lm3g.bo_wt2)); + if (fread(model->lm3g.bo_wt2, sizeof(*model->lm3g.bo_wt2), k, fp) != (size_t) k) { + E_ERROR("Failed to read backoff weights\n"); + goto error_out; + } + for (i = 0; i < k; i++) { + if (do_swap) + SWAP_INT32(&model->lm3g.bo_wt2[i].l); + /* Convert values to log. */ + model->lm3g.bo_wt2[i].l = logmath_log10_to_log(lmath, model->lm3g.bo_wt2[i].f); + } + E_INFO("%8d = LM.bo_wt2 entries read\n", k); + } + + /* read n_prob3 and prob3 array (in memory) */ + if (base->n > 2) { + if (fread(&k, sizeof(k), 1, fp) != 1) + goto error_out; + if (do_swap) SWAP_INT32(&k); + model->lm3g.n_prob3 = k; + model->lm3g.prob3 = ckd_calloc(k, sizeof(*model->lm3g.prob3)); + if (fread(model->lm3g.prob3, sizeof(*model->lm3g.prob3), k, fp) != (size_t) k) { + E_ERROR("Failed to read trigram probability\n"); + goto error_out; + } + for (i = 0; i < k; i++) { + if (do_swap) + SWAP_INT32(&model->lm3g.prob3[i].l); + /* Convert values to log. 
*/ + model->lm3g.prob3[i].l = logmath_log10_to_log(lmath, model->lm3g.prob3[i].f); + } + E_INFO("%8d = LM.prob3 entries read\n", k); + } + + /* read tseg_base size and tseg_base */ + if (do_mmap) + offset = ftell(fp); + if (n_trigram > 0) { + if (do_mmap) { + memcpy(&k, map_base + offset, sizeof(k)); + offset += sizeof(int32); + model->lm3g.tseg_base = (int32 *) (map_base + offset); + offset += k * sizeof(int32); + } + else { + k = (n_bigram + 1) / BG_SEG_SZ + 1; + if (fread(&k, sizeof(k), 1, fp) != 1) + goto error_out; + if (do_swap) SWAP_INT32(&k); + model->lm3g.tseg_base = ckd_calloc(k, sizeof(int32)); + if (fread(model->lm3g.tseg_base, sizeof(int32), k, fp) != + (size_t) k) { + E_ERROR("Failed to read trigram index\n"); + goto error_out; + } + if (do_swap) + for (i = 0; i < k; i++) + SWAP_INT32(&model->lm3g.tseg_base[i]); + } + E_INFO("%8d = LM.tseg_base entries read\n", k); + } + + /* read ascii word strings */ + if (do_mmap) { + memcpy(&k, map_base + offset, sizeof(k)); + offset += sizeof(int32); + tmp_word_str = (char *) (map_base + offset); + offset += k; + } + else { + base->writable = TRUE; + if (fread(&k, sizeof(k), 1, fp) != 1) + goto error_out; + if (do_swap) SWAP_INT32(&k); + tmp_word_str = ckd_calloc(k, 1); + if (fread(tmp_word_str, 1, k, fp) != (size_t) k) { + E_ERROR("Failed to read words\n"); + goto error_out; + } + } + + /* First make sure string just read contains n_counts[0] words (PARANOIA!!) 
*/ + for (i = 0, j = 0; i < k; i++) + if (tmp_word_str[i] == '\0') + j++; + if (j != n_unigram) { + E_ERROR("Error reading word strings (%d doesn't match n_unigrams %d)\n", + j, n_unigram); + goto error_out; + } + + /* Break up string just read into words */ + if (do_mmap) { + j = 0; + for (i = 0; i < n_unigram; i++) { + base->word_str[i] = tmp_word_str + j; + if (hash_table_enter(base->wid, base->word_str[i], + (void *)(long)i) != (void *)(long)i) { + E_WARN("Duplicate word in dictionary: %s\n", base->word_str[i]); + } + j += strlen(base->word_str[i]) + 1; + } + } + else { + j = 0; + for (i = 0; i < n_unigram; i++) { + base->word_str[i] = ckd_salloc(tmp_word_str + j); + if (hash_table_enter(base->wid, base->word_str[i], + (void *)(long)i) != (void *)(long)i) { + E_WARN("Duplicate word in dictionary: %s\n", base->word_str[i]); + } + j += strlen(base->word_str[i]) + 1; + } + free(tmp_word_str); + } + E_INFO("%8d = ascii word strings read\n", i); + + fclose_comp(fp, is_pipe); + return base; + +error_out: + if (fp) + fclose_comp(fp, is_pipe); + ngram_model_free(base); + return NULL; +} + +ngram_model_dmp_t * +ngram_model_dmp_build(ngram_model_t *base) +{ + ngram_model_dmp_t *model; + ngram_model_t *newbase; + ngram_iter_t *itor; + sorted_list_t sorted_prob2; + sorted_list_t sorted_bo_wt2; + sorted_list_t sorted_prob3; + bigram_t *bgptr; + trigram_t *tgptr; + int i, bgcount, tgcount, seg; + + if (base->funcs == &ngram_model_dmp_funcs) { + E_INFO("Using existing DMP model.\n"); + return (ngram_model_dmp_t *)ngram_model_retain(base); + } + + /* Initialize new base model structure with params from base. */ + E_INFO("Building DMP model...\n"); + model = ckd_calloc(1, sizeof(*model)); + newbase = &model->base; + ngram_model_init(newbase, &ngram_model_dmp_funcs, + logmath_retain(base->lmath), + base->n, base->n_counts[0]); + /* Copy N-gram counts over. */ + memcpy(newbase->n_counts, base->n_counts, + base->n * sizeof(*base->n_counts)); + /* Make sure word strings are freed. 
*/ + newbase->writable = TRUE; + /* Initialize unigram table and string table. */ + model->lm3g.unigrams = new_unigram_table(newbase->n_counts[0] + 1); + for (itor = ngram_model_mgrams(base, 0); itor; + itor = ngram_iter_next(itor)) { + int32 prob1, bo_wt1; + int32 const *wids; + + /* Can't guarantee they will go in unigram order, so just to + * be correct, we do this... */ + wids = ngram_iter_get(itor, &prob1, &bo_wt1); + model->lm3g.unigrams[wids[0]].prob1.l = prob1; + model->lm3g.unigrams[wids[0]].bo_wt1.l = bo_wt1; + newbase->word_str[wids[0]] = ckd_salloc(ngram_word(base, wids[0])); + if ((hash_table_enter_int32(newbase->wid, + newbase->word_str[wids[0]], wids[0])) + != wids[0]) { + E_WARN("Duplicate word in dictionary: %s\n", newbase->word_str[wids[0]]); + } + } + E_INFO("%8d = #unigrams created\n", newbase->n_counts[0]); + + if (newbase->n < 2) + return model; + + /* Construct quantized probability table for bigrams and + * (optionally) trigrams. Hesitate to use the "sorted list" thing + * since it isn't so useful, but it's there already. */ + init_sorted_list(&sorted_prob2); + if (newbase->n > 2) { + init_sorted_list(&sorted_bo_wt2); + init_sorted_list(&sorted_prob3); + } + /* Construct bigram and trigram arrays. */ + bgptr = model->lm3g.bigrams = ckd_calloc(newbase->n_counts[1] + 1, sizeof(bigram_t)); + if (newbase->n > 2) { + tgptr = model->lm3g.trigrams = ckd_calloc(newbase->n_counts[2], sizeof(trigram_t)); + model->lm3g.tseg_base = + ckd_calloc((newbase->n_counts[1] + 1) / BG_SEG_SZ + 1, sizeof(int32)); + } + else + tgptr = NULL; + /* Since bigrams and trigrams have to be contiguous with others + * with the same N-1-gram, we traverse them in depth-first order + * to build the bigram and trigram arrays. */ + for (i = 0; i < newbase->n_counts[0]; ++i) { + ngram_iter_t *uitor; + bgcount = bgptr - model->lm3g.bigrams; + /* First bigram index (same as next if no bigrams...) 
*/ + model->lm3g.unigrams[i].bigrams = bgcount; + E_DEBUG(2, ("unigram %d: %s => bigram %d\n", i, newbase->word_str[i], bgcount)); + /* All bigrams corresponding to unigram i */ + uitor = ngram_ng_iter(base, i, NULL, 0); + for (itor = ngram_iter_successors(uitor); + itor; ++bgptr, itor = ngram_iter_next(itor)) { + int32 prob2, bo_wt2; + int32 const *wids; + ngram_iter_t *titor; + + wids = ngram_iter_get(itor, &prob2, &bo_wt2); + + assert (bgptr - model->lm3g.bigrams < newbase->n_counts[1]); + + bgptr->wid = wids[1]; + bgptr->prob2 = sorted_id(&sorted_prob2, &prob2); + if (newbase->n > 2) { + tgcount = (tgptr - model->lm3g.trigrams); + bgcount = (bgptr - model->lm3g.bigrams); + + /* Backoff weight (only if there are trigrams...) */ + bgptr->bo_wt2 = sorted_id(&sorted_bo_wt2, &bo_wt2); + + /* Find bigram segment for this bigram (this isn't + * used unless there are trigrams) */ + seg = bgcount >> LOG_BG_SEG_SZ; + /* If we just crossed a bigram segment boundary, then + * point tseg_base for the new segment to the current + * trigram pointer. */ + if (seg != (bgcount - 1) >> LOG_BG_SEG_SZ) + model->lm3g.tseg_base[seg] = tgcount; + /* Now calculate the trigram offset. */ + bgptr->trigrams = tgcount - model->lm3g.tseg_base[seg]; + E_DEBUG(2, ("bigram %d %s %s => trigram %d:%d\n", + bgcount, + newbase->word_str[wids[0]], + newbase->word_str[wids[1]], + seg, bgptr->trigrams)); + + /* And fill in successors' trigram info. 
*/ + for (titor = ngram_iter_successors(itor); + titor; ++tgptr, titor = ngram_iter_next(titor)) { + int32 prob3, dummy; + + assert(tgptr - model->lm3g.trigrams < newbase->n_counts[2]); + wids = ngram_iter_get(titor, &prob3, &dummy); + tgptr->wid = wids[2]; + tgptr->prob3 = sorted_id(&sorted_prob3, &prob3); + E_DEBUG(2, ("trigram %d %s %s %s => prob %d\n", + tgcount, + newbase->word_str[wids[0]], + newbase->word_str[wids[1]], + newbase->word_str[wids[2]], + tgptr->prob3)); + } + } + } + ngram_iter_free(uitor); + } + /* Add sentinel unigram and bigram records. */ + bgcount = bgptr - model->lm3g.bigrams; + model->lm3g.unigrams[i].bigrams = bgcount; + /* tseg_base and the trigram array are only allocated when n > 2, so + * the segment bookkeeping must be skipped for bigram-only models. */ + if (newbase->n > 2) { + tgcount = tgptr - model->lm3g.trigrams; + seg = bgcount >> LOG_BG_SEG_SZ; + /* Sentinel starts a new segment: record its trigram base first. */ + if (seg != (bgcount - 1) >> LOG_BG_SEG_SZ) + model->lm3g.tseg_base[seg] = tgcount; + bgptr->trigrams = tgcount - model->lm3g.tseg_base[seg]; + } + + /* Now create probability tables. */ + model->lm3g.n_prob2 = sorted_prob2.free; + model->lm3g.prob2 = vals_in_sorted_list(&sorted_prob2); + E_INFO("%8d = #bigrams created\n", newbase->n_counts[1]); + E_INFO("%8d = #prob2 entries\n", model->lm3g.n_prob2); + free_sorted_list(&sorted_prob2); + if (newbase->n > 2) { + /* Create trigram bo-wts array. */ + model->lm3g.n_bo_wt2 = sorted_bo_wt2.free; + model->lm3g.bo_wt2 = vals_in_sorted_list(&sorted_bo_wt2); + free_sorted_list(&sorted_bo_wt2); + E_INFO("%8d = #bo_wt2 entries\n", model->lm3g.n_bo_wt2); + /* Create trigram probability table. 
*/ + model->lm3g.n_prob3 = sorted_prob3.free; + model->lm3g.prob3 = vals_in_sorted_list(&sorted_prob3); + E_INFO("%8d = #trigrams created\n", newbase->n_counts[2]); + E_INFO("%8d = #prob3 entries\n", model->lm3g.n_prob3); + free_sorted_list(&sorted_prob3); + /* Initialize tginfo */ + model->lm3g.tginfo = ckd_calloc(newbase->n_counts[0], sizeof(tginfo_t *)); + model->lm3g.le = listelem_alloc_init(sizeof(tginfo_t)); + } + + return model; +} + +static void +fwrite_int32(FILE *fh, int32 val) +{ + fwrite(&val, 4, 1, fh); +} + +static void +fwrite_ug(FILE *fh, unigram_t *ug, logmath_t *lmath) +{ + int32 bogus = -1; + float32 log10val; + + /* Bogus dictionary mapping field. */ + fwrite(&bogus, 4, 1, fh); + /* Convert values to log10. */ + log10val = logmath_log_to_log10(lmath, ug->prob1.l); + fwrite(&log10val, 4, 1, fh); + log10val = logmath_log_to_log10(lmath, ug->bo_wt1.l); + fwrite(&log10val, 4, 1, fh); + fwrite_int32(fh, ug->bigrams); +} + +static void +fwrite_bg(FILE *fh, bigram_t *bg) +{ + fwrite(bg, sizeof(*bg), 1, fh); +} + +static void +fwrite_tg(FILE *fh, trigram_t *tg) +{ + fwrite(tg, sizeof(*tg), 1, fh); +} + +/** Please look at the definition of + */ +static char const *fmtdesc[] = { + "BEGIN FILE FORMAT DESCRIPTION", + "Header string length (int32) and string (including trailing 0)", + "Original LM filename string-length (int32) and filename (including trailing 0)", + "(int32) version number (present iff value <= 0)", + "(int32) original LM file modification timestamp (iff version# present)", + "(int32) string-length and string (including trailing 0) (iff version# present)", + "... 
previous entry continued any number of times (iff version# present)", + "(int32) 0 (terminating sequence of strings) (iff version# present)", + "(int32) log_bg_seg_sz (present iff different from default value of LOG2_BG_SEG_SZ)", + "(int32) lm_t.ucount (must be > 0)", + "(int32) lm_t.bcount", + "(int32) lm_t.tcount", + "lm_t.ucount+1 unigrams (including sentinel)", + "lm_t.bcount+1 bigrams (including sentinel 64 bits (bg_t) each if version=-1/-2, 128 bits (bg32_t) each if version=-3", + "lm_t.tcount trigrams (present iff lm_t.tcount > 0 32 bits (tg_t) each if version=-1/-2, 64 bits (tg32_t) each if version=-3)", + "(int32) lm_t.n_prob2", + "(int32) lm_t.prob2[]", + "(int32) lm_t.n_bo_wt2 (present iff lm_t.tcount > 0)", + "(int32) lm_t.bo_wt2[] (present iff lm_t.tcount > 0)", + "(int32) lm_t.n_prob3 (present iff lm_t.tcount > 0)", + "(int32) lm_t.prob3[] (present iff lm_t.tcount > 0)", + "(int32) (lm_t.bcount+1)/BG_SEG_SZ+1 (present iff lm_t.tcount > 0)", + "(int32) lm_t.tseg_base[] (present iff lm_t.tcount > 0)", + "(int32) Sum(all word string-lengths, including trailing 0 for each)", + "All word strings (including trailing 0 for each)", + "END FILE FORMAT DESCRIPTION", + NULL, +}; + +static void +ngram_model_dmp_write_header(FILE * fh) +{ + int32 k; + k = strlen(darpa_hdr) + 1; + fwrite_int32(fh, k); + fwrite(darpa_hdr, 1, k, fh); +} + +static void +ngram_model_dmp_write_lm_filename(FILE * fh, const char *lmfile) +{ + int32 k; + + k = strlen(lmfile) + 1; + fwrite_int32(fh, k); + fwrite(lmfile, 1, k, fh); +} + +#define LMDMP_VERSION_TG_16BIT -1 /**< VERSION 1 is the simplest DMP file which + is trigram or lower which used 16 bits in + bigram and trigram.*/ + +static void +ngram_model_dmp_write_version(FILE * fh, int32 mtime) +{ + fwrite_int32(fh, LMDMP_VERSION_TG_16BIT); /* version # */ + fwrite_int32(fh, mtime); +} + +static void +ngram_model_dmp_write_ngram_counts(FILE * fh, ngram_model_t *model) +{ + fwrite_int32(fh, model->n_counts[0]); + fwrite_int32(fh, 
model->n_counts[1]); + fwrite_int32(fh, model->n_counts[2]); +} + +static void +ngram_model_dmp_write_fmtdesc(FILE * fh) +{ + int32 i, k; + long pos; + + /* Write file format description into header */ + for (i = 0; fmtdesc[i] != NULL; i++) { + k = strlen(fmtdesc[i]) + 1; + fwrite_int32(fh, k); + fwrite(fmtdesc[i], 1, k, fh); + } + /* Pad it out in order to achieve 32-bit alignment */ + pos = ftell(fh); + k = pos & 3; + if (k) { + fwrite_int32(fh, 4-k); + fwrite("!!!!", 1, 4-k, fh); + } + fwrite_int32(fh, 0); +} + +static void +ngram_model_dmp_write_unigram(FILE *fh, ngram_model_t *model) +{ + ngram_model_dmp_t *lm = (ngram_model_dmp_t *)model; + int32 i; + + for (i = 0; i <= model->n_counts[0]; i++) { + fwrite_ug(fh, &(lm->lm3g.unigrams[i]), model->lmath); + } +} + + +static void +ngram_model_dmp_write_bigram(FILE *fh, ngram_model_t *model) +{ + ngram_model_dmp_t *lm = (ngram_model_dmp_t *)model; + int32 i; + + for (i = 0; i <= model->n_counts[1]; i++) { + fwrite_bg(fh, &(lm->lm3g.bigrams[i])); + } + +} + +static void +ngram_model_dmp_write_trigram(FILE *fh, ngram_model_t *model) +{ + ngram_model_dmp_t *lm = (ngram_model_dmp_t *)model; + int32 i; + + for (i = 0; i < model->n_counts[2]; i++) { + fwrite_tg(fh, &(lm->lm3g.trigrams[i])); + } +} + +static void +ngram_model_dmp_write_bgprob(FILE *fh, ngram_model_t *model) +{ + ngram_model_dmp_t *lm = (ngram_model_dmp_t *)model; + int32 i; + + fwrite_int32(fh, lm->lm3g.n_prob2); + for (i = 0; i < lm->lm3g.n_prob2; i++) { + float32 log10val = logmath_log_to_log10(model->lmath, lm->lm3g.prob2[i].l); + fwrite(&log10val, 4, 1, fh); + } +} + +static void +ngram_model_dmp_write_tgbowt(FILE *fh, ngram_model_t *model) +{ + ngram_model_dmp_t *lm = (ngram_model_dmp_t *)model; + int32 i; + + fwrite_int32(fh, lm->lm3g.n_bo_wt2); + for (i = 0; i < lm->lm3g.n_bo_wt2; i++) { + float32 log10val = logmath_log_to_log10(model->lmath, lm->lm3g.bo_wt2[i].l); + fwrite(&log10val, 4, 1, fh); + } +} + +static void 
+ngram_model_dmp_write_tgprob(FILE *fh, ngram_model_t *model) +{ + ngram_model_dmp_t *lm = (ngram_model_dmp_t *)model; + int32 i; + + fwrite_int32(fh, lm->lm3g.n_prob3); + for (i = 0; i < lm->lm3g.n_prob3; i++) { + float32 log10val = logmath_log_to_log10(model->lmath, lm->lm3g.prob3[i].l); + fwrite(&log10val, 4, 1, fh); + } +} + +static void +ngram_model_dmp_write_tg_segbase(FILE *fh, ngram_model_t *model) +{ + ngram_model_dmp_t *lm = (ngram_model_dmp_t *)model; + int32 i, k; + + k = (model->n_counts[1] + 1) / BG_SEG_SZ + 1; + fwrite_int32(fh, k); + for (i = 0; i < k; i++) + fwrite_int32(fh, lm->lm3g.tseg_base[i]); +} + +static void +ngram_model_dmp_write_wordstr(FILE *fh, ngram_model_t *model) +{ + int32 i, k; + + k = 0; + for (i = 0; i < model->n_counts[0]; i++) + k += strlen(model->word_str[i]) + 1; + fwrite_int32(fh, k); + for (i = 0; i < model->n_counts[0]; i++) + fwrite(model->word_str[i], 1, + strlen(model->word_str[i]) + 1, fh); +} + +int +ngram_model_dmp_write(ngram_model_t *base, + const char *file_name) +{ + ngram_model_dmp_t *model; + ngram_model_t *newbase; + FILE *fh; + + /* First, construct a DMP model from the base model. */ + model = ngram_model_dmp_build(base); + newbase = &model->base; + + /* Now write it, confident in the knowledge that it's the right + * kind of language model internally. 
*/ + if ((fh = fopen(file_name, "wb")) == NULL) { + E_ERROR("Cannot create file %s\n", file_name); + return -1; + } + ngram_model_dmp_write_header(fh); + ngram_model_dmp_write_lm_filename(fh, file_name); + ngram_model_dmp_write_version(fh, 0); + ngram_model_dmp_write_fmtdesc(fh); + ngram_model_dmp_write_ngram_counts(fh, newbase); + ngram_model_dmp_write_unigram(fh, newbase); + if (newbase->n > 1) { + ngram_model_dmp_write_bigram(fh, newbase); + if (newbase->n > 2) { + ngram_model_dmp_write_trigram(fh, newbase); + } + ngram_model_dmp_write_bgprob(fh, newbase); + if (newbase->n > 2) { + ngram_model_dmp_write_tgbowt(fh, newbase); + ngram_model_dmp_write_tgprob(fh, newbase); + ngram_model_dmp_write_tg_segbase(fh, newbase); + } + } + ngram_model_dmp_write_wordstr(fh, newbase); + ngram_model_free(newbase); + + return fclose(fh); +} + +static int +ngram_model_dmp_apply_weights(ngram_model_t *base, float32 lw, + float32 wip, float32 uw) +{ + ngram_model_dmp_t *model = (ngram_model_dmp_t *)base; + lm3g_apply_weights(base, &model->lm3g, lw, wip, uw); + return 0; +} + +/* Lousy "templating" for things that are largely the same in DMP and + * ARPA models, except for the bigram and trigram types and some + * names. 
*/ +#define NGRAM_MODEL_TYPE ngram_model_dmp_t +#include "lm3g_templates.c" + +static void +ngram_model_dmp_free(ngram_model_t *base) +{ + ngram_model_dmp_t *model = (ngram_model_dmp_t *)base; + + ckd_free(model->lm3g.unigrams); + ckd_free(model->lm3g.prob2); + if (model->dump_mmap) { + mmio_file_unmap(model->dump_mmap); + } + else { + ckd_free(model->lm3g.bigrams); + if (base->n > 2) { + ckd_free(model->lm3g.trigrams); + ckd_free(model->lm3g.tseg_base); + } + } + if (base->n > 2) { + ckd_free(model->lm3g.bo_wt2); + ckd_free(model->lm3g.prob3); + } + + lm3g_tginfo_free(base, &model->lm3g); +} + +static ngram_funcs_t ngram_model_dmp_funcs = { + ngram_model_dmp_free, /* free */ + ngram_model_dmp_apply_weights, /* apply_weights */ + lm3g_template_score, /* score */ + lm3g_template_raw_score, /* raw_score */ + lm3g_template_add_ug, /* add_ug */ + lm3g_template_flush, /* flush */ + lm3g_template_iter, /* iter */ + lm3g_template_mgrams, /* mgrams */ + lm3g_template_successors, /* successors */ + lm3g_template_iter_get, /* iter_get */ + lm3g_template_iter_next, /* iter_next */ + lm3g_template_iter_free /* iter_free */ +}; diff --git a/media/sphinxbase/src/libsphinxbase/lm/ngram_model_dmp.h b/media/sphinxbase/src/libsphinxbase/lm/ngram_model_dmp.h new file mode 100644 index 000000000..a3b141ad1 --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/lm/ngram_model_dmp.h @@ -0,0 +1,92 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2007 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * This work was supported in part by funding from the Defense Advanced
+ * Research Projects Agency and the National Science Foundation of the
+ * United States of America, and the CMU Sphinx Speech Consortium.
+ *
+ * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
+ * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
+ * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ====================================================================
+ *
+ */
+/*
+ * \file ngram_model_dmp.h DMP format for N-Gram models
+ *
+ * Author: David Huggins-Daines
+ */
+
+#ifndef __NGRAM_MODEL_DMP_H__
+#define __NGRAM_MODEL_DMP_H__
+
+#include "sphinxbase/mmio.h"
+
+#include "ngram_model_internal.h"
+#include "lm3g_model.h"
+
+/**
+ * On-disk representation of bigrams.
+ *
+ * Every member is a uint16 index into some other table rather than a
+ * value, so the member order and widths define the record layout and
+ * must not be changed.
+ */
+struct bigram_s {
+    uint16 wid; /**< Index of unigram entry for this. (NOT dictionary id.) */
+    uint16 prob2; /**< Index into array of actual bigram probs */
+    uint16 bo_wt2; /**< Index into array of actual bigram backoff wts */
+    uint16 trigrams; /**< Index of 1st entry in lm_t.trigrams[],
+                          RELATIVE TO its segment base (see lm3g_model.h) */
+};
+
+/**
+ * On-disk representation of trigrams.
+ *
+ * As with bigrams, trigram prob info kept in a separate table for conserving
+ * memory space.  Both members are uint16 indexes; the member order and
+ * widths define the record layout.
+ */
+struct trigram_s {
+    uint16 wid; /**< Index of unigram entry for this. (NOT dictionary id.) */
+    uint16 prob3; /**< Index into array of actual trigram probs */
+};
+
+/**
+ * Subclass of ngram_model for DMP file reading.
+ *
+ * `base' is the first member, which is what allows an ngram_model_t
+ * pointer to be cast to ngram_model_dmp_t and back.
+ */
+typedef struct ngram_model_dmp_s {
+    ngram_model_t base; /**< Base ngram_model_t structure */
+    lm3g_model_t lm3g; /**< Common lm3g_model_t structure */
+    mmio_file_t *dump_mmap; /**< mmap() of dump file (or NULL if none) */
+} ngram_model_dmp_t;
+
+/**
+ * Construct a DMP format model from a generic base model.
+ *
+ * Note: If base is already a DMP format model, this just calls
+ * ngram_model_retain(), and any changes will also be made in the base
+ * model.
+ *
+ * @param base Generic model to convert.
+ * @return The DMP-format model (implementation is not in this header).
+ */
+ngram_model_dmp_t *ngram_model_dmp_build(ngram_model_t *base);
+
+
+#endif /* __NGRAM_MODEL_DMP_H__ */
diff --git a/media/sphinxbase/src/libsphinxbase/lm/ngram_model_internal.h b/media/sphinxbase/src/libsphinxbase/lm/ngram_model_internal.h
new file mode 100644
index 000000000..dcc7b5ae3
--- /dev/null
+++ b/media/sphinxbase/src/libsphinxbase/lm/ngram_model_internal.h
@@ -0,0 +1,282 @@
+/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
+/* ====================================================================
+ * Copyright (c) 1999-2007 Carnegie Mellon University. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1.
Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * This work was supported in part by funding from the Defense Advanced
+ * Research Projects Agency and the National Science Foundation of the
+ * United States of America, and the CMU Sphinx Speech Consortium.
+ *
+ * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
+ * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
+ * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ====================================================================
+ *
+ */
+/*
+ * \file ngram_model_internal.h Internal structures for N-Gram models
+ *
+ * Author: David Huggins-Daines
+ */
+
+#ifndef __NGRAM_MODEL_INTERNAL_H__
+#define __NGRAM_MODEL_INTERNAL_H__
+
+#include "sphinxbase/ngram_model.h"
+#include "sphinxbase/hash_table.h"
+
+/**
+ * Common implementation of ngram_model_t.
+ *
+ * The details of bigram, trigram, and higher-order N-gram storage, if any, can
+ * vary somewhat depending on the file format in use.
+ */
+struct ngram_model_s {
+    int refcount; /**< Reference count */
+    int32 *n_counts; /**< Counts for 1, 2, 3, ... grams */
+    int32 n_1g_alloc; /**< Number of allocated word strings (for new word addition) */
+    int32 n_words; /**< Number of actual word strings (NOT the same as the
+                        number of unigrams, due to class words). */
+    uint8 n; /**< This is an n-gram model (1, 2, 3, ...). */
+    uint8 n_classes; /**< Number of classes (maximum 128) */
+    uint8 writable; /**< Are word strings writable? */
+    uint8 flags; /**< Any other flags we might care about
+                      (FIXME: Merge this and writable) */
+    logmath_t *lmath; /**< Log-math object */
+    float32 lw; /**< Language model scaling factor */
+    int32 log_wip; /**< Log of word insertion penalty */
+    int32 log_uw; /**< Log of unigram weight */
+    int32 log_uniform; /**< Log of uniform (0-gram) probability */
+    int32 log_uniform_weight; /**< Log of uniform weight (i.e. 1 - unigram weight) */
+    int32 log_zero; /**< Zero probability, cached here for quick lookup */
+    char **word_str; /**< Unigram names */
+    hash_table_t *wid; /**< Mapping of unigram names to word IDs. */
+    int32 *tmp_wids; /**< Temporary array of word IDs for ngram_model_get_ngram() */
+    struct ngram_class_s **classes; /**< Word class definitions. */
+    struct ngram_funcs_s *funcs; /**< Implementation-specific methods. */
+};
+
+/**
+ * Implementation of ngram_class_t.
+ */
+struct ngram_class_s {
+    int32 tag_wid; /**< Base word ID for this class tag */
+    int32 start_wid; /**< Starting base word ID for this class' words */
+    int32 n_words; /**< Number of base words for this class */
+    int32 *prob1; /**< Probability table for base words */
+    /**
+     * Custom hash table for additional words.
+     */
+    struct ngram_hash_s {
+        int32 wid; /**< Word ID of this bucket */
+        int32 prob1; /**< Probability for this word */
+        int32 next; /**< Index of next bucket (or -1 for no collision) */
+    } *nword_hash;
+    int32 n_hash; /**< Number of buckets in nword_hash (power of 2) */
+    int32 n_hash_inuse; /**< Number of words in nword_hash */
+};
+
+#define NGRAM_HASH_SIZE 128 /* presumably the initial nword_hash bucket count -- confirm at the use site */
+/*
+ * Class word IDs pack three fields into one 32-bit value: bit 31 marks
+ * the ID as a class word, bits 24-30 carry the class ID (7 bits, hence
+ * at most 128 classes), and the low 24 bits carry the base word ID.
+ */
+#define NGRAM_BASEWID(wid) ((wid)&0xffffff)
+#define NGRAM_CLASSID(wid) (((wid)>>24) & 0x7f)
+#define NGRAM_CLASSWID(wid,classid) (((classid)<<24) | 0x80000000 | (wid))
+#define NGRAM_IS_CLASSWID(wid) ((wid)&0x80000000)
+
+#define UG_ALLOC_STEP 10 /* NOTE(review): looks like the unigram-array growth increment -- confirm at the use site */
+
+/** Implementation-specific functions for operating on ngram_model_t objects */
+typedef struct ngram_funcs_s {
+    /**
+     * Implementation-specific function for freeing an ngram_model_t.
+     */
+    void (*free)(ngram_model_t *model);
+    /**
+     * Implementation-specific function for applying language model weights.
+     */
+    int (*apply_weights)(ngram_model_t *model,
+                         float32 lw,
+                         float32 wip,
+                         float32 uw);
+    /**
+     * Implementation-specific function for querying language model score.
+     */
+    int32 (*score)(ngram_model_t *model,
+                   int32 wid,
+                   int32 *history,
+                   int32 n_hist,
+                   int32 *n_used);
+    /**
+     * Implementation-specific function for querying raw language
+     * model probability.
+     */
+    int32 (*raw_score)(ngram_model_t *model,
+                       int32 wid,
+                       int32 *history,
+                       int32 n_hist,
+                       int32 *n_used);
+    /**
+     * Implementation-specific function for adding unigrams.
+     *
+     * This function updates the internal structures of a language
+     * model to add the given unigram with the given weight (defined
+     * as a log-factor applied to the uniform distribution). This
+     * includes reallocating or otherwise resizing the set of unigrams.
+     *
+     * @return The language model score (not raw log-probability) of
+     * the new word, or 0 for failure.
+     */
+    int32 (*add_ug)(ngram_model_t *model,
+                    int32 wid, int32 lweight);
+    /**
+     * Implementation-specific function for purging N-Gram cache
+     */
+    void (*flush)(ngram_model_t *model);
+
+    /**
+     * Implementation-specific function for creating an iterator from a
+     * word ID plus a history of n_hist word IDs.
+     */
+    ngram_iter_t * (*iter)(ngram_model_t *model, int32 wid, int32 *history, int32 n_hist);
+
+    /**
+     * Implementation-specific function for creating an iterator from
+     * an order argument m (presumably over all M-grams of that order).
+     */
+    ngram_iter_t * (*mgrams)(ngram_model_t *model, int32 m);
+
+    /**
+     * Implementation-specific function for deriving a successor
+     * iterator from an existing iterator.
+     */
+    ngram_iter_t * (*successors)(ngram_iter_t *itor);
+
+    /**
+     * Implementation-specific function for reading the entry an
+     * iterator points at: returns an array of word IDs and fills in
+     * out_score and out_bowt.
+     */
+    int32 const * (*iter_get)(ngram_iter_t *itor,
+                              int32 *out_score,
+                              int32 *out_bowt);
+
+    /**
+     * Implementation-specific function for advancing an iterator.
+     */
+    ngram_iter_t * (*iter_next)(ngram_iter_t *itor);
+
+    /**
+     * Implementation-specific function for releasing an iterator.
+     */
+    void (*iter_free)(ngram_iter_t *itor);
+} ngram_funcs_t;
+
+/**
+ * Base iterator structure for N-grams.
+ */
+struct ngram_iter_s {
+    ngram_model_t *model; /* model associated with this iterator */
+    int32 *wids; /**< Scratch space for word IDs. */
+    int16 m; /**< Order of history. */
+    int16 successor; /**< Is this a successor iterator? */
+};
+
+/**
+ * One class definition from a classdef file.
+ */
+typedef struct classdef_s {
+    char **words; /* presumably n_words entries, parallel to weights -- confirm in the reader */
+    float32 *weights;
+    int32 n_words;
+} classdef_t;
+
+/**
+ * Initialize the base ngram_model_t structure.
+ */
+int32
+ngram_model_init(ngram_model_t *model,
+                 ngram_funcs_t *funcs,
+                 logmath_t *lmath,
+                 int32 n, int32 n_unigram);
+
+/**
+ * Read an N-Gram model from an ARPABO text file.
+ */
+ngram_model_t *ngram_model_arpa_read(cmd_ln_t *config,
+                                     const char *file_name,
+                                     logmath_t *lmath);
+/**
+ * Read an N-Gram model from a Sphinx .DMP binary file.
+ */
+ngram_model_t *ngram_model_dmp_read(cmd_ln_t *config,
+                                    const char *file_name,
+                                    logmath_t *lmath);
+/**
+ * Read an N-Gram model from a Sphinx .DMP32 binary file.
+ */
+ngram_model_t *ngram_model_dmp32_read(cmd_ln_t *config,
+                                      const char *file_name,
+                                      logmath_t *lmath);
+
+/**
+ * Write an N-Gram model to an ARPABO text file.
+ */
+int ngram_model_arpa_write(ngram_model_t *model,
+                           const char *file_name);
+/**
+ * Write an N-Gram model to a Sphinx .DMP binary file.
+ */
+int ngram_model_dmp_write(ngram_model_t *model,
+                          const char *file_name);
+
+/**
+ * Read a class definition (classdef) file, adding what it defines to
+ * the `classes' hash table.
+ */
+int32 read_classdef_file(hash_table_t *classes, const char *classdef_file);
+
+/**
+ * Free a class definition.
+ */
+void classdef_free(classdef_t *classdef);
+
+/**
+ * Allocate and initialize an N-Gram class.
+ */
+ngram_class_t *ngram_class_new(ngram_model_t *model, int32 tag_wid,
+                               int32 start_wid, glist_t classwords);
+
+/**
+ * Deallocate an N-Gram class.
+ */
+void ngram_class_free(ngram_class_t *lmclass);
+
+/**
+ * Get the in-class log probability for a word in an N-Gram class.
+ *
+ * @return This probability, or 1 if word not found.
+ */
+int32 ngram_class_prob(ngram_class_t *lmclass, int32 wid);
+
+/**
+ * Initialize base M-Gram iterator structure.
+ */
+void ngram_iter_init(ngram_iter_t *itor, ngram_model_t *model,
+                     int m, int successor);
+
+#endif /* __NGRAM_MODEL_INTERNAL_H__ */
diff --git a/media/sphinxbase/src/libsphinxbase/lm/ngram_model_set.c b/media/sphinxbase/src/libsphinxbase/lm/ngram_model_set.c
new file mode 100644
index 000000000..50b7557ae
--- /dev/null
+++ b/media/sphinxbase/src/libsphinxbase/lm/ngram_model_set.c
@@ -0,0 +1,870 @@
+/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
+/* ====================================================================
+ * Copyright (c) 2008 Carnegie Mellon University. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1.
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/** + * @file ngram_model_set.c Set of language models. + * @author David Huggins-Daines + */ + +#include +#include + +#include "sphinxbase/err.h" +#include "sphinxbase/ckd_alloc.h" +#include "sphinxbase/strfuncs.h" +#include "sphinxbase/filename.h" + +#include "ngram_model_set.h" + +static ngram_funcs_t ngram_model_set_funcs; + +static int +my_compare(const void *a, const void *b) +{ + /* Make sure floats to the beginning. 
*/ + if (strcmp(*(char * const *)a, "") == 0) + return -1; + else if (strcmp(*(char * const *)b, "") == 0) + return 1; + else + return strcmp(*(char * const *)a, *(char * const *)b); +} + +static void +build_widmap(ngram_model_t *base, logmath_t *lmath, int32 n) +{ + ngram_model_set_t *set = (ngram_model_set_t *)base; + ngram_model_t **models = set->lms; + hash_table_t *vocab; + glist_t hlist; + gnode_t *gn; + int32 i; + + /* Construct a merged vocabulary and a set of word-ID mappings. */ + vocab = hash_table_new(models[0]->n_words, FALSE); + /* Create the set of merged words. */ + for (i = 0; i < set->n_models; ++i) { + int32 j; + for (j = 0; j < models[i]->n_words; ++j) { + /* Ignore collisions. */ + (void)hash_table_enter_int32(vocab, models[i]->word_str[j], j); + } + } + /* Create the array of words, then sort it. */ + if (hash_table_lookup(vocab, "", NULL) != 0) + (void)hash_table_enter_int32(vocab, "", 0); + /* Now we know the number of unigrams, initialize the base model. */ + ngram_model_init(base, &ngram_model_set_funcs, lmath, n, hash_table_inuse(vocab)); + base->writable = FALSE; /* We will reuse the pointers from the submodels. */ + i = 0; + hlist = hash_table_tolist(vocab, NULL); + for (gn = hlist; gn; gn = gnode_next(gn)) { + hash_entry_t *ent = gnode_ptr(gn); + base->word_str[i++] = (char *)ent->key; + } + glist_free(hlist); + qsort(base->word_str, base->n_words, sizeof(*base->word_str), my_compare); + + /* Now create the word ID mappings. */ + if (set->widmap) + ckd_free_2d((void **)set->widmap); + set->widmap = (int32 **) ckd_calloc_2d(base->n_words, set->n_models, + sizeof(**set->widmap)); + for (i = 0; i < base->n_words; ++i) { + int32 j; + /* Also create the master wid mapping. 
*/ + (void)hash_table_enter_int32(base->wid, base->word_str[i], i); + /* printf("%s: %d => ", base->word_str[i], i); */ + for (j = 0; j < set->n_models; ++j) { + set->widmap[i][j] = ngram_wid(models[j], base->word_str[i]); + /* printf("%d ", set->widmap[i][j]); */ + } + /* printf("\n"); */ + } + hash_table_free(vocab); +} + +ngram_model_t * +ngram_model_set_init(cmd_ln_t *config, + ngram_model_t **models, + char **names, + const float32 *weights, + int32 n_models) +{ + ngram_model_set_t *model; + ngram_model_t *base; + logmath_t *lmath; + int32 i, n; + + if (n_models == 0) /* WTF */ + return NULL; + + /* Do consistency checking on the models. They must all use the + * same logbase and shift. */ + lmath = models[0]->lmath; + for (i = 1; i < n_models; ++i) { + if (logmath_get_base(models[i]->lmath) != logmath_get_base(lmath) + || logmath_get_shift(models[i]->lmath) != logmath_get_shift(lmath)) { + E_ERROR("Log-math parameters don't match, will not create LM set\n"); + return NULL; + } + } + + /* Allocate the combined model, initialize it. */ + model = ckd_calloc(1, sizeof(*model)); + base = &model->base; + model->n_models = n_models; + model->lms = ckd_calloc(n_models, sizeof(*model->lms)); + model->names = ckd_calloc(n_models, sizeof(*model->names)); + /* Initialize weights to a uniform distribution */ + model->lweights = ckd_calloc(n_models, sizeof(*model->lweights)); + { + int32 uniform = logmath_log(lmath, 1.0/n_models); + for (i = 0; i < n_models; ++i) + model->lweights[i] = uniform; + } + /* Default to interpolate if weights were given. */ + if (weights) + model->cur = -1; + + n = 0; + for (i = 0; i < n_models; ++i) { + model->lms[i] = ngram_model_retain(models[i]); + model->names[i] = ckd_salloc(names[i]); + if (weights) + model->lweights[i] = logmath_log(lmath, weights[i]); + /* N is the maximum of all merged models. */ + if (models[i]->n > n) + n = models[i]->n; + } + /* Allocate the history mapping table. 
*/ + model->maphist = ckd_calloc(n - 1, sizeof(*model->maphist)); + + /* Now build the word-ID mapping and merged vocabulary. */ + build_widmap(base, lmath, n); + return base; +} + +ngram_model_t * +ngram_model_set_read(cmd_ln_t *config, + const char *lmctlfile, + logmath_t *lmath) +{ + FILE *ctlfp; + glist_t lms = NULL; + glist_t lmnames = NULL; + __BIGSTACKVARIABLE__ char str[1024]; + ngram_model_t *set = NULL; + hash_table_t *classes; + char *basedir, *c; + + /* Read all the class definition files to accumulate a mapping of + * classnames to definitions. */ + classes = hash_table_new(0, FALSE); + if ((ctlfp = fopen(lmctlfile, "r")) == NULL) { + E_ERROR_SYSTEM("Failed to open %s", lmctlfile); + return NULL; + } + + /* Try to find the base directory to append to relative paths in + * the lmctl file. */ + if ((c = strrchr(lmctlfile, '/')) || (c = strrchr(lmctlfile, '\\'))) { + /* Include the trailing slash. */ + basedir = ckd_calloc(c - lmctlfile + 2, 1); + memcpy(basedir, lmctlfile, c - lmctlfile + 1); + } + else { + basedir = NULL; + } + E_INFO("Reading LM control file '%s'\n", lmctlfile); + if (basedir) + E_INFO("Will prepend '%s' to unqualified paths\n", basedir); + + if (fscanf(ctlfp, "%1023s", str) == 1) { + if (strcmp(str, "{") == 0) { + /* Load LMclass files */ + while ((fscanf(ctlfp, "%1023s", str) == 1) + && (strcmp(str, "}") != 0)) { + char *deffile; + if (basedir && !path_is_absolute(str)) + deffile = string_join(basedir, str, NULL); + else + deffile = ckd_salloc(str); + E_INFO("Reading classdef from '%s'\n", deffile); + if (read_classdef_file(classes, deffile) < 0) { + ckd_free(deffile); + goto error_out; + } + ckd_free(deffile); + } + + if (strcmp(str, "}") != 0) { + E_ERROR("Unexpected EOF in %s\n", lmctlfile); + goto error_out; + } + + /* This might be the first LM name. */ + if (fscanf(ctlfp, "%1023s", str) != 1) + str[0] = '\0'; + } + } + else + str[0] = '\0'; + + /* Read in one LM at a time and add classes to them as necessary. 
*/ + while (str[0] != '\0') { + char *lmfile; + ngram_model_t *lm; + + if (basedir && str[0] != '/' && str[0] != '\\') + lmfile = string_join(basedir, str, NULL); + else + lmfile = ckd_salloc(str); + E_INFO("Reading lm from '%s'\n", lmfile); + lm = ngram_model_read(config, lmfile, NGRAM_AUTO, lmath); + if (lm == NULL) { + ckd_free(lmfile); + goto error_out; + } + if (fscanf(ctlfp, "%1023s", str) != 1) { + E_ERROR("LMname missing after LMFileName '%s'\n", lmfile); + ckd_free(lmfile); + goto error_out; + } + ckd_free(lmfile); + lms = glist_add_ptr(lms, lm); + lmnames = glist_add_ptr(lmnames, ckd_salloc(str)); + + if (fscanf(ctlfp, "%1023s", str) == 1) { + if (strcmp(str, "{") == 0) { + /* LM uses classes; read their names */ + while ((fscanf(ctlfp, "%1023s", str) == 1) && + (strcmp(str, "}") != 0)) { + void *val; + classdef_t *classdef; + + if (hash_table_lookup(classes, str, &val) == -1) { + E_ERROR("Unknown class %s in control file\n", str); + goto error_out; + } + classdef = val; + if (ngram_model_add_class(lm, str, 1.0, + classdef->words, classdef->weights, + classdef->n_words) < 0) { + goto error_out; + } + E_INFO("Added class %s containing %d words\n", + str, classdef->n_words); + } + if (strcmp(str, "}") != 0) { + E_ERROR("Unexpected EOF in %s\n", lmctlfile); + goto error_out; + } + if (fscanf(ctlfp, "%1023s", str) != 1) + str[0] = '\0'; + } + } + else + str[0] = '\0'; + } + fclose(ctlfp); + + /* Now construct arrays out of lms and lmnames, and build an + * ngram_model_set. 
*/ + lms = glist_reverse(lms); + lmnames = glist_reverse(lmnames); + { + int32 n_models; + ngram_model_t **lm_array; + char **name_array; + gnode_t *lm_node, *name_node; + int32 i; + + n_models = glist_count(lms); + lm_array = ckd_calloc(n_models, sizeof(*lm_array)); + name_array = ckd_calloc(n_models, sizeof(*name_array)); + lm_node = lms; + name_node = lmnames; + for (i = 0; i < n_models; ++i) { + lm_array[i] = gnode_ptr(lm_node); + name_array[i] = gnode_ptr(name_node); + lm_node = gnode_next(lm_node); + name_node = gnode_next(name_node); + } + set = ngram_model_set_init(config, lm_array, name_array, + NULL, n_models); + ckd_free(lm_array); + ckd_free(name_array); + } +error_out: + { + gnode_t *gn; + glist_t hlist; + + if (set == NULL) { + for (gn = lms; gn; gn = gnode_next(gn)) { + ngram_model_free(gnode_ptr(gn)); + } + } + glist_free(lms); + for (gn = lmnames; gn; gn = gnode_next(gn)) { + ckd_free(gnode_ptr(gn)); + } + glist_free(lmnames); + hlist = hash_table_tolist(classes, NULL); + for (gn = hlist; gn; gn = gnode_next(gn)) { + hash_entry_t *he = gnode_ptr(gn); + ckd_free((char *)he->key); + classdef_free(he->val); + } + glist_free(hlist); + hash_table_free(classes); + ckd_free(basedir); + } + return set; +} + +int32 +ngram_model_set_count(ngram_model_t *base) +{ + ngram_model_set_t *set = (ngram_model_set_t *)base; + return set->n_models; +} + +ngram_model_set_iter_t * +ngram_model_set_iter(ngram_model_t *base) +{ + ngram_model_set_t *set = (ngram_model_set_t *)base; + ngram_model_set_iter_t *itor; + + if (set == NULL || set->n_models == 0) + return NULL; + itor = ckd_calloc(1, sizeof(*itor)); + itor->set = set; + return itor; +} + +ngram_model_set_iter_t * +ngram_model_set_iter_next(ngram_model_set_iter_t *itor) +{ + if (++itor->cur == itor->set->n_models) { + ngram_model_set_iter_free(itor); + return NULL; + } + return itor; +} + +void +ngram_model_set_iter_free(ngram_model_set_iter_t *itor) +{ + ckd_free(itor); +} + +ngram_model_t * 
+ngram_model_set_iter_model(ngram_model_set_iter_t *itor, + char const **lmname) +{ + if (lmname) *lmname = itor->set->names[itor->cur]; + return itor->set->lms[itor->cur]; +} + +ngram_model_t * +ngram_model_set_lookup(ngram_model_t *base, + const char *name) +{ + ngram_model_set_t *set = (ngram_model_set_t *)base; + int32 i; + + if (name == NULL) { + if (set->cur == -1) + return NULL; + else + return set->lms[set->cur]; + } + + /* There probably won't be very many submodels. */ + for (i = 0; i < set->n_models; ++i) + if (0 == strcmp(set->names[i], name)) + break; + if (i == set->n_models) + return NULL; + return set->lms[i]; +} + +ngram_model_t * +ngram_model_set_select(ngram_model_t *base, + const char *name) +{ + ngram_model_set_t *set = (ngram_model_set_t *)base; + int32 i; + + /* There probably won't be very many submodels. */ + for (i = 0; i < set->n_models; ++i) + if (0 == strcmp(set->names[i], name)) + break; + if (i == set->n_models) + return NULL; + set->cur = i; + return set->lms[set->cur]; +} + +const char * +ngram_model_set_current(ngram_model_t *base) +{ + ngram_model_set_t *set = (ngram_model_set_t *)base; + + if (set->cur == -1) + return NULL; + else + return set->names[set->cur]; +} + +int32 +ngram_model_set_current_wid(ngram_model_t *base, + int32 set_wid) +{ + ngram_model_set_t *set = (ngram_model_set_t *)base; + + if (set->cur == -1 || set_wid >= base->n_words) + return NGRAM_INVALID_WID; + else + return set->widmap[set_wid][set->cur]; +} + +int32 +ngram_model_set_known_wid(ngram_model_t *base, + int32 set_wid) +{ + ngram_model_set_t *set = (ngram_model_set_t *)base; + + if (set_wid >= base->n_words) + return FALSE; + else if (set->cur == -1) { + int32 i; + for (i = 0; i < set->n_models; ++i) { + if (set->widmap[set_wid][i] != ngram_unknown_wid(set->lms[i])) + return TRUE; + } + return FALSE; + } + else + return (set->widmap[set_wid][set->cur] + != ngram_unknown_wid(set->lms[set->cur])); +} + +ngram_model_t * 
+ngram_model_set_interp(ngram_model_t *base, + const char **names, + const float32 *weights) +{ + ngram_model_set_t *set = (ngram_model_set_t *)base; + + /* If we have a set of weights here, then set them. */ + if (names && weights) { + int32 i, j; + + /* We hope there aren't many models. */ + for (i = 0; i < set->n_models; ++i) { + for (j = 0; j < set->n_models; ++j) + if (0 == strcmp(names[i], set->names[j])) + break; + if (j == set->n_models) { + E_ERROR("Unknown LM name %s\n", names[i]); + return NULL; + } + set->lweights[j] = logmath_log(base->lmath, weights[i]); + } + } + else if (weights) { + memcpy(set->lweights, weights, set->n_models * sizeof(*set->lweights)); + } + /* Otherwise just enable existing weights. */ + set->cur = -1; + return base; +} + +ngram_model_t * +ngram_model_set_add(ngram_model_t *base, + ngram_model_t *model, + const char *name, + float32 weight, + int reuse_widmap) + +{ + ngram_model_set_t *set = (ngram_model_set_t *)base; + float32 fprob; + int32 scale, i; + + /* Add it to the array of lms. */ + ++set->n_models; + set->lms = ckd_realloc(set->lms, set->n_models * sizeof(*set->lms)); + set->lms[set->n_models - 1] = model; + set->names = ckd_realloc(set->names, set->n_models * sizeof(*set->names)); + set->names[set->n_models - 1] = ckd_salloc(name); + /* Expand the history mapping table if necessary. */ + if (model->n > base->n) { + base->n = model->n; + set->maphist = ckd_realloc(set->maphist, + (model->n - 1) * sizeof(*set->maphist)); + } + + /* Renormalize the interpolation weights. */ + fprob = weight * 1.0 / set->n_models; + set->lweights = ckd_realloc(set->lweights, + set->n_models * sizeof(*set->lweights)); + set->lweights[set->n_models - 1] = logmath_log(base->lmath, fprob); + /* Now normalize everything else to fit it in. This is + * accomplished by simply scaling all the other probabilities + * by (1-fprob). 
*/ + scale = logmath_log(base->lmath, 1.0 - fprob); + for (i = 0; i < set->n_models - 1; ++i) + set->lweights[i] += scale; + + /* Reuse the old word ID mapping if requested. */ + if (reuse_widmap) { + int32 **new_widmap; + + /* Tack another column onto the widmap array. */ + new_widmap = (int32 **)ckd_calloc_2d(base->n_words, set->n_models, + sizeof (**new_widmap)); + for (i = 0; i < base->n_words; ++i) { + /* Copy all the existing mappings. */ + memcpy(new_widmap[i], set->widmap[i], + (set->n_models - 1) * sizeof(**new_widmap)); + /* Create the new mapping. */ + new_widmap[i][set->n_models-1] = ngram_wid(model, base->word_str[i]); + } + ckd_free_2d((void **)set->widmap); + set->widmap = new_widmap; + } + else { + build_widmap(base, base->lmath, base->n); + } + return model; +} + +ngram_model_t * +ngram_model_set_remove(ngram_model_t *base, + const char *name, + int reuse_widmap) +{ + ngram_model_set_t *set = (ngram_model_set_t *)base; + ngram_model_t *submodel; + int32 lmidx, scale, n, i; + float32 fprob; + + for (lmidx = 0; lmidx < set->n_models; ++lmidx) + if (0 == strcmp(name, set->names[lmidx])) + break; + if (lmidx == set->n_models) + return NULL; + submodel = set->lms[lmidx]; + + /* Renormalize the interpolation weights by scaling them by + * 1/(1-fprob) */ + fprob = logmath_exp(base->lmath, set->lweights[lmidx]); + scale = logmath_log(base->lmath, 1.0 - fprob); + + /* Remove it from the array of lms, renormalize remaining weights, + * and recalcluate n. */ + --set->n_models; + n = 0; + ckd_free(set->names[lmidx]); + set->names[lmidx] = NULL; + for (i = 0; i < set->n_models; ++i) { + if (i >= lmidx) { + set->lms[i] = set->lms[i+1]; + set->names[i] = set->names[i+1]; + set->lweights[i] = set->lweights[i+1]; + } + set->lweights[i] -= scale; + if (set->lms[i]->n > n) + n = set->lms[i]->n; + } + /* There's no need to shrink these arrays. */ + set->lms[set->n_models] = NULL; + set->lweights[set->n_models] = base->log_zero; + /* No need to shrink maphist either. 
*/ + + /* Reuse the existing word ID mapping if requested. */ + if (reuse_widmap) { + /* Just go through and shrink each row. */ + for (i = 0; i < base->n_words; ++i) { + memmove(set->widmap[i] + lmidx, set->widmap[i] + lmidx + 1, + (set->n_models - lmidx) * sizeof(**set->widmap)); + } + } + else { + build_widmap(base, base->lmath, n); + } + return submodel; +} + +void +ngram_model_set_map_words(ngram_model_t *base, + const char **words, + int32 n_words) +{ + ngram_model_set_t *set = (ngram_model_set_t *)base; + int32 i; + + /* Recreate the word mapping. */ + if (base->writable) { + for (i = 0; i < base->n_words; ++i) { + ckd_free(base->word_str[i]); + } + } + ckd_free(base->word_str); + ckd_free_2d((void **)set->widmap); + base->writable = TRUE; + base->n_words = base->n_1g_alloc = n_words; + base->word_str = ckd_calloc(n_words, sizeof(*base->word_str)); + set->widmap = (int32 **)ckd_calloc_2d(n_words, set->n_models, sizeof(**set->widmap)); + hash_table_empty(base->wid); + for (i = 0; i < n_words; ++i) { + int32 j; + base->word_str[i] = ckd_salloc(words[i]); + (void)hash_table_enter_int32(base->wid, base->word_str[i], i); + for (j = 0; j < set->n_models; ++j) { + set->widmap[i][j] = ngram_wid(set->lms[j], base->word_str[i]); + } + } +} + +static int +ngram_model_set_apply_weights(ngram_model_t *base, float32 lw, + float32 wip, float32 uw) +{ + ngram_model_set_t *set = (ngram_model_set_t *)base; + int32 i; + + /* Apply weights to each sub-model. */ + for (i = 0; i < set->n_models; ++i) + ngram_model_apply_weights(set->lms[i], lw, wip, uw); + return 0; +} + +static int32 +ngram_model_set_score(ngram_model_t *base, int32 wid, + int32 *history, int32 n_hist, + int32 *n_used) +{ + ngram_model_set_t *set = (ngram_model_set_t *)base; + int32 mapwid; + int32 score; + int32 i; + + /* Truncate the history. */ + if (n_hist > base->n - 1) + n_hist = base->n - 1; + + /* Interpolate if there is no current. 
*/ + if (set->cur == -1) { + score = base->log_zero; + for (i = 0; i < set->n_models; ++i) { + int32 j; + /* Map word and history IDs for each model. */ + mapwid = set->widmap[wid][i]; + for (j = 0; j < n_hist; ++j) { + if (history[j] == NGRAM_INVALID_WID) + set->maphist[j] = NGRAM_INVALID_WID; + else + set->maphist[j] = set->widmap[history[j]][i]; + } + score = logmath_add(base->lmath, score, + set->lweights[i] + + ngram_ng_score(set->lms[i], + mapwid, set->maphist, n_hist, n_used)); + } + } + else { + int32 j; + /* Map word and history IDs (FIXME: do this in a function?) */ + mapwid = set->widmap[wid][set->cur]; + for (j = 0; j < n_hist; ++j) { + if (history[j] == NGRAM_INVALID_WID) + set->maphist[j] = NGRAM_INVALID_WID; + else + set->maphist[j] = set->widmap[history[j]][set->cur]; + } + score = ngram_ng_score(set->lms[set->cur], + mapwid, set->maphist, n_hist, n_used); + } + + return score; +} + +static int32 +ngram_model_set_raw_score(ngram_model_t *base, int32 wid, + int32 *history, int32 n_hist, + int32 *n_used) +{ + ngram_model_set_t *set = (ngram_model_set_t *)base; + int32 mapwid; + int32 score; + int32 i; + + /* Truncate the history. */ + if (n_hist > base->n - 1) + n_hist = base->n - 1; + + /* Interpolate if there is no current. */ + if (set->cur == -1) { + score = base->log_zero; + for (i = 0; i < set->n_models; ++i) { + int32 j; + /* Map word and history IDs for each model. */ + mapwid = set->widmap[wid][i]; + for (j = 0; j < n_hist; ++j) { + if (history[j] == NGRAM_INVALID_WID) + set->maphist[j] = NGRAM_INVALID_WID; + else + set->maphist[j] = set->widmap[history[j]][i]; + } + score = logmath_add(base->lmath, score, + set->lweights[i] + + ngram_ng_prob(set->lms[i], + mapwid, set->maphist, n_hist, n_used)); + } + } + else { + int32 j; + /* Map word and history IDs (FIXME: do this in a function?) 
*/ + mapwid = set->widmap[wid][set->cur]; + for (j = 0; j < n_hist; ++j) { + if (history[j] == NGRAM_INVALID_WID) + set->maphist[j] = NGRAM_INVALID_WID; + else + set->maphist[j] = set->widmap[history[j]][set->cur]; + } + score = ngram_ng_prob(set->lms[set->cur], + mapwid, set->maphist, n_hist, n_used); + } + + return score; +} + +static int32 +ngram_model_set_add_ug(ngram_model_t *base, + int32 wid, int32 lweight) +{ + ngram_model_set_t *set = (ngram_model_set_t *)base; + int32 *newwid; + int32 i, prob; + + /* At this point the word has already been added to the master + model and we have a new word ID for it. Add it to active + submodels and track the word IDs. */ + newwid = ckd_calloc(set->n_models, sizeof(*newwid)); + prob = base->log_zero; + for (i = 0; i < set->n_models; ++i) { + int32 wprob, n_hist; + + /* Only add to active models. */ + if (set->cur == -1 || set->cur == i) { + /* Did this word already exist? */ + newwid[i] = ngram_wid(set->lms[i], base->word_str[wid]); + if (newwid[i] == NGRAM_INVALID_WID) { + /* Add it to the submodel. */ + newwid[i] = ngram_model_add_word(set->lms[i], base->word_str[wid], + logmath_exp(base->lmath, lweight)); + if (newwid[i] == NGRAM_INVALID_WID) { + ckd_free(newwid); + return base->log_zero; + } + } + /* Now get the unigram probability for the new word and either + * interpolate it or use it (if this is the current model). */ + wprob = ngram_ng_prob(set->lms[i], newwid[i], NULL, 0, &n_hist); + if (set->cur == i) + prob = wprob; + else if (set->cur == -1) + prob = logmath_add(base->lmath, prob, set->lweights[i] + wprob); + } + else { + newwid[i] = NGRAM_INVALID_WID; + } + } + /* Okay we have the word IDs for this in all the submodels. Now + do some complicated memory mangling to add this to the + widmap. 
*/ + set->widmap = ckd_realloc(set->widmap, base->n_words * sizeof(*set->widmap)); + set->widmap[0] = ckd_realloc(set->widmap[0], + base->n_words + * set->n_models + * sizeof(**set->widmap)); + for (i = 0; i < base->n_words; ++i) + set->widmap[i] = set->widmap[0] + i * set->n_models; + memcpy(set->widmap[wid], newwid, set->n_models * sizeof(*newwid)); + ckd_free(newwid); + return prob; +} + +static void +ngram_model_set_free(ngram_model_t *base) +{ + ngram_model_set_t *set = (ngram_model_set_t *)base; + int32 i; + + for (i = 0; i < set->n_models; ++i) + ngram_model_free(set->lms[i]); + ckd_free(set->lms); + for (i = 0; i < set->n_models; ++i) + ckd_free(set->names[i]); + ckd_free(set->names); + ckd_free(set->lweights); + ckd_free(set->maphist); + ckd_free_2d((void **)set->widmap); +} + +static void +ngram_model_set_flush(ngram_model_t *base) +{ + ngram_model_set_t *set = (ngram_model_set_t *)base; + int32 i; + + for (i = 0; i < set->n_models; ++i) + ngram_model_flush(set->lms[i]); +} + +static ngram_funcs_t ngram_model_set_funcs = { + ngram_model_set_free, /* free */ + ngram_model_set_apply_weights, /* apply_weights */ + ngram_model_set_score, /* score */ + ngram_model_set_raw_score, /* raw_score */ + ngram_model_set_add_ug, /* add_ug */ + ngram_model_set_flush /* flush */ +}; diff --git a/media/sphinxbase/src/libsphinxbase/lm/ngram_model_set.h b/media/sphinxbase/src/libsphinxbase/lm/ngram_model_set.h new file mode 100644 index 000000000..5fbc7e5a4 --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/lm/ngram_model_set.h @@ -0,0 +1,71 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2007 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/** + * @file ngram_model_set.h Set of language models. + * @author David Huggins-Daines + */ + +#ifndef __NGRAM_MODEL_SET_H__ +#define __NGRAM_MODEL_SET_H__ + +#include "ngram_model_internal.h" +#include "lm3g_model.h" + +/** + * Subclass of ngram_model for grouping language models. + */ +typedef struct ngram_model_set_s { + ngram_model_t base; /**< Base ngram_model_t structure. */ + + int32 n_models; /**< Number of models in this set. */ + int32 cur; /**< Currently selected model, or -1 for none. 
*/ + ngram_model_t **lms; /**< Language models in this set. */ + char **names; /**< Names for language models. */ + int32 *lweights; /**< Log interpolation weights. */ + int32 **widmap; /**< Word ID mapping for submodels. */ + int32 *maphist; /**< Word ID mapping for N-Gram history. */ +} ngram_model_set_t; + +/** + * Iterator over a model set. + */ +struct ngram_model_set_iter_s { + ngram_model_set_t *set; + int32 cur; +}; + +#endif /* __NGRAM_MODEL_SET_H__ */ diff --git a/media/sphinxbase/src/libsphinxbase/util/bio.c b/media/sphinxbase/src/libsphinxbase/util/bio.c new file mode 100644 index 000000000..56c620f68 --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/util/bio.c @@ -0,0 +1,644 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * bio.c -- Sphinx-3 binary file I/O functions. + * + * ********************************************** + * CMU ARPA Speech Project + * + * Copyright (c) 1996 Carnegie Mellon University. + * ALL RIGHTS RESERVED. + * ********************************************** + * + * HISTORY + * $Log$ + * Revision 1.4 2005/06/21 20:40:46 arthchan2003 + * 1, Fixed doxygen documentation, 2, Add the $ keyword. + * + * Revision 1.3 2005/03/30 01:22:46 archan + * Fixed mistakes in last updates. Add + * + * + * 02-Jul-1997 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Bugfix: Added byteswapping in bio_verify_chksum(). + * + * 18-Dec-1996 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Created. 
+ */ + + #include +#include +#include + +#ifdef _MSC_VER +#pragma warning (disable: 4996) +#endif + +#include "sphinxbase/bio.h" +#include "sphinxbase/err.h" +#include "sphinxbase/ckd_alloc.h" + + +#define BIO_HDRARG_MAX 32 +#define END_COMMENT "*end_comment*\n" + + +static void +bcomment_read(FILE * fp) +{ + __BIGSTACKVARIABLE__ char iline[16384]; + + while (fgets(iline, sizeof(iline), fp) != NULL) { + if (strcmp(iline, END_COMMENT) == 0) + return; + } + E_FATAL("Missing %s marker\n", END_COMMENT); +} + + +static int32 +swap_check(FILE * fp) +{ + uint32 magic; + + if (fread(&magic, sizeof(uint32), 1, fp) != 1) { + E_ERROR("Cannot read BYTEORDER MAGIC NO.\n"); + return -1; + } + + if (magic != BYTE_ORDER_MAGIC) { + /* either need to swap or got bogus magic number */ + SWAP_INT32(&magic); + + if (magic == BYTE_ORDER_MAGIC) + return 1; + + SWAP_INT32(&magic); + E_ERROR("Bad BYTEORDER MAGIC NO: %08x, expecting %08x\n", + magic, BYTE_ORDER_MAGIC); + return -1; + } + + return 0; +} + + +void +bio_hdrarg_free(char **argname, char **argval) +{ + int32 i; + + if (argname == NULL) + return; + for (i = 0; argname[i]; i++) { + ckd_free(argname[i]); + ckd_free(argval[i]); + } + ckd_free(argname); + ckd_free(argval); +} + + +int32 +bio_writehdr_version(FILE * fp, char *version) +{ /* Writes the "s3" text header with a single "version" entry followed by the binary byte-order magic; returns 0 on success, -1 if the magic cannot be written. */ + uint32 b; + + fprintf(fp, "s3\n"); + fprintf(fp, "version %s\n", version); + fprintf(fp, "endhdr\n"); + fflush(fp); + + b = (uint32) BYTE_ORDER_MAGIC; + if (fwrite(&b, sizeof(uint32), 1, fp) != 1) return -1; /* report a short write of the magic, matching bio_writehdr() */ + fflush(fp); + + return 0; +} + + +int32 +bio_writehdr(FILE *fp, ...) 
+{ + char const *key; + va_list args; + uint32 b; + + fprintf(fp, "s3\n"); + va_start(args, fp); + while ((key = va_arg(args, char const *)) != NULL) { + char const *val = va_arg(args, char const *); + if (val == NULL) { + E_ERROR("Wrong number of arguments\n"); + va_end(args); + return -1; + } + fprintf(fp, "%s %s\n", key, val); + } + va_end(args); + + fprintf(fp, "endhdr\n"); + fflush(fp); + + b = (uint32) BYTE_ORDER_MAGIC; + if (fwrite(&b, sizeof(uint32), 1, fp) != 1) + return -1; + fflush(fp); + + return 0; +} + + +int32 +bio_readhdr(FILE * fp, char ***argname, char ***argval, int32 * swap) +{ /* Parses the text header from fp into a NULL-terminated *argname array and a parallel *argval array (at most BIO_HDRARG_MAX pairs), then sets *swap from swap_check(); returns 0, or -1 after freeing both arrays and setting them to NULL. */ + __BIGSTACKVARIABLE__ char line[16384], word[4096]; /* the %4095s field widths below must stay at sizeof(word) - 1 */ + int32 i, l; + int32 lineno; + + *argname = (char **) ckd_calloc(BIO_HDRARG_MAX + 1, sizeof(char *)); + *argval = (char **) ckd_calloc(BIO_HDRARG_MAX, sizeof(char *)); + + lineno = 0; + if (fgets(line, sizeof(line), fp) == NULL){ + E_ERROR("Premature EOF, line %d\n", lineno); + goto error_out; + } + lineno++; + + if ((line[0] == 's') && (line[1] == '3') && (line[2] == '\n')) { + /* New format (post Dec-1996, including checksums); read argument-value pairs */ + for (i = 0;;) { + if (fgets(line, sizeof(line), fp) == NULL) { + E_ERROR("Premature EOF, line %d\n", lineno); + goto error_out; + } + lineno++; + + if (sscanf(line, "%4095s%n", word, &l) != 1) { /* bounded: line is four times larger than word */ + E_ERROR("Header format error, line %d\n", lineno); + goto error_out; + } + if (strcmp(word, "endhdr") == 0) + break; + if (word[0] == '#') /* Skip comments */ + continue; + + if (i >= BIO_HDRARG_MAX) { + E_ERROR + ("Max arg-value limit(%d) exceeded; increase BIO_HDRARG_MAX\n", + BIO_HDRARG_MAX); + goto error_out; + } + + (*argname)[i] = ckd_salloc(word); + if (sscanf(line + l, "%4095s", word) != 1) { /* Multi-word values not allowed */ + E_ERROR("Header format error, line %d\n", lineno); + goto error_out; + } + (*argval)[i] = ckd_salloc(word); + i++; + } + } + else { + /* Old format (without checksums); the first entry must be the version# */ + if (sscanf(line, "%4095s", word) != 
1) { + E_ERROR("Header format error, line %d\n", lineno); + goto error_out; + } + + (*argname)[0] = ckd_salloc("version"); + (*argval)[0] = ckd_salloc(word); + i = 1; + + bcomment_read(fp); + } + (*argname)[i] = NULL; + + if ((*swap = swap_check(fp)) < 0) { + E_ERROR("swap_check failed\n"); + goto error_out; + } + + return 0; +error_out: + bio_hdrarg_free(*argname, *argval); + *argname = *argval = NULL; + return -1; +} + + +static uint32 +chksum_accum(const void *buf, int32 el_sz, int32 n_el, uint32 sum) +{ + int32 i; + uint8 *i8; + uint16 *i16; + uint32 *i32; + + switch (el_sz) { + case 1: + i8 = (uint8 *) buf; + for (i = 0; i < n_el; i++) + sum = (sum << 5 | sum >> 27) + i8[i]; + break; + case 2: + i16 = (uint16 *) buf; + for (i = 0; i < n_el; i++) + sum = (sum << 10 | sum >> 22) + i16[i]; + break; + case 4: + i32 = (uint32 *) buf; + for (i = 0; i < n_el; i++) + sum = (sum << 20 | sum >> 12) + i32[i]; + break; + default: + E_FATAL("Unsupported elemsize for checksum: %d\n", el_sz); + break; + } + + return sum; +} + + +static void +swap_buf(void *buf, int32 el_sz, int32 n_el) +{ + int32 i; + uint16 *buf16; + uint32 *buf32; + + switch (el_sz) { + case 1: + break; + case 2: + buf16 = (uint16 *) buf; + for (i = 0; i < n_el; i++) + SWAP_INT16(buf16 + i); + break; + case 4: + buf32 = (uint32 *) buf; + for (i = 0; i < n_el; i++) + SWAP_INT32(buf32 + i); + break; + default: + E_FATAL("Unsupported elemsize for byteswapping: %d\n", el_sz); + break; + } +} + + +int32 +bio_fread(void *buf, int32 el_sz, int32 n_el, FILE * fp, int32 swap, + uint32 * chksum) +{ + if (fread(buf, el_sz, n_el, fp) != (size_t) n_el) + return -1; + + if (swap) + swap_buf(buf, el_sz, n_el); + + if (chksum) + *chksum = chksum_accum(buf, el_sz, n_el, *chksum); + + return n_el; +} + +int32 +bio_fwrite(const void *buf, int32 el_sz, int32 n_el, FILE *fp, + int32 swap, uint32 *chksum) +{ + if (chksum) + *chksum = chksum_accum(buf, el_sz, n_el, *chksum); + if (swap) { + void *nbuf; + int rv; + + nbuf = 
ckd_calloc(n_el, el_sz); + memcpy(nbuf, buf, n_el * el_sz); + swap_buf(nbuf, el_sz, n_el); + rv = fwrite(nbuf, el_sz, n_el, fp); + ckd_free(nbuf); + return rv; + } + else { + return fwrite(buf, el_sz, n_el, fp); + } +} + +int32 +bio_fread_1d(void **buf, size_t el_sz, uint32 * n_el, FILE * fp, + int32 sw, uint32 * ck) +{ + /* Read 1-d array size */ + if (bio_fread(n_el, sizeof(int32), 1, fp, sw, ck) != 1) + E_FATAL("fread(arraysize) failed\n"); + if (*n_el <= 0) + E_FATAL("Bad arraysize: %d\n", *n_el); + + /* Allocate memory for array data */ + *buf = (void *) ckd_calloc(*n_el, el_sz); + + /* Read array data */ + if (bio_fread(*buf, el_sz, *n_el, fp, sw, ck) != *n_el) + E_FATAL("fread(arraydata) failed\n"); + + return *n_el; +} + +int32 +bio_fread_2d(void ***arr, + size_t e_sz, + uint32 *d1, + uint32 *d2, + FILE *fp, + uint32 swap, + uint32 *chksum) +{ + uint32 l_d1, l_d2; + uint32 n; + size_t ret; + void *raw; + + ret = bio_fread(&l_d1, sizeof(uint32), 1, fp, swap, chksum); + if (ret != 1) { + if (ret == 0) { + E_ERROR_SYSTEM("Unable to read complete data"); + } + else { + E_ERROR_SYSTEM("OS error in bio_fread_2d"); + } + return -1; + } + ret = bio_fread(&l_d2, sizeof(uint32), 1, fp, swap, chksum); + if (ret != 1) { + if (ret == 0) { + E_ERROR_SYSTEM("Unable to read complete data"); + } + else { + E_ERROR_SYSTEM("OS error in bio_fread_2d"); + } + return -1; + } + if (bio_fread_1d(&raw, e_sz, &n, fp, swap, chksum) != n) + return -1; + + assert(n == l_d1*l_d2); + + *d1 = l_d1; + *d2 = l_d2; + *arr = ckd_alloc_2d_ptr(l_d1, l_d2, raw, e_sz); + + return n; +} + +int32 +bio_fread_3d(void ****arr, + size_t e_sz, + uint32 *d1, + uint32 *d2, + uint32 *d3, + FILE *fp, + uint32 swap, + uint32 *chksum) +{ + uint32 l_d1; + uint32 l_d2; + uint32 l_d3; + uint32 n; + void *raw; + size_t ret; + + ret = bio_fread(&l_d1, sizeof(uint32), 1, fp, swap, chksum); + if (ret != 1) { + if (ret == 0) { + E_ERROR_SYSTEM("Unable to read complete data"); + } + else { + E_ERROR_SYSTEM("OS error 
in bio_fread_3d"); + } + return -1; + } + ret = bio_fread(&l_d2, sizeof(uint32), 1, fp, swap, chksum); + if (ret != 1) { + if (ret == 0) { + E_ERROR_SYSTEM("Unable to read complete data"); + } + else { + E_ERROR_SYSTEM("OS error in bio_fread_3d"); + } + return -1; + } + ret = bio_fread(&l_d3, sizeof(uint32), 1, fp, swap, chksum); + if (ret != 1) { + if (ret == 0) { + E_ERROR_SYSTEM("Unable to read complete data"); + } + else { + E_ERROR_SYSTEM("OS error in bio_fread_3d"); + } + return -1; + } + + if (bio_fread_1d(&raw, e_sz, &n, fp, swap, chksum) != n) { + return -1; + } + + assert(n == l_d1 * l_d2 * l_d3); + + *arr = ckd_alloc_3d_ptr(l_d1, l_d2, l_d3, raw, e_sz); + *d1 = l_d1; + *d2 = l_d2; + *d3 = l_d3; + + return n; +} + +void +bio_verify_chksum(FILE * fp, int32 byteswap, uint32 chksum) +{ + uint32 file_chksum; + + if (fread(&file_chksum, sizeof(uint32), 1, fp) != 1) + E_FATAL("fread(chksum) failed\n"); + if (byteswap) + SWAP_INT32(&file_chksum); + if (file_chksum != chksum) + E_FATAL + ("Checksum error; file-checksum %08x, computed %08x\n", + file_chksum, chksum); +} + +int +bio_fwrite_3d(void ***arr, + size_t e_sz, + uint32 d1, + uint32 d2, + uint32 d3, + FILE *fp, + uint32 *chksum) +{ + size_t ret; + + /* write out first dimension 1 */ + ret = bio_fwrite(&d1, sizeof(uint32), 1, fp, 0, chksum); + if (ret != 1) { + if (ret == 0) { + E_ERROR_SYSTEM("Unable to write complete data"); + } + else { + E_ERROR_SYSTEM("OS error in bio_fwrite_3d"); + } + return -1; + } + + /* write out first dimension 2 */ + ret = bio_fwrite(&d2, sizeof(uint32), 1, fp, 0, chksum); + if (ret != 1) { + if (ret == 0) { + E_ERROR_SYSTEM("Unable to write complete data"); + } + else { + E_ERROR_SYSTEM("OS error in bio_fwrite_3d"); + } + return -1; + } + + /* write out first dimension 3 */ + ret = bio_fwrite(&d3, sizeof(uint32), 1, fp, 0, chksum); + if (ret != 1) { + if (ret == 0) { + E_ERROR_SYSTEM("Unable to write complete data"); + } + else { + E_ERROR_SYSTEM("OS error in bio_fwrite_3d"); + 
} + return -1; + } + + /* write out the data in the array as one big block */ + return bio_fwrite_1d(arr[0][0], e_sz, d1 * d2 * d3, fp, chksum); +} + +int +bio_fwrite_1d(void *arr, + size_t e_sz, + uint32 d1, + FILE *fp, + uint32 *chksum) +{ + size_t ret; + ret = bio_fwrite(&d1, sizeof(uint32), 1, fp, 0, chksum); + if (ret != 1) { + if (ret == 0) { + E_ERROR_SYSTEM("Unable to write complete data"); + } + else { + E_ERROR_SYSTEM("OS error in bio_fwrite_1d"); + } + return -1; + } + + ret = bio_fwrite(arr, e_sz, d1, fp, 0, chksum); + if (ret != d1) { + if (ret == 0) { + E_ERROR_SYSTEM("Unable to write complete data"); + } + else { + E_ERROR_SYSTEM("OS error in bio_fwrite_1d"); + } + + return -1; + } + + return ret; +} + +int16* +bio_read_wavfile(char const *directory, + char const *filename, + char const *extension, + int32 header, + int32 endian, + size_t *nsamps) +{ /* Opens directory/filename plus extension (the extension is appended only when filename does not already end with it; directory may be NULL), skips header bytes when header > 0, and returns the rest of the file as a ckd_calloc'ed int16 array, storing the sample count in *nsamps when nsamps is non-NULL. Returns NULL on seek or short-read failure; a failed fopen goes through E_FATAL_SYSTEM. The endian argument is not used. */ + FILE *uttfp; + char *inputfile; + size_t n, l; + int16 *data; + + n = strlen(extension); + l = strlen(filename); + if ((n <= l) && (0 == strcmp(filename + l - n, extension))) + extension = ""; + inputfile = ckd_calloc((directory ? strlen(directory) : 0) + l + n + 2, 1); /* directory may be NULL (tested just below), so it must not reach strlen() unguarded */ + if (directory) { + sprintf(inputfile, "%s/%s%s", directory, filename, extension); + } else { + sprintf(inputfile, "%s%s", filename, extension); + } + + if ((uttfp = fopen(inputfile, "rb")) == NULL) { + E_FATAL_SYSTEM("Failed to open file '%s' for reading", inputfile); + } + fseek(uttfp, 0, SEEK_END); + n = ftell(uttfp); + fseek(uttfp, 0, SEEK_SET); + if (header > 0) { + if (fseek(uttfp, header, SEEK_SET) < 0) { + E_ERROR_SYSTEM("Failed to move to an offset %d in a file '%s'", header, inputfile); + fclose(uttfp); + ckd_free(inputfile); + return NULL; + } + n -= header; + } + n /= sizeof(int16); + data = ckd_calloc(n, sizeof(*data)); + if ((l = fread(data, sizeof(int16), n, uttfp)) < n) { + E_ERROR_SYSTEM("Failed to read %d samples from %s: %d", (int) n, inputfile, (int) l); /* casts make the size_t arguments match the %d conversions */ + ckd_free(data); + ckd_free(inputfile); + fclose(uttfp); + return NULL; + } + 
ckd_free(inputfile); + fclose(uttfp); + if (nsamps) *nsamps = n; + + return data; +} diff --git a/media/sphinxbase/src/libsphinxbase/util/bitvec.c b/media/sphinxbase/src/libsphinxbase/util/bitvec.c new file mode 100644 index 000000000..2d139010e --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/util/bitvec.c @@ -0,0 +1,101 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * bitvec.c -- Bit vector type. + * + * ********************************************** + * CMU ARPA Speech Project + * + * Copyright (c) 1999 Carnegie Mellon University. + * ALL RIGHTS RESERVED. + * ********************************************** + * + * HISTORY + * $Log: bitvec.c,v $ + * Revision 1.4 2005/06/22 02:58:22 arthchan2003 + * Added keyword + * + * Revision 1.3 2005/03/30 01:22:48 archan + * Fixed mistakes in last updates. Add + * + * + * 05-Mar-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon + * Started. 
+ */ + + +#include "sphinxbase/bitvec.h" + +bitvec_t * +bitvec_realloc(bitvec_t *vec, + size_t old_len, + size_t new_len) +{ + bitvec_t *new_vec; + size_t old_size = bitvec_size(old_len); + size_t new_size = bitvec_size(new_len); + + new_vec = ckd_realloc(vec, new_size * sizeof(bitvec_t)); + if (new_size > old_size) + memset(new_vec + old_size, 0, (new_size - old_size) * sizeof(bitvec_t)); + + return new_vec; +} + +size_t +bitvec_count_set(bitvec_t *vec, size_t len) +{ + size_t words, bits, w, b, n; + bitvec_t *v; + + words = len / BITVEC_BITS; + bits = len % BITVEC_BITS; + v = vec; + n = 0; + for (w = 0; w < words; ++w, ++v) { + if (*v == 0) + continue; + for (b = 0; b < BITVEC_BITS; ++b) + if (*v & (1<= 97 && inta <= 122) { + inta += -32; + } + if (intb >= 97 && intb <= 122) { + intb += -32; + } + + } else if (zcode == 233 || zcode == 169) { + +/* + EBCDIC is assumed - ZCODE is the EBCDIC code of either lower or + upper case 'Z'. +*/ + + if (inta >= 129 && inta <= 137 || inta >= 145 && inta <= 153 || inta + >= 162 && inta <= 169) { + inta += 64; + } + if (intb >= 129 && intb <= 137 || intb >= 145 && intb <= 153 || intb + >= 162 && intb <= 169) { + intb += 64; + } + + } else if (zcode == 218 || zcode == 250) { + +/* + ASCII is assumed, on Prime machines - ZCODE is the ASCII code + plus 128 of either lower or upper case 'Z'. +*/ + + if (inta >= 225 && inta <= 250) { + inta += -32; + } + if (intb >= 225 && intb <= 250) { + intb += -32; + } + } + ret_val = inta == intb; + +/* + RETURN + + End of LSAME +*/ + + return ret_val; +} /* lsame_ */ + +doublereal sdot_(integer *n, real *sx, integer *incx, real *sy, integer *incy) +{ + /* System generated locals */ + integer i__1; + real ret_val; + + /* Local variables */ + static integer i__, m, ix, iy, mp1; + static real stemp; + + +/* + forms the dot product of two vectors. + uses unrolled loops for increments equal to one. + jack dongarra, linpack, 3/11/78. 
+ modified 12/3/93, array(1) declarations changed to array(*) +*/ + + + /* Parameter adjustments */ + --sy; + --sx; + + /* Function Body */ + stemp = 0.f; + ret_val = 0.f; + if (*n <= 0) { + return ret_val; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* + code for unequal increments or equal increments + not equal to 1 +*/ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + stemp += sx[ix] * sy[iy]; + ix += *incx; + iy += *incy; +/* L10: */ + } + ret_val = stemp; + return ret_val; + +/* + code for both increments equal to 1 + + + clean-up loop +*/ + +L20: + m = *n % 5; + if (m == 0) { + goto L40; + } + i__1 = m; + for (i__ = 1; i__ <= i__1; ++i__) { + stemp += sx[i__] * sy[i__]; +/* L30: */ + } + if (*n < 5) { + goto L60; + } +L40: + mp1 = m + 1; + i__1 = *n; + for (i__ = mp1; i__ <= i__1; i__ += 5) { + stemp = stemp + sx[i__] * sy[i__] + sx[i__ + 1] * sy[i__ + 1] + sx[ + i__ + 2] * sy[i__ + 2] + sx[i__ + 3] * sy[i__ + 3] + sx[i__ + + 4] * sy[i__ + 4]; +/* L50: */ + } +L60: + ret_val = stemp; + return ret_val; +} /* sdot_ */ + +/* Subroutine */ int sgemm_(char *transa, char *transb, integer *m, integer * + n, integer *k, real *alpha, real *a, integer *lda, real *b, integer * + ldb, real *beta, real *c__, integer *ldc) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, + i__3; + + /* Local variables */ + static integer i__, j, l, info; + static logical nota, notb; + static real temp; + static integer ncola; + extern logical lsame_(char *, char *); + static integer nrowa, nrowb; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + SGEMM performs one of the matrix-matrix operations + + C := alpha*op( A )*op( B ) + beta*C, + + where op( X ) is one of + + op( X ) = X or op( X ) = X', + + alpha and beta are scalars, and A, B and C 
are matrices, with op( A ) + an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. + + Parameters + ========== + + TRANSA - CHARACTER*1. + On entry, TRANSA specifies the form of op( A ) to be used in + the matrix multiplication as follows: + + TRANSA = 'N' or 'n', op( A ) = A. + + TRANSA = 'T' or 't', op( A ) = A'. + + TRANSA = 'C' or 'c', op( A ) = A'. + + Unchanged on exit. + + TRANSB - CHARACTER*1. + On entry, TRANSB specifies the form of op( B ) to be used in + the matrix multiplication as follows: + + TRANSB = 'N' or 'n', op( B ) = B. + + TRANSB = 'T' or 't', op( B ) = B'. + + TRANSB = 'C' or 'c', op( B ) = B'. + + Unchanged on exit. + + M - INTEGER. + On entry, M specifies the number of rows of the matrix + op( A ) and of the matrix C. M must be at least zero. + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the number of columns of the matrix + op( B ) and the number of columns of the matrix C. N must be + at least zero. + Unchanged on exit. + + K - INTEGER. + On entry, K specifies the number of columns of the matrix + op( A ) and the number of rows of the matrix op( B ). K must + be at least zero. + Unchanged on exit. + + ALPHA - REAL . + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + A - REAL array of DIMENSION ( LDA, ka ), where ka is + k when TRANSA = 'N' or 'n', and is m otherwise. + Before entry with TRANSA = 'N' or 'n', the leading m by k + part of the array A must contain the matrix A, otherwise + the leading k by m part of the array A must contain the + matrix A. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. When TRANSA = 'N' or 'n' then + LDA must be at least max( 1, m ), otherwise LDA must be at + least max( 1, k ). + Unchanged on exit. + + B - REAL array of DIMENSION ( LDB, kb ), where kb is + n when TRANSB = 'N' or 'n', and is k otherwise. 
+ Before entry with TRANSB = 'N' or 'n', the leading k by n + part of the array B must contain the matrix B, otherwise + the leading n by k part of the array B must contain the + matrix B. + Unchanged on exit. + + LDB - INTEGER. + On entry, LDB specifies the first dimension of B as declared + in the calling (sub) program. When TRANSB = 'N' or 'n' then + LDB must be at least max( 1, k ), otherwise LDB must be at + least max( 1, n ). + Unchanged on exit. + + BETA - REAL . + On entry, BETA specifies the scalar beta. When BETA is + supplied as zero then C need not be set on input. + Unchanged on exit. + + C - REAL array of DIMENSION ( LDC, n ). + Before entry, the leading m by n part of the array C must + contain the matrix C, except when beta is zero, in which + case C need not be set on entry. + On exit, the array C is overwritten by the m by n matrix + ( alpha*op( A )*op( B ) + beta*C ). + + LDC - INTEGER. + On entry, LDC specifies the first dimension of C as declared + in the calling (sub) program. LDC must be at least + max( 1, m ). + Unchanged on exit. + + + Level 3 Blas routine. + + -- Written on 8-February-1989. + Jack Dongarra, Argonne National Laboratory. + Iain Duff, AERE Harwell. + Jeremy Du Croz, Numerical Algorithms Group Ltd. + Sven Hammarling, Numerical Algorithms Group Ltd. + + + Set NOTA and NOTB as true if A and B respectively are not + transposed and set NROWA, NCOLA and NROWB as the number of rows + and columns of A and the number of rows of B respectively. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + + /* Function Body */ + nota = lsame_(transa, "N"); + notb = lsame_(transb, "N"); + if (nota) { + nrowa = *m; + ncola = *k; + } else { + nrowa = *k; + ncola = *m; + } + if (notb) { + nrowb = *k; + } else { + nrowb = *n; + } + +/* Test the input parameters. */ + + info = 0; + if (! 
nota && ! lsame_(transa, "C") && ! lsame_( + transa, "T")) { + info = 1; + } else if (! notb && ! lsame_(transb, "C") && ! + lsame_(transb, "T")) { + info = 2; + } else if (*m < 0) { + info = 3; + } else if (*n < 0) { + info = 4; + } else if (*k < 0) { + info = 5; + } else if (*lda < max(1,nrowa)) { + info = 8; + } else if (*ldb < max(1,nrowb)) { + info = 10; + } else if (*ldc < max(1,*m)) { + info = 13; + } + if (info != 0) { + xerbla_("SGEMM ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0 || (*alpha == 0.f || *k == 0) && *beta == 1.f) { + return 0; + } + +/* And if alpha.eq.zero. */ + + if (*alpha == 0.f) { + if (*beta == 0.f) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.f; +/* L10: */ + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L30: */ + } +/* L40: */ + } + } + return 0; + } + +/* Start the operations. */ + + if (notb) { + if (nota) { + +/* Form C := alpha*A*B + beta*C. 
*/ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.f; +/* L50: */ + } + } else if (*beta != 1.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L60: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + if (b[l + j * b_dim1] != 0.f) { + temp = *alpha * b[l + j * b_dim1]; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] += temp * a[i__ + l * + a_dim1]; +/* L70: */ + } + } +/* L80: */ + } +/* L90: */ + } + } else { + +/* Form C := alpha*A'*B + beta*C */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = 0.f; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + temp += a[l + i__ * a_dim1] * b[l + j * b_dim1]; +/* L100: */ + } + if (*beta == 0.f) { + c__[i__ + j * c_dim1] = *alpha * temp; + } else { + c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ + i__ + j * c_dim1]; + } +/* L110: */ + } +/* L120: */ + } + } + } else { + if (nota) { + +/* Form C := alpha*A*B' + beta*C */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.f; +/* L130: */ + } + } else if (*beta != 1.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L140: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + if (b[j + l * b_dim1] != 0.f) { + temp = *alpha * b[j + l * b_dim1]; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] += temp * a[i__ + l * + a_dim1]; +/* L150: */ + } + } +/* L160: */ + } +/* L170: */ + } + } else { + +/* Form C := alpha*A'*B' + beta*C */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = 0.f; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + temp += a[l + i__ * a_dim1] * b[j + l * b_dim1]; 
+/* L180: */ + } + if (*beta == 0.f) { + c__[i__ + j * c_dim1] = *alpha * temp; + } else { + c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ + i__ + j * c_dim1]; + } +/* L190: */ + } +/* L200: */ + } + } + } + + return 0; + +/* End of SGEMM . */ + +} /* sgemm_ */ + +/* Subroutine */ int sgemv_(char *trans, integer *m, integer *n, real *alpha, + real *a, integer *lda, real *x, integer *incx, real *beta, real *y, + integer *incy) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + static integer i__, j, ix, iy, jx, jy, kx, ky, info; + static real temp; + static integer lenx, leny; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + SGEMV performs one of the matrix-vector operations + + y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, + + where alpha and beta are scalars, x and y are vectors and A is an + m by n matrix. + + Parameters + ========== + + TRANS - CHARACTER*1. + On entry, TRANS specifies the operation to be performed as + follows: + + TRANS = 'N' or 'n' y := alpha*A*x + beta*y. + + TRANS = 'T' or 't' y := alpha*A'*x + beta*y. + + TRANS = 'C' or 'c' y := alpha*A'*x + beta*y. + + Unchanged on exit. + + M - INTEGER. + On entry, M specifies the number of rows of the matrix A. + M must be at least zero. + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the number of columns of the matrix A. + N must be at least zero. + Unchanged on exit. + + ALPHA - REAL . + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + A - REAL array of DIMENSION ( LDA, n ). + Before entry, the leading m by n part of the array A must + contain the matrix of coefficients. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, m ). + Unchanged on exit. 
+ + X - REAL array of DIMENSION at least + ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' + and at least + ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. + Before entry, the incremented array X must contain the + vector x. + Unchanged on exit. + + INCX - INTEGER. + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. + + BETA - REAL . + On entry, BETA specifies the scalar beta. When BETA is + supplied as zero then Y need not be set on input. + Unchanged on exit. + + Y - REAL array of DIMENSION at least + ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' + and at least + ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. + Before entry with BETA non-zero, the incremented array Y + must contain the vector y. On exit, Y is overwritten by the + updated vector y. + + INCY - INTEGER. + On entry, INCY specifies the increment for the elements of + Y. INCY must not be zero. + Unchanged on exit. + + + Level 2 Blas routine. + + -- Written on 22-October-1986. + Jack Dongarra, Argonne National Lab. + Jeremy Du Croz, Nag Central Office. + Sven Hammarling, Nag Central Office. + Richard Hanson, Sandia National Labs. + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + --y; + + /* Function Body */ + info = 0; + if (! lsame_(trans, "N") && ! lsame_(trans, "T") && ! lsame_(trans, "C") + ) { + info = 1; + } else if (*m < 0) { + info = 2; + } else if (*n < 0) { + info = 3; + } else if (*lda < max(1,*m)) { + info = 6; + } else if (*incx == 0) { + info = 8; + } else if (*incy == 0) { + info = 11; + } + if (info != 0) { + xerbla_("SGEMV ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0 || *alpha == 0.f && *beta == 1.f) { + return 0; + } + +/* + Set LENX and LENY, the lengths of the vectors x and y, and set + up the start points in X and Y. 
+*/ + + if (lsame_(trans, "N")) { + lenx = *n; + leny = *m; + } else { + lenx = *m; + leny = *n; + } + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (lenx - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (leny - 1) * *incy; + } + +/* + Start the operations. In this version the elements of A are + accessed sequentially with one pass through A. + + First form y := beta*y. +*/ + + if (*beta != 1.f) { + if (*incy == 1) { + if (*beta == 0.f) { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + y[i__] = 0.f; +/* L10: */ + } + } else { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + y[i__] = *beta * y[i__]; +/* L20: */ + } + } + } else { + iy = ky; + if (*beta == 0.f) { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + y[iy] = 0.f; + iy += *incy; +/* L30: */ + } + } else { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + y[iy] = *beta * y[iy]; + iy += *incy; +/* L40: */ + } + } + } + } + if (*alpha == 0.f) { + return 0; + } + if (lsame_(trans, "N")) { + +/* Form y := alpha*A*x + y. */ + + jx = kx; + if (*incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0.f) { + temp = *alpha * x[jx]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + y[i__] += temp * a[i__ + j * a_dim1]; +/* L50: */ + } + } + jx += *incx; +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0.f) { + temp = *alpha * x[jx]; + iy = ky; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + y[iy] += temp * a[i__ + j * a_dim1]; + iy += *incy; +/* L70: */ + } + } + jx += *incx; +/* L80: */ + } + } + } else { + +/* Form y := alpha*A'*x + y. 
*/ + + jy = ky; + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp = 0.f; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp += a[i__ + j * a_dim1] * x[i__]; +/* L90: */ + } + y[jy] += *alpha * temp; + jy += *incy; +/* L100: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp = 0.f; + ix = kx; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp += a[i__ + j * a_dim1] * x[ix]; + ix += *incx; +/* L110: */ + } + y[jy] += *alpha * temp; + jy += *incy; +/* L120: */ + } + } + } + + return 0; + +/* End of SGEMV . */ + +} /* sgemv_ */ + +/* Subroutine */ int sscal_(integer *n, real *sa, real *sx, integer *incx) +{ + /* System generated locals */ + integer i__1, i__2; + + /* Local variables */ + static integer i__, m, mp1, nincx; + + +/* + scales a vector by a constant. + uses unrolled loops for increment equal to 1. + jack dongarra, linpack, 3/11/78. + modified 3/93 to return if incx .le. 0. + modified 12/3/93, array(1) declarations changed to array(*) +*/ + + + /* Parameter adjustments */ + --sx; + + /* Function Body */ + if (*n <= 0 || *incx <= 0) { + return 0; + } + if (*incx == 1) { + goto L20; + } + +/* code for increment not equal to 1 */ + + nincx = *n * *incx; + i__1 = nincx; + i__2 = *incx; + for (i__ = 1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { + sx[i__] = *sa * sx[i__]; +/* L10: */ + } + return 0; + +/* + code for increment equal to 1 + + + clean-up loop +*/ + +L20: + m = *n % 5; + if (m == 0) { + goto L40; + } + i__2 = m; + for (i__ = 1; i__ <= i__2; ++i__) { + sx[i__] = *sa * sx[i__]; +/* L30: */ + } + if (*n < 5) { + return 0; + } +L40: + mp1 = m + 1; + i__2 = *n; + for (i__ = mp1; i__ <= i__2; i__ += 5) { + sx[i__] = *sa * sx[i__]; + sx[i__ + 1] = *sa * sx[i__ + 1]; + sx[i__ + 2] = *sa * sx[i__ + 2]; + sx[i__ + 3] = *sa * sx[i__ + 3]; + sx[i__ + 4] = *sa * sx[i__ + 4]; +/* L50: */ + } + return 0; +} /* sscal_ */ + +/* Subroutine */ int ssymm_(char *side, char *uplo, integer *m, integer *n, + real *alpha, real *a, integer *lda, real *b, integer *ldb, real *beta, + real *c__, integer *ldc) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, + i__3; + + /* Local variables */ + static integer i__, j, k, info; + static real temp1, temp2; + extern logical lsame_(char *, char *); + static integer nrowa; + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + SSYMM performs one of the matrix-matrix operations + + C := alpha*A*B + beta*C, + + or + + C := alpha*B*A + beta*C, + + where alpha and beta are scalars, A is a symmetric matrix and B and + C are m by n matrices. + + Parameters + ========== + + SIDE - CHARACTER*1. + On entry, SIDE specifies whether the symmetric matrix A + appears on the left or right in the operation as follows: + + SIDE = 'L' or 'l' C := alpha*A*B + beta*C, + + SIDE = 'R' or 'r' C := alpha*B*A + beta*C, + + Unchanged on exit. + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the upper or lower + triangular part of the symmetric matrix A is to be + referenced as follows: + + UPLO = 'U' or 'u' Only the upper triangular part of the + symmetric matrix is to be referenced. 
+ + UPLO = 'L' or 'l' Only the lower triangular part of the + symmetric matrix is to be referenced. + + Unchanged on exit. + + M - INTEGER. + On entry, M specifies the number of rows of the matrix C. + M must be at least zero. + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the number of columns of the matrix C. + N must be at least zero. + Unchanged on exit. + + ALPHA - REAL . + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + A - REAL array of DIMENSION ( LDA, ka ), where ka is + m when SIDE = 'L' or 'l' and is n otherwise. + Before entry with SIDE = 'L' or 'l', the m by m part of + the array A must contain the symmetric matrix, such that + when UPLO = 'U' or 'u', the leading m by m upper triangular + part of the array A must contain the upper triangular part + of the symmetric matrix and the strictly lower triangular + part of A is not referenced, and when UPLO = 'L' or 'l', + the leading m by m lower triangular part of the array A + must contain the lower triangular part of the symmetric + matrix and the strictly upper triangular part of A is not + referenced. + Before entry with SIDE = 'R' or 'r', the n by n part of + the array A must contain the symmetric matrix, such that + when UPLO = 'U' or 'u', the leading n by n upper triangular + part of the array A must contain the upper triangular part + of the symmetric matrix and the strictly lower triangular + part of A is not referenced, and when UPLO = 'L' or 'l', + the leading n by n lower triangular part of the array A + must contain the lower triangular part of the symmetric + matrix and the strictly upper triangular part of A is not + referenced. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. When SIDE = 'L' or 'l' then + LDA must be at least max( 1, m ), otherwise LDA must be at + least max( 1, n ). + Unchanged on exit. + + B - REAL array of DIMENSION ( LDB, n ). 
+ Before entry, the leading m by n part of the array B must + contain the matrix B. + Unchanged on exit. + + LDB - INTEGER. + On entry, LDB specifies the first dimension of B as declared + in the calling (sub) program. LDB must be at least + max( 1, m ). + Unchanged on exit. + + BETA - REAL . + On entry, BETA specifies the scalar beta. When BETA is + supplied as zero then C need not be set on input. + Unchanged on exit. + + C - REAL array of DIMENSION ( LDC, n ). + Before entry, the leading m by n part of the array C must + contain the matrix C, except when beta is zero, in which + case C need not be set on entry. + On exit, the array C is overwritten by the m by n updated + matrix. + + LDC - INTEGER. + On entry, LDC specifies the first dimension of C as declared + in the calling (sub) program. LDC must be at least + max( 1, m ). + Unchanged on exit. + + + Level 3 Blas routine. + + -- Written on 8-February-1989. + Jack Dongarra, Argonne National Laboratory. + Iain Duff, AERE Harwell. + Jeremy Du Croz, Numerical Algorithms Group Ltd. + Sven Hammarling, Numerical Algorithms Group Ltd. + + + Set NROWA as the number of rows of A. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + + /* Function Body */ + if (lsame_(side, "L")) { + nrowa = *m; + } else { + nrowa = *n; + } + upper = lsame_(uplo, "U"); + +/* Test the input parameters. */ + + info = 0; + if (! lsame_(side, "L") && ! lsame_(side, "R")) { + info = 1; + } else if (! upper && ! lsame_(uplo, "L")) { + info = 2; + } else if (*m < 0) { + info = 3; + } else if (*n < 0) { + info = 4; + } else if (*lda < max(1,nrowa)) { + info = 7; + } else if (*ldb < max(1,*m)) { + info = 9; + } else if (*ldc < max(1,*m)) { + info = 12; + } + if (info != 0) { + xerbla_("SSYMM ", &info); + return 0; + } + +/* Quick return if possible. 
*/ + + if (*m == 0 || *n == 0 || *alpha == 0.f && *beta == 1.f) { + return 0; + } + +/* And when alpha.eq.zero. */ + + if (*alpha == 0.f) { + if (*beta == 0.f) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.f; +/* L10: */ + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L30: */ + } +/* L40: */ + } + } + return 0; + } + +/* Start the operations. */ + + if (lsame_(side, "L")) { + +/* Form C := alpha*A*B + beta*C. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp1 = *alpha * b[i__ + j * b_dim1]; + temp2 = 0.f; + i__3 = i__ - 1; + for (k = 1; k <= i__3; ++k) { + c__[k + j * c_dim1] += temp1 * a[k + i__ * a_dim1]; + temp2 += b[k + j * b_dim1] * a[k + i__ * a_dim1]; +/* L50: */ + } + if (*beta == 0.f) { + c__[i__ + j * c_dim1] = temp1 * a[i__ + i__ * a_dim1] + + *alpha * temp2; + } else { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] + + temp1 * a[i__ + i__ * a_dim1] + *alpha * + temp2; + } +/* L60: */ + } +/* L70: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (i__ = *m; i__ >= 1; --i__) { + temp1 = *alpha * b[i__ + j * b_dim1]; + temp2 = 0.f; + i__2 = *m; + for (k = i__ + 1; k <= i__2; ++k) { + c__[k + j * c_dim1] += temp1 * a[k + i__ * a_dim1]; + temp2 += b[k + j * b_dim1] * a[k + i__ * a_dim1]; +/* L80: */ + } + if (*beta == 0.f) { + c__[i__ + j * c_dim1] = temp1 * a[i__ + i__ * a_dim1] + + *alpha * temp2; + } else { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] + + temp1 * a[i__ + i__ * a_dim1] + *alpha * + temp2; + } +/* L90: */ + } +/* L100: */ + } + } + } else { + +/* Form C := alpha*B*A + beta*C. 
*/ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * a[j + j * a_dim1]; + if (*beta == 0.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = temp1 * b[i__ + j * b_dim1]; +/* L110: */ + } + } else { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] + + temp1 * b[i__ + j * b_dim1]; +/* L120: */ + } + } + i__2 = j - 1; + for (k = 1; k <= i__2; ++k) { + if (upper) { + temp1 = *alpha * a[k + j * a_dim1]; + } else { + temp1 = *alpha * a[j + k * a_dim1]; + } + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] += temp1 * b[i__ + k * b_dim1]; +/* L130: */ + } +/* L140: */ + } + i__2 = *n; + for (k = j + 1; k <= i__2; ++k) { + if (upper) { + temp1 = *alpha * a[j + k * a_dim1]; + } else { + temp1 = *alpha * a[k + j * a_dim1]; + } + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] += temp1 * b[i__ + k * b_dim1]; +/* L150: */ + } +/* L160: */ + } +/* L170: */ + } + } + + return 0; + +/* End of SSYMM . */ + +} /* ssymm_ */ + +/* Subroutine */ int ssyrk_(char *uplo, char *trans, integer *n, integer *k, + real *alpha, real *a, integer *lda, real *beta, real *c__, integer * + ldc) +{ + /* System generated locals */ + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, j, l, info; + static real temp; + extern logical lsame_(char *, char *); + static integer nrowa; + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + SSYRK performs one of the symmetric rank k operations + + C := alpha*A*A' + beta*C, + + or + + C := alpha*A'*A + beta*C, + + where alpha and beta are scalars, C is an n by n symmetric matrix + and A is an n by k matrix in the first case and a k by n matrix + in the second case. + + Parameters + ========== + + UPLO - CHARACTER*1. 
+ On entry, UPLO specifies whether the upper or lower + triangular part of the array C is to be referenced as + follows: + + UPLO = 'U' or 'u' Only the upper triangular part of C + is to be referenced. + + UPLO = 'L' or 'l' Only the lower triangular part of C + is to be referenced. + + Unchanged on exit. + + TRANS - CHARACTER*1. + On entry, TRANS specifies the operation to be performed as + follows: + + TRANS = 'N' or 'n' C := alpha*A*A' + beta*C. + + TRANS = 'T' or 't' C := alpha*A'*A + beta*C. + + TRANS = 'C' or 'c' C := alpha*A'*A + beta*C. + + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the order of the matrix C. N must be + at least zero. + Unchanged on exit. + + K - INTEGER. + On entry with TRANS = 'N' or 'n', K specifies the number + of columns of the matrix A, and on entry with + TRANS = 'T' or 't' or 'C' or 'c', K specifies the number + of rows of the matrix A. K must be at least zero. + Unchanged on exit. + + ALPHA - REAL . + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + A - REAL array of DIMENSION ( LDA, ka ), where ka is + k when TRANS = 'N' or 'n', and is n otherwise. + Before entry with TRANS = 'N' or 'n', the leading n by k + part of the array A must contain the matrix A, otherwise + the leading k by n part of the array A must contain the + matrix A. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. When TRANS = 'N' or 'n' + then LDA must be at least max( 1, n ), otherwise LDA must + be at least max( 1, k ). + Unchanged on exit. + + BETA - REAL . + On entry, BETA specifies the scalar beta. + Unchanged on exit. + + C - REAL array of DIMENSION ( LDC, n ). + Before entry with UPLO = 'U' or 'u', the leading n by n + upper triangular part of the array C must contain the upper + triangular part of the symmetric matrix and the strictly + lower triangular part of C is not referenced. 
On exit, the + upper triangular part of the array C is overwritten by the + upper triangular part of the updated matrix. + Before entry with UPLO = 'L' or 'l', the leading n by n + lower triangular part of the array C must contain the lower + triangular part of the symmetric matrix and the strictly + upper triangular part of C is not referenced. On exit, the + lower triangular part of the array C is overwritten by the + lower triangular part of the updated matrix. + + LDC - INTEGER. + On entry, LDC specifies the first dimension of C as declared + in the calling (sub) program. LDC must be at least + max( 1, n ). + Unchanged on exit. + + + Level 3 Blas routine. + + -- Written on 8-February-1989. + Jack Dongarra, Argonne National Laboratory. + Iain Duff, AERE Harwell. + Jeremy Du Croz, Numerical Algorithms Group Ltd. + Sven Hammarling, Numerical Algorithms Group Ltd. + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + + /* Function Body */ + if (lsame_(trans, "N")) { + nrowa = *n; + } else { + nrowa = *k; + } + upper = lsame_(uplo, "U"); + + info = 0; + if (! upper && ! lsame_(uplo, "L")) { + info = 1; + } else if (! lsame_(trans, "N") && ! lsame_(trans, + "T") && ! lsame_(trans, "C")) { + info = 2; + } else if (*n < 0) { + info = 3; + } else if (*k < 0) { + info = 4; + } else if (*lda < max(1,nrowa)) { + info = 7; + } else if (*ldc < max(1,*n)) { + info = 10; + } + if (info != 0) { + xerbla_("SSYRK ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || (*alpha == 0.f || *k == 0) && *beta == 1.f) { + return 0; + } + +/* And when alpha.eq.zero. 
*/ + + if (*alpha == 0.f) { + if (upper) { + if (*beta == 0.f) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.f; +/* L10: */ + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L30: */ + } +/* L40: */ + } + } + } else { + if (*beta == 0.f) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.f; +/* L50: */ + } +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L70: */ + } +/* L80: */ + } + } + } + return 0; + } + +/* Start the operations. */ + + if (lsame_(trans, "N")) { + +/* Form C := alpha*A*A' + beta*C. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.f) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.f; +/* L90: */ + } + } else if (*beta != 1.f) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L100: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + if (a[j + l * a_dim1] != 0.f) { + temp = *alpha * a[j + l * a_dim1]; + i__3 = j; + for (i__ = 1; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] += temp * a[i__ + l * + a_dim1]; +/* L110: */ + } + } +/* L120: */ + } +/* L130: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.f) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.f; +/* L140: */ + } + } else if (*beta != 1.f) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L150: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + if (a[j + l * a_dim1] != 0.f) { + temp = *alpha * a[j + l * 
a_dim1]; + i__3 = *n; + for (i__ = j; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] += temp * a[i__ + l * + a_dim1]; +/* L160: */ + } + } +/* L170: */ + } +/* L180: */ + } + } + } else { + +/* Form C := alpha*A'*A + beta*C. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = 0.f; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + temp += a[l + i__ * a_dim1] * a[l + j * a_dim1]; +/* L190: */ + } + if (*beta == 0.f) { + c__[i__ + j * c_dim1] = *alpha * temp; + } else { + c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ + i__ + j * c_dim1]; + } +/* L200: */ + } +/* L210: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + temp = 0.f; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + temp += a[l + i__ * a_dim1] * a[l + j * a_dim1]; +/* L220: */ + } + if (*beta == 0.f) { + c__[i__ + j * c_dim1] = *alpha * temp; + } else { + c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ + i__ + j * c_dim1]; + } +/* L230: */ + } +/* L240: */ + } + } + } + + return 0; + +/* End of SSYRK . */ + +} /* ssyrk_ */ + +/* Subroutine */ int strsm_(char *side, char *uplo, char *transa, char *diag, + integer *m, integer *n, real *alpha, real *a, integer *lda, real *b, + integer *ldb) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, j, k, info; + static real temp; + static logical lside; + extern logical lsame_(char *, char *); + static integer nrowa; + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + static logical nounit; + + +/* + Purpose + ======= + + STRSM solves one of the matrix equations + + op( A )*X = alpha*B, or X*op( A ) = alpha*B, + + where alpha is a scalar, X and B are m by n matrices, A is a unit, or + non-unit, upper or lower triangular matrix and op( A ) is one of + + op( A ) = A or op( A ) = A'. 
+ + The matrix X is overwritten on B. + + Parameters + ========== + + SIDE - CHARACTER*1. + On entry, SIDE specifies whether op( A ) appears on the left + or right of X as follows: + + SIDE = 'L' or 'l' op( A )*X = alpha*B. + + SIDE = 'R' or 'r' X*op( A ) = alpha*B. + + Unchanged on exit. + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the matrix A is an upper or + lower triangular matrix as follows: + + UPLO = 'U' or 'u' A is an upper triangular matrix. + + UPLO = 'L' or 'l' A is a lower triangular matrix. + + Unchanged on exit. + + TRANSA - CHARACTER*1. + On entry, TRANSA specifies the form of op( A ) to be used in + the matrix multiplication as follows: + + TRANSA = 'N' or 'n' op( A ) = A. + + TRANSA = 'T' or 't' op( A ) = A'. + + TRANSA = 'C' or 'c' op( A ) = A'. + + Unchanged on exit. + + DIAG - CHARACTER*1. + On entry, DIAG specifies whether or not A is unit triangular + as follows: + + DIAG = 'U' or 'u' A is assumed to be unit triangular. + + DIAG = 'N' or 'n' A is not assumed to be unit + triangular. + + Unchanged on exit. + + M - INTEGER. + On entry, M specifies the number of rows of B. M must be at + least zero. + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the number of columns of B. N must be + at least zero. + Unchanged on exit. + + ALPHA - REAL . + On entry, ALPHA specifies the scalar alpha. When alpha is + zero then A is not referenced and B need not be set before + entry. + Unchanged on exit. + + A - REAL array of DIMENSION ( LDA, k ), where k is m + when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. + Before entry with UPLO = 'U' or 'u', the leading k by k + upper triangular part of the array A must contain the upper + triangular matrix and the strictly lower triangular part of + A is not referenced. + Before entry with UPLO = 'L' or 'l', the leading k by k + lower triangular part of the array A must contain the lower + triangular matrix and the strictly upper triangular part of + A is not referenced. 
+ Note that when DIAG = 'U' or 'u', the diagonal elements of + A are not referenced either, but are assumed to be unity. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. When SIDE = 'L' or 'l' then + LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' + then LDA must be at least max( 1, n ). + Unchanged on exit. + + B - REAL array of DIMENSION ( LDB, n ). + Before entry, the leading m by n part of the array B must + contain the right-hand side matrix B, and on exit is + overwritten by the solution matrix X. + + LDB - INTEGER. + On entry, LDB specifies the first dimension of B as declared + in the calling (sub) program. LDB must be at least + max( 1, m ). + Unchanged on exit. + + + Level 3 Blas routine. + + + -- Written on 8-February-1989. + Jack Dongarra, Argonne National Laboratory. + Iain Duff, AERE Harwell. + Jeremy Du Croz, Numerical Algorithms Group Ltd. + Sven Hammarling, Numerical Algorithms Group Ltd. + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + lside = lsame_(side, "L"); + if (lside) { + nrowa = *m; + } else { + nrowa = *n; + } + nounit = lsame_(diag, "N"); + upper = lsame_(uplo, "U"); + + info = 0; + if (! lside && ! lsame_(side, "R")) { + info = 1; + } else if (! upper && ! lsame_(uplo, "L")) { + info = 2; + } else if (! lsame_(transa, "N") && ! lsame_(transa, + "T") && ! lsame_(transa, "C")) { + info = 3; + } else if (! lsame_(diag, "U") && ! lsame_(diag, + "N")) { + info = 4; + } else if (*m < 0) { + info = 5; + } else if (*n < 0) { + info = 6; + } else if (*lda < max(1,nrowa)) { + info = 9; + } else if (*ldb < max(1,*m)) { + info = 11; + } + if (info != 0) { + xerbla_("STRSM ", &info); + return 0; + } + +/* Quick return if possible. 
*/ + + if (*n == 0) { + return 0; + } + +/* And when alpha.eq.zero. */ + + if (*alpha == 0.f) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = 0.f; +/* L10: */ + } +/* L20: */ + } + return 0; + } + +/* Start the operations. */ + + if (lside) { + if (lsame_(transa, "N")) { + +/* Form B := alpha*inv( A )*B. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*alpha != 1.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] + ; +/* L30: */ + } + } + for (k = *m; k >= 1; --k) { + if (b[k + j * b_dim1] != 0.f) { + if (nounit) { + b[k + j * b_dim1] /= a[k + k * a_dim1]; + } + i__2 = k - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] -= b[k + j * b_dim1] * a[ + i__ + k * a_dim1]; +/* L40: */ + } + } +/* L50: */ + } +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*alpha != 1.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] + ; +/* L70: */ + } + } + i__2 = *m; + for (k = 1; k <= i__2; ++k) { + if (b[k + j * b_dim1] != 0.f) { + if (nounit) { + b[k + j * b_dim1] /= a[k + k * a_dim1]; + } + i__3 = *m; + for (i__ = k + 1; i__ <= i__3; ++i__) { + b[i__ + j * b_dim1] -= b[k + j * b_dim1] * a[ + i__ + k * a_dim1]; +/* L80: */ + } + } +/* L90: */ + } +/* L100: */ + } + } + } else { + +/* Form B := alpha*inv( A' )*B. 
*/ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = *alpha * b[i__ + j * b_dim1]; + i__3 = i__ - 1; + for (k = 1; k <= i__3; ++k) { + temp -= a[k + i__ * a_dim1] * b[k + j * b_dim1]; +/* L110: */ + } + if (nounit) { + temp /= a[i__ + i__ * a_dim1]; + } + b[i__ + j * b_dim1] = temp; +/* L120: */ + } +/* L130: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (i__ = *m; i__ >= 1; --i__) { + temp = *alpha * b[i__ + j * b_dim1]; + i__2 = *m; + for (k = i__ + 1; k <= i__2; ++k) { + temp -= a[k + i__ * a_dim1] * b[k + j * b_dim1]; +/* L140: */ + } + if (nounit) { + temp /= a[i__ + i__ * a_dim1]; + } + b[i__ + j * b_dim1] = temp; +/* L150: */ + } +/* L160: */ + } + } + } + } else { + if (lsame_(transa, "N")) { + +/* Form B := alpha*B*inv( A ). */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*alpha != 1.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] + ; +/* L170: */ + } + } + i__2 = j - 1; + for (k = 1; k <= i__2; ++k) { + if (a[k + j * a_dim1] != 0.f) { + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + b[i__ + j * b_dim1] -= a[k + j * a_dim1] * b[ + i__ + k * b_dim1]; +/* L180: */ + } + } +/* L190: */ + } + if (nounit) { + temp = 1.f / a[j + j * a_dim1]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; +/* L200: */ + } + } +/* L210: */ + } + } else { + for (j = *n; j >= 1; --j) { + if (*alpha != 1.f) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] + ; +/* L220: */ + } + } + i__1 = *n; + for (k = j + 1; k <= i__1; ++k) { + if (a[k + j * a_dim1] != 0.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] -= a[k + j * a_dim1] * b[ + i__ + k * b_dim1]; +/* L230: */ + } + } +/* L240: */ + } + if (nounit) { + temp = 1.f / a[j + j * a_dim1]; + i__1 = *m; + for (i__ = 1; 
i__ <= i__1; ++i__) { + b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; +/* L250: */ + } + } +/* L260: */ + } + } + } else { + +/* Form B := alpha*B*inv( A' ). */ + + if (upper) { + for (k = *n; k >= 1; --k) { + if (nounit) { + temp = 1.f / a[k + k * a_dim1]; + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; +/* L270: */ + } + } + i__1 = k - 1; + for (j = 1; j <= i__1; ++j) { + if (a[j + k * a_dim1] != 0.f) { + temp = a[j + k * a_dim1]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] -= temp * b[i__ + k * + b_dim1]; +/* L280: */ + } + } +/* L290: */ + } + if (*alpha != 1.f) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + b[i__ + k * b_dim1] = *alpha * b[i__ + k * b_dim1] + ; +/* L300: */ + } + } +/* L310: */ + } + } else { + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + if (nounit) { + temp = 1.f / a[k + k * a_dim1]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; +/* L320: */ + } + } + i__2 = *n; + for (j = k + 1; j <= i__2; ++j) { + if (a[j + k * a_dim1] != 0.f) { + temp = a[j + k * a_dim1]; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + b[i__ + j * b_dim1] -= temp * b[i__ + k * + b_dim1]; +/* L330: */ + } + } +/* L340: */ + } + if (*alpha != 1.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + k * b_dim1] = *alpha * b[i__ + k * b_dim1] + ; +/* L350: */ + } + } +/* L360: */ + } + } + } + } + + return 0; + +/* End of STRSM . 
*/ + +} /* strsm_ */ + +/* Subroutine */ int xerbla_(char *srname, integer *info) +{ + /* Format strings */ + static char fmt_9999[] = "(\002 ** On entry to \002,a6,\002 parameter nu" + "mber \002,i2,\002 had \002,\002an illegal value\002)"; + + /* Builtin functions */ + integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); + /* Subroutine */ int s_stop(char *, ftnlen); + + /* Fortran I/O blocks */ + static cilist io___60 = { 0, 6, 0, fmt_9999, 0 }; + + +/* + -- LAPACK auxiliary routine (preliminary version) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + XERBLA is an error handler for the LAPACK routines. + It is called by an LAPACK routine if an input parameter has an + invalid value. A message is printed and execution stops. + + Installers may consider modifying the STOP statement in order to + call system-specific exception-handling facilities. + + Arguments + ========= + + SRNAME (input) CHARACTER*6 + The name of the routine which called XERBLA. + + INFO (input) INTEGER + The position of the invalid parameter in the parameter list + of the calling routine. +*/ + + + s_wsfe(&io___60); + do_fio(&c__1, srname, (ftnlen)6); + do_fio(&c__1, (char *)&(*info), (ftnlen)sizeof(integer)); + e_wsfe(); + + s_stop("", (ftnlen)0); + + +/* End of XERBLA */ + + return 0; +} /* xerbla_ */ + diff --git a/media/sphinxbase/src/libsphinxbase/util/case.c b/media/sphinxbase/src/libsphinxbase/util/case.c new file mode 100644 index 000000000..f9e24ac06 --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/util/case.c @@ -0,0 +1,141 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * case.c -- Upper/lower case conversion routines + * + * ********************************************** + * CMU ARPA Speech Project + * + * Copyright (c) 1999 Carnegie Mellon University. + * ALL RIGHTS RESERVED. 
+ * ********************************************** + * + * HISTORY + * $Log: case.c,v $ + * Revision 1.7 2005/06/22 02:58:54 arthchan2003 + * Added keyword + * + * Revision 1.3 2005/03/30 01:22:48 archan + * Fixed mistakes in last updates. Add + * + * + * 18-Jun-97 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon + * Added strcmp_nocase. Moved UPPER_CASE and LOWER_CASE definitions to .h. + * + * 16-Feb-97 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon + * Created. + */ + + +#include +#include + +#include "sphinxbase/case.h" +#include "sphinxbase/err.h" + + +void +lcase(register char *cp) +{ + if (cp) { + while (*cp) { + *cp = LOWER_CASE(*cp); + cp++; + } + } +} + +void +ucase(register char *cp) +{ + if (cp) { + while (*cp) { + *cp = UPPER_CASE(*cp); + cp++; + } + } +} + +int32 +strcmp_nocase(const char *str1, const char *str2) +{ + char c1, c2; + + if (str1 == str2) + return 0; + if (str1 && str2) { + for (;;) { + c1 = *(str1++); + c1 = UPPER_CASE(c1); + c2 = *(str2++); + c2 = UPPER_CASE(c2); + if (c1 != c2) + return (c1 - c2); + if (c1 == '\0') + return 0; + } + } + else + return (str1 == NULL) ? -1 : 1; + + return 0; +} + +int32 +strncmp_nocase(const char *str1, const char *str2, size_t len) +{ + char c1, c2; + + if (str1 && str2) { + size_t n; + + for (n = 0; n < len; ++n) { + c1 = *(str1++); + c1 = UPPER_CASE(c1); + c2 = *(str2++); + c2 = UPPER_CASE(c2); + if (c1 != c2) + return (c1 - c2); + if (c1 == '\0') + return 0; + } + } + else + return (str1 == NULL) ? -1 : 1; + + return 0; +} diff --git a/media/sphinxbase/src/libsphinxbase/util/ckd_alloc.c b/media/sphinxbase/src/libsphinxbase/util/ckd_alloc.c new file mode 100644 index 000000000..45dc84aae --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/util/ckd_alloc.c @@ -0,0 +1,427 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * ckd_alloc.c -- Memory allocation package. + * + * ********************************************** + * CMU ARPA Speech Project + * + * Copyright (c) 1999 Carnegie Mellon University. + * ALL RIGHTS RESERVED. 
+ * **********************************************
+ *
+ * HISTORY
+ * $Log: ckd_alloc.c,v $
+ * Revision 1.6 2005/06/22 02:59:25 arthchan2003
+ * Added keyword
+ *
+ * Revision 1.3 2005/03/30 01:22:48 archan
+ * Fixed mistakes in last updates. Add
+ *
+ *
+ * 19-Jun-97 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
+ * Removed file,line arguments from free functions.
+ * Removed debugging stuff.
+ *
+ * 01-Jan-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
+ * Created.
+ */
+
+
+/*********************************************************************
+ *
+ * $Header: /cvsroot/cmusphinx/sphinx3/src/libutil/ckd_alloc.c,v 1.6 2005/06/22 02:59:25 arthchan2003 Exp $
+ *
+ * Carnegie Mellon ARPA Speech Group
+ *
+ * Copyright (c) 1994 Carnegie Mellon University.
+ * All rights reserved.
+ *
+ *********************************************************************
+ *
+ * file: ckd_alloc.c
+ *
+ * traceability:
+ *
+ * description:
+ *
+ * author:
+ *
+ *********************************************************************/
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <setjmp.h>
+
+#ifdef _MSC_VER
+#pragma warning (disable: 4996)
+#endif
+
+#include "sphinxbase/ckd_alloc.h"
+#include "sphinxbase/err.h"
+
+/**
+ * Target for longjmp() on failure.
+ *
+ * FIXME: This should be in thread-local storage.
+ */
+static jmp_buf *ckd_target;
+static int jmp_abort;
+
+/**
+ * Install env as the longjmp() target used by ckd_fail().
+ *
+ * A non-zero abort switches ckd_fail() to abort mode.  Nothing in this
+ * file clears that flag again, so passing zero later does not undo it.
+ *
+ * @return the previously installed target (possibly NULL).
+ */
+jmp_buf *
+ckd_set_jump(jmp_buf *env, int abort)
+{
+    jmp_buf *old;
+
+    if (abort)
+        jmp_abort = 1;
+
+    old = ckd_target;
+    ckd_target = env;
+    return old;
+}
+
+/**
+ * Report an allocation failure and leave the current control flow.
+ *
+ * The printf-style message goes to stderr.  Then, in order of
+ * precedence: abort() (exit(-1) on Windows CE) if abort mode was
+ * requested through ckd_set_jump(); longjmp() to the installed target
+ * with value 1; otherwise exit(-1).  This function does not return.
+ */
+void
+ckd_fail(char *format, ...)
+{
+    va_list args;
+
+    va_start(args, format);
+    vfprintf(stderr, format, args);
+    va_end(args);
+
+    if (jmp_abort)
+        /* abort() doesn't exist in Windows CE */
+        #if defined(_WIN32_WCE)
+        exit(-1);
+        #else
+        abort();
+        #endif
+    else if (ckd_target)
+        longjmp(*ckd_target, 1);
+    else
+        exit(-1);
+}
+
+/**
+ * Multiply two sizes for an allocation request.
+ *
+ * Calls ckd_fail() instead of letting the product wrap around when it
+ * does not fit in a size_t; a wrapped product would make the caller
+ * allocate less memory than it goes on to index.
+ */
+static size_t
+ckd_size_mul(size_t a, size_t b, const char *caller_file, int caller_line)
+{
+    if (b != 0 && a > (size_t)-1 / b)
+        ckd_fail("allocation size overflow (%lu * %lu) from %s(%d)\n",
+                 (unsigned long)a, (unsigned long)b,
+                 caller_file, caller_line);
+    return a * b;
+}
+
+/**
+ * Allocate n_elem zero-filled elements of elem_size bytes.
+ *
+ * On failure ckd_fail() is called with the caller's file and line, so
+ * a NULL result is never handed back to the caller.
+ */
+void *
+__ckd_calloc__(size_t n_elem, size_t elem_size,
+               const char *caller_file, int caller_line)
+{
+    void *mem;
+
+    /* size_t is cast to unsigned long for printing: "%d" with a size_t
+     * argument is undefined and corrupts the arguments that follow. */
+#if defined(__ADSPBLACKFIN__) && !defined(__linux__)
+    if ((mem = heap_calloc(heap_lookup(1),n_elem, elem_size)) == NULL)
+        if ((mem = heap_calloc(heap_lookup(0),n_elem, elem_size)) == NULL)
+        {
+            ckd_fail("calloc(%lu,%lu) failed from %s(%d), free space: %d\n",
+                     (unsigned long)n_elem, (unsigned long)elem_size,
+                     caller_file, caller_line,space_unused());
+        }
+#else
+    if ((mem = calloc(n_elem, elem_size)) == NULL) {
+        ckd_fail("calloc(%lu,%lu) failed from %s(%d)\n",
+                 (unsigned long)n_elem, (unsigned long)elem_size,
+                 caller_file, caller_line);
+    }
+#endif
+
+
+    return mem;
+}
+
+
+/**
+ * Allocate size bytes of uninitialised memory; calls ckd_fail() with
+ * the caller's file and line if the allocation fails.
+ */
+void *
+__ckd_malloc__(size_t size, const char *caller_file, int caller_line)
+{
+    void *mem;
+
+#if defined(__ADSPBLACKFIN__) && !defined(__linux__)
+    if ((mem = heap_malloc(heap_lookup(0),size)) == NULL)
+        if ((mem = heap_malloc(heap_lookup(1),size)) == NULL)
+#else
+    if ((mem = malloc(size)) == NULL)
+#endif
+        ckd_fail("malloc(%lu) failed from %s(%d)\n", (unsigned long)size,
+                 caller_file, caller_line);
+
+    return mem;
+}
+
+
+/**
+ * Resize the allocation at ptr to new_size bytes; calls ckd_fail()
+ * with the caller's file and line if the reallocation fails.
+ */
+void *
+__ckd_realloc__(void *ptr, size_t new_size,
+                const char *caller_file, int caller_line)
+{
+    void *mem;
+#if defined(__ADSPBLACKFIN__) && !defined(__linux__)
+    if ((mem = heap_realloc(heap_lookup(0),ptr, new_size)) == NULL) {
+#else
+    if ((mem = realloc(ptr, new_size)) == NULL) {
+#endif
+        ckd_fail("realloc(%lu) failed from %s(%d)\n", (unsigned long)new_size,
+                 caller_file, caller_line);
+    }
+
+    return mem;
+}
+
+
+/**
+ * Duplicate a NUL-terminated string into memory obtained from
+ * __ckd_malloc__().
+ *
+ * @return the copy, or NULL when orig is NULL.
+ */
+char *
+__ckd_salloc__(const char *orig, const char *caller_file,
+               int caller_line)
+{
+    size_t len;
+    char *buf;
+
+    if (!orig)
+        return NULL;
+
+    len = strlen(orig) + 1;
+    buf = (char *) __ckd_malloc__(len, caller_file, caller_line);
+
+    strcpy(buf, orig);
+    return (buf);
+}
+
+
+/**
+ * Allocate a zero-filled d1 x d2 array of elemsize-byte elements.
+ *
+ * The elements live in one contiguous buffer; the returned table of d1
+ * row pointers indexes into it, so row 0 is also the start of the
+ * storage (which is what ckd_free_2d() releases).
+ */
+void *
+__ckd_calloc_2d__(size_t d1, size_t d2, size_t elemsize,
+                  const char *caller_file, int caller_line)
+{
+    char **ref, *mem;
+    size_t i, offset;
+
+    mem =
+        (char *) __ckd_calloc__(ckd_size_mul(d1, d2, caller_file, caller_line),
+                                elemsize, caller_file, caller_line);
+    ref =
+        (char **) __ckd_malloc__(ckd_size_mul(d1, sizeof(void *),
+                                              caller_file, caller_line),
+                                 caller_file, caller_line);
+
+    for (i = 0, offset = 0; i < d1; i++, offset += d2 * elemsize)
+        ref[i] = mem + offset;
+
+    return ref;
+}
+
+
+/**
+ * Release memory obtained from this module.  NULL is accepted.
+ */
+void
+ckd_free(void *ptr)
+{
+#if defined(__ADSPBLACKFIN__) && !defined(__linux__)
+    if (ptr)
+        heap_free(0,ptr);
+#else
+    free(ptr);
+#endif
+}
+
+/**
+ * Free a 2-D array built by __ckd_calloc_2d__(): first the element
+ * storage (reached through row 0), then the row table.  NULL is
+ * accepted.
+ */
+void
+ckd_free_2d(void *tmpptr)
+{
+    void **ptr = (void **)tmpptr;
+    if (ptr)
+        ckd_free(ptr[0]);
+    ckd_free(ptr);
+}
+
+
+/**
+ * Allocate a zero-filled d1 x d2 x d3 array of elemsize-byte elements.
+ *
+ * Three buffers are created: the contiguous element storage, a table
+ * of d1*d2 pointers into it, and a table of d1 pointers into that
+ * table.  The outermost table is returned.
+ */
+void *
+__ckd_calloc_3d__(size_t d1, size_t d2, size_t d3, size_t elemsize,
+                  const char *caller_file, int caller_line)
+{
+    char ***ref1, **ref2, *mem;
+    size_t i, j, offset, n12;
+
+    n12 = ckd_size_mul(d1, d2, caller_file, caller_line);
+
+    mem =
+        (char *) __ckd_calloc__(ckd_size_mul(n12, d3, caller_file, caller_line),
+                                elemsize, caller_file, caller_line);
+    ref1 =
+        (char ***) __ckd_malloc__(ckd_size_mul(d1, sizeof(void **),
+                                               caller_file, caller_line),
+                                  caller_file, caller_line);
+    ref2 =
+        (char **) __ckd_malloc__(ckd_size_mul(n12, sizeof(void *),
+                                              caller_file, caller_line),
+                                 caller_file, caller_line);
+
+    for (i = 0, offset = 0; i < d1; i++, offset += d2)
+        ref1[i] = ref2 + offset;
+
+    offset = 0;
+    for (i = 0; i < d1; i++) {
+        for (j = 0; j < d2; j++) {
+            ref1[i][j] = mem + offset;
+            offset += d3 * elemsize;
+        }
+    }
+
+    return ref1;
+}
+
+
+/**
+ * Free a 3-D array built by __ckd_calloc_3d__(): the element storage,
+ * the inner pointer table and the outer pointer table.  NULL is
+ * accepted.
+ */
+void
+ckd_free_3d(void *inptr)
+{
+    void ***ptr = (void ***)inptr;
+
+    if (ptr && ptr[0])
+        ckd_free(ptr[0][0]);
+    if (ptr)
+        ckd_free(ptr[0]);
+    ckd_free(ptr);
+}
+
+
+/**
+ * Allocate a zero-filled d1 x d2 x d3 x d4 array of elem_size-byte
+ * elements, as contiguous element storage plus three levels of pointer
+ * tables.
+ *
+ * Every buffer comes from __ckd_calloc__(), so all of them match the
+ * ckd_free() calls made by ckd_free_4d(), and an allocation failure is
+ * reported through ckd_fail() with the caller's file and line.
+ */
+void ****
+__ckd_calloc_4d__(size_t d1,
+                  size_t d2,
+                  size_t d3,
+                  size_t d4,
+                  size_t elem_size,
+                  char *file,
+                  int line)
+{
+    void *store;
+    void **tmp1;
+    void ***tmp2;
+    void ****out;
+    size_t i, j;
+    size_t n12, n123;
+
+    n12 = ckd_size_mul(d1, d2, file, line);
+    n123 = ckd_size_mul(n12, d3, file, line);
+
+    store = __ckd_calloc__(ckd_size_mul(n123, d4, file, line), elem_size,
+                           file, line);
+    tmp1 = __ckd_calloc__(n123, sizeof(void *), file, line);
+    tmp2 = __ckd_calloc__(n12, sizeof(void **), file, line);
+    out = __ckd_calloc__(d1, sizeof(void ***), file, line);
+
+    for (i = 0, j = 0; i < n123; i++, j += d4) {
+        tmp1[i] = &((char *)store)[j*elem_size];
+    }
+
+    for (i = 0, j = 0; i < n12; i++, j += d3) {
+        tmp2[i] = &tmp1[j];
+    }
+
+    for (i = 0, j = 0; i < d1; i++, j += d2) {
+        out[i] = &tmp2[j];
+    }
+
+    return out;
+}
+
+/**
+ * Free a 4-D array built by __ckd_calloc_4d__().  NULL is accepted.
+ */
+void
+ckd_free_4d(void *inptr)
+{
+    void ****ptr = (void ****)inptr;
+    if (ptr == NULL)
+        return;
+    /* free the underlying store */
+    ckd_free(ptr[0][0][0]);
+
+    /* free the access overhead */
+    ckd_free(ptr[0][0]);
+    ckd_free(ptr[0]);
+    ckd_free(ptr);
+}
+
+/* Layers a 3d array access structure over a preallocated storage area */
+/**
+ * Only the two pointer tables are allocated here (d1*d2 inner
+ * pointers and d1 outer pointers); store is used as-is and is expected
+ * to hold d1*d2*d3 elements of elem_size bytes.
+ */
+void *
+__ckd_alloc_3d_ptr(size_t d1,
+                   size_t d2,
+                   size_t d3,
+                   void *store,
+                   size_t elem_size,
+                   char *file,
+                   int line)
+{
+    void **tmp1;
+    void ***out;
+    size_t i, j;
+    size_t n12;
+
+    n12 = ckd_size_mul(d1, d2, file, line);
+
+    tmp1 = __ckd_calloc__(n12, sizeof(void *), file, line);
+
+    out = __ckd_calloc__(d1, sizeof(void **), file, line);
+
+    for (i = 0, j = 0; i < n12; i++, j += d3) {
+        tmp1[i] = &((char *)store)[j*elem_size];
+    }
+
+    for (i = 0, j = 0; i < d1; i++, j += d2) {
+        out[i] = &tmp1[j];
+    }
+
+    return out;
+}
+
+/**
+ * Layers a 2d array access structure over a preallocated storage
+ * area: allocates a table of d1 row pointers where row i points at
+ * element i*d2 of store (elements being elem_size bytes each).
+ */
+void *
+__ckd_alloc_2d_ptr(size_t d1,
+                   size_t d2,
+                   void *store,
+                   size_t elem_size,
+                   char *file,
+                   int line)
+{
+    void **out;
+    size_t i, j;
+
+    out = __ckd_calloc__(d1, sizeof(void *), file, line);
+
+    for (i = 0, j = 0; i < d1; i++, j += d2) {
+        out[i] = &((char *)store)[j*elem_size];
+    }
+
+    return out;
+}
+
+/* vim: set ts=4 sw=4: */
diff --git
a/media/sphinxbase/src/libsphinxbase/util/cmd_ln.c b/media/sphinxbase/src/libsphinxbase/util/cmd_ln.c new file mode 100644 index 000000000..962482995 --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/util/cmd_ln.c @@ -0,0 +1,1082 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * ==================================================================== + * + */ +/* + * cmd_ln.c -- Command line argument parsing. + * + * ********************************************** + * CMU ARPA Speech Project + * + * Copyright (c) 1999 Carnegie Mellon University. + * ALL RIGHTS RESERVED. + * ********************************************** + * + * HISTORY + * + * 10-Sep-1998 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Changed strcasecmp() call in cmp_name() to strcmp_nocase() call. + * + * 15-Jul-1997 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Added required arguments handling. + * + * 07-Dec-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Created, based on Eric's implementation. Basically, combined several + * functions into one, eliminated validation, and simplified the interface. + */ + + +#include +#include +#include +#include + +#ifdef _MSC_VER +#pragma warning (disable: 4996 4018) +#endif + +#ifdef HAVE_CONFIG_H +#include +#endif + +#ifdef HAVE_UNISTD_H +#include +#endif + +#include "sphinxbase/cmd_ln.h" +#include "sphinxbase/err.h" +#include "sphinxbase/ckd_alloc.h" +#include "sphinxbase/hash_table.h" +#include "sphinxbase/case.h" +#include "sphinxbase/strfuncs.h" + +typedef struct cmd_ln_val_s { + anytype_t val; + int type; +} cmd_ln_val_t; + +struct cmd_ln_s { + int refcount; + hash_table_t *ht; + char **f_argv; + uint32 f_argc; +}; + +/** Global command-line, for non-reentrant API. */ +cmd_ln_t *global_cmdln; + +static void +arg_dump_r(cmd_ln_t *, FILE *, arg_t const *, int32); + +static cmd_ln_t * +parse_options(cmd_ln_t *, const arg_t *, int32, char* [], int32); + +/* + * Find max length of name and default fields in the given defn array. + * Return #items in defn array. 
+ */ +static int32 +arg_strlen(const arg_t * defn, int32 * namelen, int32 * deflen) +{ + int32 i, l; + + *namelen = *deflen = 0; + for (i = 0; defn[i].name; i++) { + l = strlen(defn[i].name); + if (*namelen < l) + *namelen = l; + + if (defn[i].deflt) + l = strlen(defn[i].deflt); + else + l = strlen("(null)"); + /* E_INFO("string default, %s , name %s, length %d\n",defn[i].deflt,defn[i].name,l); */ + if (*deflen < l) + *deflen = l; + } + + return i; +} + + +static int32 +cmp_name(const void *a, const void *b) +{ + return (strcmp_nocase + ((* (arg_t**) a)->name, + (* (arg_t**) b)->name)); +} + +static arg_t const ** +arg_sort(const arg_t * defn, int32 n) +{ + const arg_t ** pos; + int32 i; + + pos = (arg_t const **) ckd_calloc(n, sizeof(arg_t *)); + for (i = 0; i < n; ++i) + pos[i] = &defn[i]; + qsort(pos, n, sizeof(arg_t *), cmp_name); + + return pos; +} + +static size_t +strnappend(char **dest, size_t *dest_allocation, + const char *source, size_t n) +{ + size_t source_len, required_allocation; + + if (dest == NULL || dest_allocation == NULL) + return -1; + if (*dest == NULL && *dest_allocation != 0) + return -1; + if (source == NULL) + return *dest_allocation; + + source_len = strlen(source); + if (n && n < source_len) + source_len = n; + + required_allocation = (*dest ? 
strlen(*dest) : 0) + source_len + 1; + if (*dest_allocation < required_allocation) { + if (*dest_allocation == 0) { + *dest = (char *)ckd_calloc(required_allocation * 2, 1); + } else { + *dest = (char *)ckd_realloc(*dest, required_allocation * 2); + } + *dest_allocation = required_allocation * 2; + } + + strncat(*dest, source, source_len); + + return *dest_allocation; +} + +static size_t +strappend(char **dest, size_t *dest_allocation, + const char *source) +{ + return strnappend(dest, dest_allocation, source, 0); +} + +static char* +arg_resolve_env(const char *str) +{ + char *resolved_str = NULL; + char env_name[100]; + const char *env_val; + size_t alloced = 0; + const char *i = str, *j; + + /* calculate required resolved_str size */ + do { + j = strstr(i, "$("); + if (j != NULL) { + if (j != i) { + strnappend(&resolved_str, &alloced, i, j - i); + i = j; + } + j = strchr(i + 2, ')'); + if (j != NULL) { + if (j - (i + 2) < 100) { + strncpy(env_name, i + 2, j - (i + 2)); + env_name[j - (i + 2)] = '\0'; + #if !defined(_WIN32_WCE) + env_val = getenv(env_name); + if (env_val) + strappend(&resolved_str, &alloced, env_val); + #else + env_val = 0; + #endif + } + i = j + 1; + } else { + /* unclosed, copy and skip */ + j = i + 2; + strnappend(&resolved_str, &alloced, i, j - i); + i = j; + } + } else { + strappend(&resolved_str, &alloced, i); + } + } while(j != NULL); + + return resolved_str; +} + +static void +arg_dump_r(cmd_ln_t *cmdln, FILE *fp, const arg_t * defn, int32 doc) +{ + arg_t const **pos; + int32 i, n; + size_t l; + int32 namelen, deflen; + anytype_t *vp; + char const **array; + + /* No definitions, do nothing. */ + if (defn == NULL || fp == NULL) + return; + + /* Find max lengths of name and default value fields, and #entries in defn */ + n = arg_strlen(defn, &namelen, &deflen); + /* E_INFO("String length %d. 
Name length %d, Default Length %d\n",n, namelen, deflen); */ + namelen = namelen & 0xfffffff8; /* Previous tab position */ + deflen = deflen & 0xfffffff8; /* Previous tab position */ + + fprintf(fp, "[NAME]"); + for (l = strlen("[NAME]"); l < namelen; l += 8) + fprintf(fp, "\t"); + fprintf(fp, "\t[DEFLT]"); + for (l = strlen("[DEFLT]"); l < deflen; l += 8) + fprintf(fp, "\t"); + + if (doc) { + fprintf(fp, "\t[DESCR]\n"); + } + else { + fprintf(fp, "\t[VALUE]\n"); + } + + /* Print current configuration, sorted by name */ + pos = arg_sort(defn, n); + for (i = 0; i < n; i++) { + fprintf(fp, "%s", pos[i]->name); + for (l = strlen(pos[i]->name); l < namelen; l += 8) + fprintf(fp, "\t"); + + fprintf(fp, "\t"); + if (pos[i]->deflt) { + fprintf(fp, "%s", pos[i]->deflt); + l = strlen(pos[i]->deflt); + } + else + l = 0; + for (; l < deflen; l += 8) + fprintf(fp, "\t"); + + fprintf(fp, "\t"); + if (doc) { + if (pos[i]->doc) + fprintf(fp, "%s", pos[i]->doc); + } + else { + vp = cmd_ln_access_r(cmdln, pos[i]->name); + if (vp) { + switch (pos[i]->type) { + case ARG_INTEGER: + case REQARG_INTEGER: + fprintf(fp, "%ld", vp->i); + break; + case ARG_FLOATING: + case REQARG_FLOATING: + fprintf(fp, "%e", vp->fl); + break; + case ARG_STRING: + case REQARG_STRING: + if (vp->ptr) + fprintf(fp, "%s", (char *)vp->ptr); + break; + case ARG_STRING_LIST: + array = (char const**)vp->ptr; + if (array) + for (l = 0; array[l] != 0; l++) { + fprintf(fp, "%s,", array[l]); + } + break; + case ARG_BOOLEAN: + case REQARG_BOOLEAN: + fprintf(fp, "%s", vp->i ? 
"yes" : "no"); + break; + default: + E_ERROR("Unknown argument type: %d\n", pos[i]->type); + } + } + } + + fprintf(fp, "\n"); + } + ckd_free(pos); + + fprintf(fp, "\n"); +} + +static char ** +parse_string_list(const char *str) +{ + int count, i, j; + const char *p; + char **result; + + p = str; + count = 1; + while (*p) { + if (*p == ',') + count++; + p++; + } + /* Should end with NULL */ + result = (char **) ckd_calloc(count + 1, sizeof(char *)); + p = str; + for (i = 0; i < count; i++) { + for (j = 0; p[j] != ',' && p[j] != 0; j++); + result[i] = (char *)ckd_calloc(j + 1, sizeof(char)); + strncpy( result[i], p, j); + p = p + j + 1; + } + return result; +} + +static cmd_ln_val_t * +cmd_ln_val_init(int t, const char *str) +{ + cmd_ln_val_t *v; + anytype_t val; + char *e_str; + + if (!str) { + /* For lack of a better default value. */ + memset(&val, 0, sizeof(val)); + } + else { + int valid = 1; + e_str = arg_resolve_env(str); + + switch (t) { + case ARG_INTEGER: + case REQARG_INTEGER: + if (sscanf(e_str, "%ld", &val.i) != 1) + valid = 0; + break; + case ARG_FLOATING: + case REQARG_FLOATING: + if (e_str == NULL || e_str[0] == 0) + valid = 0; + val.fl = atof_c(e_str); + break; + case ARG_BOOLEAN: + case REQARG_BOOLEAN: + if ((e_str[0] == 'y') || (e_str[0] == 't') || + (e_str[0] == 'Y') || (e_str[0] == 'T') || (e_str[0] == '1')) { + val.i = TRUE; + } + else if ((e_str[0] == 'n') || (e_str[0] == 'f') || + (e_str[0] == 'N') || (e_str[0] == 'F') | + (e_str[0] == '0')) { + val.i = FALSE; + } + else { + E_ERROR("Unparsed boolean value '%s'\n", str); + valid = 0; + } + break; + case ARG_STRING: + case REQARG_STRING: + val.ptr = ckd_salloc(e_str); + break; + case ARG_STRING_LIST: + val.ptr = parse_string_list(e_str); + break; + default: + E_ERROR("Unknown argument type: %d\n", t); + valid = 0; + } + + ckd_free(e_str); + if (valid == 0) + return NULL; + } + + v = (cmd_ln_val_t *)ckd_calloc(1, sizeof(*v)); + memcpy(v, &val, sizeof(val)); + v->type = t; + + return v; +} + +/* + 
* Handles option parsing for cmd_ln_parse_file_r() and cmd_ln_init() + * also takes care of storing argv. + * DO NOT call it from cmd_ln_parse_r() + */ +static cmd_ln_t * +parse_options(cmd_ln_t *cmdln, const arg_t *defn, int32 argc, char* argv[], int32 strict) +{ + cmd_ln_t *new_cmdln; + + new_cmdln = cmd_ln_parse_r(cmdln, defn, argc, argv, strict); + /* If this failed then clean up and return NULL. */ + if (new_cmdln == NULL) { + int32 i; + for (i = 0; i < argc; ++i) + ckd_free(argv[i]); + ckd_free(argv); + return NULL; + } + + /* Otherwise, we need to add the contents of f_argv to the new object. */ + if (new_cmdln == cmdln) { + /* If we are adding to a previously passed-in cmdln, then + * store our allocated strings in its f_argv. */ + new_cmdln->f_argv = (char **)ckd_realloc(new_cmdln->f_argv, + (new_cmdln->f_argc + argc) + * sizeof(*new_cmdln->f_argv)); + memcpy(new_cmdln->f_argv + new_cmdln->f_argc, argv, + argc * sizeof(*argv)); + ckd_free(argv); + new_cmdln->f_argc += argc; + } + else { + /* Otherwise, store f_argc and f_argv. 
*/ + new_cmdln->f_argc = argc; + new_cmdln->f_argv = argv; + } + + return new_cmdln; +} + +void +cmd_ln_val_free(cmd_ln_val_t *val) +{ + int i; + if (val->type & ARG_STRING_LIST) { + char ** array = (char **)val->val.ptr; + if (array) { + for (i = 0; array[i] != NULL; i++) { + ckd_free(array[i]); + } + ckd_free(array); + } + } + if (val->type & ARG_STRING) + ckd_free(val->val.ptr); + ckd_free(val); +} + +cmd_ln_t * +cmd_ln_get(void) +{ + return global_cmdln; +} + +void +cmd_ln_appl_enter(int argc, char *argv[], + const char *default_argfn, + const arg_t * defn) +{ + /* Look for default or specified arguments file */ + const char *str; + + str = NULL; + + if ((argc == 2) && (strcmp(argv[1], "help") == 0)) { + cmd_ln_print_help(stderr, defn); + exit(1); + } + + if ((argc == 2) && (argv[1][0] != '-')) + str = argv[1]; + else if (argc == 1) { + FILE *fp; + E_INFO("Looking for default argument file: %s\n", default_argfn); + + if ((fp = fopen(default_argfn, "r")) == NULL) { + E_INFO("Can't find default argument file %s.\n", + default_argfn); + } + else { + str = default_argfn; + } + if (fp != NULL) + fclose(fp); + } + + + if (str) { + /* Build command line argument list from file */ + E_INFO("Parsing command lines from file %s\n", str); + if (cmd_ln_parse_file(defn, str, TRUE)) { + E_INFOCONT("Usage:\n"); + E_INFOCONT("\t%s argument-list, or\n", argv[0]); + E_INFOCONT("\t%s [argument-file] (default file: . 
%s)\n\n", + argv[0], default_argfn); + cmd_ln_print_help(stderr, defn); + exit(1); + } + } + else { + cmd_ln_parse(defn, argc, argv, TRUE); + } +} + +void +cmd_ln_appl_exit() +{ + cmd_ln_free(); +} + + +cmd_ln_t * +cmd_ln_parse_r(cmd_ln_t *inout_cmdln, const arg_t * defn, int32 argc, char *argv[], int strict) +{ + int32 i, j, n, argstart; + hash_table_t *defidx = NULL; + cmd_ln_t *cmdln; + + /* Construct command-line object */ + if (inout_cmdln == NULL) { + cmdln = (cmd_ln_t*)ckd_calloc(1, sizeof(*cmdln)); + cmdln->refcount = 1; + } + else + cmdln = inout_cmdln; + + /* Build a hash table for argument definitions */ + defidx = hash_table_new(50, 0); + if (defn) { + for (n = 0; defn[n].name; n++) { + void *v; + + v = hash_table_enter(defidx, defn[n].name, (void *)&defn[n]); + if (strict && (v != &defn[n])) { + E_ERROR("Duplicate argument name in definition: %s\n", defn[n].name); + goto error; + } + } + } + else { + /* No definitions. */ + n = 0; + } + + /* Allocate memory for argument values */ + if (cmdln->ht == NULL) + cmdln->ht = hash_table_new(n, 0 /* argument names are case-sensitive */ ); + + + /* skip argv[0] if it doesn't start with dash */ + argstart = 0; + if (argc > 0 && argv[0][0] != '-') { + argstart = 1; + } + + /* Parse command line arguments (name-value pairs) */ + for (j = argstart; j < argc; j += 2) { + arg_t *argdef; + cmd_ln_val_t *val; + void *v; + + if (hash_table_lookup(defidx, argv[j], &v) < 0) { + if (strict) { + E_ERROR("Unknown argument name '%s'\n", argv[j]); + goto error; + } + else if (defn == NULL) + v = NULL; + else + continue; + } + argdef = (arg_t *)v; + + /* Enter argument value */ + if (j + 1 >= argc) { + cmd_ln_print_help_r(cmdln, stderr, defn); + E_ERROR("Argument value for '%s' missing\n", argv[j]); + goto error; + } + + if (argdef == NULL) + val = cmd_ln_val_init(ARG_STRING, argv[j + 1]); + else { + if ((val = cmd_ln_val_init(argdef->type, argv[j + 1])) == NULL) { + cmd_ln_print_help_r(cmdln, stderr, defn); + E_ERROR("Bad 
argument value for %s: %s\n", argv[j], + argv[j + 1]); + goto error; + } + } + + if ((v = hash_table_enter(cmdln->ht, argv[j], (void *)val)) != + (void *)val) + { + if (strict) { + cmd_ln_val_free(val); + E_ERROR("Duplicate argument name in arguments: %s\n", + argdef->name); + goto error; + } + else { + v = hash_table_replace(cmdln->ht, argv[j], (void *)val); + cmd_ln_val_free((cmd_ln_val_t *)v); + } + } + } + + /* Fill in default values, if any, for unspecified arguments */ + for (i = 0; i < n; i++) { + cmd_ln_val_t *val; + void *v; + + if (hash_table_lookup(cmdln->ht, defn[i].name, &v) < 0) { + if ((val = cmd_ln_val_init(defn[i].type, defn[i].deflt)) == NULL) { + E_ERROR + ("Bad default argument value for %s: %s\n", + defn[i].name, defn[i].deflt); + goto error; + } + hash_table_enter(cmdln->ht, defn[i].name, (void *)val); + } + } + + /* Check for required arguments; exit if any missing */ + j = 0; + for (i = 0; i < n; i++) { + if (defn[i].type & ARG_REQUIRED) { + void *v; + if (hash_table_lookup(cmdln->ht, defn[i].name, &v) != 0) + E_ERROR("Missing required argument %s\n", defn[i].name); + } + } + if (j > 0) { + cmd_ln_print_help_r(cmdln, stderr, defn); + goto error; + } + + if (strict && argc == 1) { + E_ERROR("No arguments given, available options are:\n"); + cmd_ln_print_help_r(cmdln, stderr, defn); + if (defidx) + hash_table_free(defidx); + if (inout_cmdln == NULL) + cmd_ln_free_r(cmdln); + return NULL; + } + +#ifndef _WIN32_WCE + /* Set up logging. We need to do this earlier because we want to dump + * the information to the configured log, not to the stderr. 
*/ + if (cmd_ln_exists_r(cmdln, "-logfn") && cmd_ln_str_r(cmdln, "-logfn")) { + if (err_set_logfile(cmd_ln_str_r(cmdln, "-logfn")) < 0) + E_FATAL_SYSTEM("cannot redirect log output"); + } + + /* Echo command line */ + E_INFO("Parsing command line:\n"); + for (i = 0; i < argc; i++) { + if (argv[i][0] == '-') + E_INFOCONT("\\\n\t"); + E_INFOCONT("%s ", argv[i]); + } + E_INFOCONT("\n\n"); + + /* Print configuration */ + E_INFOCONT("Current configuration:\n"); + arg_dump_r(cmdln, err_get_logfp(), defn, 0); +#endif + + hash_table_free(defidx); + return cmdln; + + error: + if (defidx) + hash_table_free(defidx); + if (inout_cmdln == NULL) + cmd_ln_free_r(cmdln); + E_ERROR("Failed to parse arguments list\n"); + return NULL; +} + +cmd_ln_t * +cmd_ln_init(cmd_ln_t *inout_cmdln, const arg_t *defn, int32 strict, ...) +{ + va_list args; + const char *arg, *val; + char **f_argv; + int32 f_argc; + + va_start(args, strict); + f_argc = 0; + while ((arg = va_arg(args, const char *))) { + ++f_argc; + val = va_arg(args, const char*); + if (val == NULL) { + E_ERROR("Number of arguments must be even!\n"); + return NULL; + } + ++f_argc; + } + va_end(args); + + /* Now allocate f_argv */ + f_argv = (char**)ckd_calloc(f_argc, sizeof(*f_argv)); + va_start(args, strict); + f_argc = 0; + while ((arg = va_arg(args, const char *))) { + f_argv[f_argc] = ckd_salloc(arg); + ++f_argc; + val = va_arg(args, const char*); + f_argv[f_argc] = ckd_salloc(val); + ++f_argc; + } + va_end(args); + + return parse_options(inout_cmdln, defn, f_argc, f_argv, strict); +} + +int +cmd_ln_parse(const arg_t * defn, int32 argc, char *argv[], int strict) +{ + cmd_ln_t *cmdln; + + cmdln = cmd_ln_parse_r(global_cmdln, defn, argc, argv, strict); + if (cmdln == NULL) { + /* Old, bogus behaviour... */ + E_ERROR("Failed to parse arguments list, forced exit\n"); + exit(-1); + } + /* Initialize global_cmdln if not present. 
*/ + if (global_cmdln == NULL) { + global_cmdln = cmdln; + } + return 0; +} + +cmd_ln_t * +cmd_ln_parse_file_r(cmd_ln_t *inout_cmdln, const arg_t * defn, const char *filename, int32 strict) +{ + FILE *file; + int argc; + int argv_size; + char *str; + int arg_max_length = 512; + int len = 0; + int quoting, ch; + char **f_argv; + int rv = 0; + const char separator[] = " \t\r\n"; + + if ((file = fopen(filename, "r")) == NULL) { + E_ERROR("Cannot open configuration file %s for reading\n", + filename); + return NULL; + } + + ch = fgetc(file); + /* Skip to the next interesting character */ + for (; ch != EOF && strchr(separator, ch); ch = fgetc(file)) ; + + if (ch == EOF) { + fclose(file); + return NULL; + } + + /* + * Initialize default argv, argc, and argv_size. + */ + argv_size = 10; + argc = 0; + f_argv = (char **)ckd_calloc(argv_size, sizeof(char *)); + /* Silently make room for \0 */ + str = (char* )ckd_calloc(arg_max_length + 1, sizeof(char)); + quoting = 0; + + do { + /* Handle arguments that are commented out */ + if (len == 0 && argc % 2 == 0) { + while (ch == '#') { + /* Skip everything until newline */ + for (ch = fgetc(file); ch != EOF && ch != '\n'; ch = fgetc(file)) ; + /* Skip to the next interesting character */ + for (ch = fgetc(file); ch != EOF && strchr(separator, ch); ch = fgetc(file)) ; + } + + /* Check if we are at the last line (without anything interesting in it) */ + if (ch == EOF) + break; + } + + /* Handle quoted arguments */ + if (ch == '"' || ch == '\'') { + if (quoting == ch) /* End a quoted section with the same type */ + quoting = 0; + else if (quoting) { + E_ERROR("Nesting quotations is not supported!\n"); + rv = 1; + break; + } + else + quoting = ch; /* Start a quoted section */ + } + else if (ch == EOF || (!quoting && strchr(separator, ch))) { + /* Reallocate argv so it is big enough to contain all the arguments */ + if (argc >= argv_size) { + char **tmp_argv; + if (!(tmp_argv = + (char **)ckd_realloc(f_argv, argv_size * 2 * 
sizeof(char *)))) { + rv = 1; + break; + } + f_argv = tmp_argv; + argv_size *= 2; + } + /* Add the string to the list of arguments */ + f_argv[argc] = ckd_salloc(str); + len = 0; + str[0] = '\0'; + argc++; + + if (quoting) + E_WARN("Unclosed quotation, having EOF close it...\n"); + + /* Skip to the next interesting character */ + for (; ch != EOF && strchr(separator, ch); ch = fgetc(file)) ; + + if (ch == EOF) + break; + + /* We already have the next character */ + continue; + } + else { + if (len >= arg_max_length) { + /* Make room for more chars (including the \0 !) */ + char *tmp_str = str; + if ((tmp_str = (char *)ckd_realloc(str, (1 + arg_max_length * 2) * sizeof(char))) == NULL) { + rv = 1; + break; + } + str = tmp_str; + arg_max_length *= 2; + } + /* Add the char to the argument string */ + str[len++] = ch; + /* Always null terminate */ + str[len] = '\0'; + } + + ch = fgetc(file); + } while (1); + + fclose(file); + + ckd_free(str); + + if (rv) { + for (ch = 0; ch < argc; ++ch) + ckd_free(f_argv[ch]); + ckd_free(f_argv); + return NULL; + } + + return parse_options(inout_cmdln, defn, argc, f_argv, strict); +} + +int +cmd_ln_parse_file(const arg_t * defn, const char *filename, int32 strict) +{ + cmd_ln_t *cmdln; + + cmdln = cmd_ln_parse_file_r(global_cmdln, defn, filename, strict); + if (cmdln == NULL) { + return -1; + } + /* Initialize global_cmdln if not present. 
*/ + if (global_cmdln == NULL) { + global_cmdln = cmdln; + } + return 0; +} + +void +cmd_ln_print_help_r(cmd_ln_t *cmdln, FILE *fp, arg_t const* defn) +{ + if (defn == NULL) + return; + fprintf(fp, "Arguments list definition:\n"); + arg_dump_r(cmdln, fp, defn, 1); +} + +int +cmd_ln_exists_r(cmd_ln_t *cmdln, const char *name) +{ + void *val; + if (cmdln == NULL) + return FALSE; + return (hash_table_lookup(cmdln->ht, name, &val) == 0); +} + +anytype_t * +cmd_ln_access_r(cmd_ln_t *cmdln, const char *name) +{ + void *val; + if (hash_table_lookup(cmdln->ht, name, &val) < 0) { + E_ERROR("Unknown argument: %s\n", name); + return NULL; + } + return (anytype_t *)val; +} + +char const * +cmd_ln_str_r(cmd_ln_t *cmdln, char const *name) +{ + anytype_t *val; + val = cmd_ln_access_r(cmdln, name); + if (val == NULL) + return NULL; + return (char const *)val->ptr; +} + +char const ** +cmd_ln_str_list_r(cmd_ln_t *cmdln, char const *name) +{ + anytype_t *val; + val = cmd_ln_access_r(cmdln, name); + if (val == NULL) + return NULL; + return (char const **)val->ptr; +} + +long +cmd_ln_int_r(cmd_ln_t *cmdln, char const *name) +{ + anytype_t *val; + val = cmd_ln_access_r(cmdln, name); + if (val == NULL) + return 0L; + return val->i; +} + +double +cmd_ln_float_r(cmd_ln_t *cmdln, char const *name) +{ + anytype_t *val; + val = cmd_ln_access_r(cmdln, name); + if (val == NULL) + return 0.0; + return val->fl; +} + +void +cmd_ln_set_str_r(cmd_ln_t *cmdln, char const *name, char const *str) +{ + anytype_t *val; + val = cmd_ln_access_r(cmdln, name); + if (val == NULL) { + E_ERROR("Unknown argument: %s\n", name); + return; + } + ckd_free(val->ptr); + if (str == NULL) + val->ptr = NULL; + else + val->ptr = ckd_salloc(str); +} + +void +cmd_ln_set_int_r(cmd_ln_t *cmdln, char const *name, long iv) +{ + anytype_t *val; + val = cmd_ln_access_r(cmdln, name); + if (val == NULL) { + E_ERROR("Unknown argument: %s\n", name); + return; + } + val->i = iv; +} + +void +cmd_ln_set_float_r(cmd_ln_t *cmdln, char 
const *name, double fv) +{ + anytype_t *val; + val = cmd_ln_access_r(cmdln, name); + if (val == NULL) { + E_ERROR("Unknown argument: %s\n", name); + return; + } + val->fl = fv; +} + +cmd_ln_t * +cmd_ln_retain(cmd_ln_t *cmdln) +{ + ++cmdln->refcount; + return cmdln; +} + +int +cmd_ln_free_r(cmd_ln_t *cmdln) +{ + if (cmdln == NULL) + return 0; + if (--cmdln->refcount > 0) + return cmdln->refcount; + + if (cmdln->ht) { + glist_t entries; + gnode_t *gn; + int32 n; + + entries = hash_table_tolist(cmdln->ht, &n); + for (gn = entries; gn; gn = gnode_next(gn)) { + hash_entry_t *e = (hash_entry_t *)gnode_ptr(gn); + cmd_ln_val_free((cmd_ln_val_t *)e->val); + } + glist_free(entries); + hash_table_free(cmdln->ht); + cmdln->ht = NULL; + } + + if (cmdln->f_argv) { + int32 i; + for (i = 0; i < cmdln->f_argc; ++i) { + ckd_free(cmdln->f_argv[i]); + } + ckd_free(cmdln->f_argv); + cmdln->f_argv = NULL; + cmdln->f_argc = 0; + } + ckd_free(cmdln); + return 0; +} + +void +cmd_ln_free(void) +{ + cmd_ln_free_r(global_cmdln); + global_cmdln = NULL; +} + +/* vim: set ts=4 sw=4: */ diff --git a/media/sphinxbase/src/libsphinxbase/util/dtoa.c b/media/sphinxbase/src/libsphinxbase/util/dtoa.c new file mode 100644 index 000000000..4673ae003 --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/util/dtoa.c @@ -0,0 +1,2979 @@ +/**************************************************************** + * + * The author of this software is David M. Gay. + * + * Copyright (c) 1991, 2000, 2001 by Lucent Technologies. + * + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. 
IN PARTICULAR, NEITHER THE AUTHOR NOR LUCENT MAKES ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + * + ***************************************************************/ + +/**************************************************************** + * This is dtoa.c by David M. Gay, downloaded from + * http://www.netlib.org/fp/dtoa.c on April 15, 2009 and modified for + * inclusion into the Python core by Mark E. T. Dickinson and Eric V. Smith. + * It was then taken from the Python distribution and imported into sphinxbase. + * The Python version is preferred due to cleanups, though the original + * version at netlib is still maintained. + * + * Please remember to check http://www.netlib.org/fp regularly for bugfixes and updates. + * + * The major modifications from Gay's original code are as follows: + * + * 0. The original code has been specialized to Sphinxbase's needs by removing + * many of the #ifdef'd sections. In particular, code to support VAX and + * IBM floating-point formats, hex NaNs, hex floats, locale-aware + * treatment of the decimal point, and setting of the inexact flag have + * been removed. + * + * 1. We use ckd_malloc and ckd_free in place of malloc and free. + * + * 2. The public functions strtod, dtoa and freedtoa all now have + * a sb_ prefix. + * + * 3. Instead of assuming that malloc always succeeds, we thread + * malloc failures through the code. The functions + * + * Balloc, multadd, s2b, i2b, mult, pow5mult, lshift, diff, d2b + * + * of return type *Bigint all return NULL to indicate a malloc failure. + * Similarly, rv_alloc and nrv_alloc (return type char *) return NULL on + * failure. bigcomp now has return type int (it used to be void) and + * returns -1 on failure and 0 otherwise. sb_dtoa returns NULL + * on failure. sb_strtod indicates failure due to malloc failure + * by returning -1.0, setting errno=ENOMEM and *se to s00. + * + * 4.
The static variable dtoa_result has been removed. Callers of + * sb_dtoa are expected to call sb_freedtoa to free the memory allocated + * by sb_dtoa. + * + * 5. The code has been reformatted to better fit with C style. + * + * 6. A bug in the memory allocation has been fixed: to avoid FREEing memory + * that hasn't been MALLOC'ed, private_mem should only be used when k <= + * Kmax. + * + * 7. sb_strtod has been modified so that it doesn't accept strings with + * leading whitespace. + * + ***************************************************************/ + +/* Please send bug reports for the original dtoa.c code to David M. Gay (dmg + * at acm dot org, with " at " changed to "@" and " dot " changed to "."). + */ + +/* On a machine with IEEE extended-precision registers, it is + * necessary to specify double-precision (53-bit) rounding precision + * before invoking strtod or dtoa. If the machine uses (the equivalent + * of) Intel 80x87 arithmetic, the call + * _control87(PC_53, MCW_PC); + * does this with many compilers. Whether this or another call is + * appropriate depends on the compiler; for this to work, it may be + * necessary to #include "float.h" or another system-dependent header + * file. + */ + +/* strtod for IEEE-, VAX-, and IBM-arithmetic machines. + * + * This strtod returns a nearest machine number to the input decimal + * string (or sets errno to ERANGE). With IEEE arithmetic, ties are + * broken by the IEEE round-even rule. Otherwise ties are broken by + * biased rounding (add half and chop). + * + * Inspired loosely by William D. Clinger's paper "How to Read Floating + * Point Numbers Accurately" [Proc. ACM SIGPLAN '90, pp. 92-101]. + * + * Modifications: + * + * 1. We only require IEEE, IBM, or VAX double-precision + * arithmetic (not IEEE double-extended). + * 2.
We get by with floating-point arithmetic in a case that + * Clinger missed -- when we're computing d * 10^n + * for a small integer d and the integer n is not too + * much larger than 22 (the maximum integer k for which + * we can represent 10^k exactly), we may be able to + * compute (d*10^k) * 10^(e-k) with just one roundoff. + * 3. Rather than a bit-at-a-time adjustment of the binary + * result in the hard case, we use floating-point + * arithmetic to determine the adjustment to within + * one bit; only in really hard cases do we need to + * compute a second residual. + * 4. Because of 3., we don't need a large table of powers of 10 + * for ten-to-e (just some small tables, e.g. of 10^k + * for 0 <= k <= 22). + */ + +/* Linking of sphinxbase's #defines to Gay's #defines starts here. */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include +#include + +#include +#include + +#ifdef WORDS_BIGENDIAN +#define IEEE_MC68k +#else +#define IEEE_8087 +#endif + +#define Long int32 /* ZOMG */ +#define ULong uint32 /* WTF */ +#ifdef HAVE_LONG_LONG +#define ULLong uint64 +#endif + +#define MALLOC ckd_malloc +#define FREE ckd_free + +#define DBL_DIG 15 +#define DBL_MAX_10_EXP 308 +#define DBL_MAX_EXP 1024 +#define FLT_RADIX 2 + +/* maximum permitted exponent value for strtod; exponents larger than + MAX_ABS_EXP in absolute value get truncated to +-MAX_ABS_EXP. MAX_ABS_EXP + should fit into an int. */ +#ifndef MAX_ABS_EXP +#define MAX_ABS_EXP 1100000000U +#endif +/* Bound on length of pieces of input strings in sb_strtod; specifically, + this is used to bound the total number of digits ignoring leading zeros and + the number of digits that follow the decimal point. Ideally, MAX_DIGITS + should satisfy MAX_DIGITS + 400 < MAX_ABS_EXP; that ensures that the + exponent clipping in sb_strtod can't affect the value of the output. 
*/ +#ifndef MAX_DIGITS +#define MAX_DIGITS 1000000000U +#endif + +/* End sphinxbase #define linking */ + +#ifdef DEBUG +#define Bug(x) {fprintf(stderr, "%s\n", x); exit(1);} +#endif + +#ifndef PRIVATE_MEM +#define PRIVATE_MEM 2304 +#endif +#define PRIVATE_mem ((PRIVATE_MEM+sizeof(double)-1)/sizeof(double)) +static double private_mem[PRIVATE_mem], *pmem_next = private_mem; + +#ifdef __cplusplus +extern "C" { +#endif + +typedef union { double d; ULong L[2]; } U; + +#ifdef IEEE_8087 +#define word0(x) (x)->L[1] +#define word1(x) (x)->L[0] +#else +#define word0(x) (x)->L[0] +#define word1(x) (x)->L[1] +#endif +#define dval(x) (x)->d + +#ifndef STRTOD_DIGLIM +#define STRTOD_DIGLIM 40 +#endif + +/* maximum permitted exponent value for strtod; exponents larger than + MAX_ABS_EXP in absolute value get truncated to +-MAX_ABS_EXP. MAX_ABS_EXP + should fit into an int. */ +#ifndef MAX_ABS_EXP +#define MAX_ABS_EXP 1100000000U +#endif +/* Bound on length of pieces of input strings in sb_strtod; specifically, + this is used to bound the total number of digits ignoring leading zeros and + the number of digits that follow the decimal point. Ideally, MAX_DIGITS + should satisfy MAX_DIGITS + 400 < MAX_ABS_EXP; that ensures that the + exponent clipping in sb_strtod can't affect the value of the output. */ +#ifndef MAX_DIGITS +#define MAX_DIGITS 1000000000U +#endif + +/* Guard against trying to use the above values on unusual platforms with ints + * of width less than 32 bits. */ +#if MAX_ABS_EXP > 0x7fffffff +#error "MAX_ABS_EXP should fit in an int" +#endif +#if MAX_DIGITS > 0x7fffffff +#error "MAX_DIGITS should fit in an int" +#endif + +/* The following definition of Storeinc is appropriate for MIPS processors. 
+ * An alternative that might be better on some machines is + * #define Storeinc(a,b,c) (*a++ = b << 16 | c & 0xffff) + */ +#if defined(IEEE_8087) +#define Storeinc(a,b,c) (((unsigned short *)a)[1] = (unsigned short)b, \ + ((unsigned short *)a)[0] = (unsigned short)c, a++) +#else +#define Storeinc(a,b,c) (((unsigned short *)a)[0] = (unsigned short)b, \ + ((unsigned short *)a)[1] = (unsigned short)c, a++) +#endif + +/* #define P DBL_MANT_DIG */ +/* Ten_pmax = floor(P*log(2)/log(5)) */ +/* Bletch = (highest power of 2 < DBL_MAX_10_EXP) / 16 */ +/* Quick_max = floor((P-1)*log(FLT_RADIX)/log(10) - 1) */ +/* Int_max = floor(P*log(FLT_RADIX)/log(10) - 1) */ + +#define Exp_shift 20 +#define Exp_shift1 20 +#define Exp_msk1 0x100000 +#define Exp_msk11 0x100000 +#define Exp_mask 0x7ff00000 +#define P 53 +#define Nbits 53 +#define Bias 1023 +#define Emax 1023 +#define Emin (-1022) +#define Etiny (-1074) /* smallest denormal is 2**Etiny */ +#define Exp_1 0x3ff00000 +#define Exp_11 0x3ff00000 +#define Ebits 11 +#define Frac_mask 0xfffff +#define Frac_mask1 0xfffff +#define Ten_pmax 22 +#define Bletch 0x10 +#define Bndry_mask 0xfffff +#define Bndry_mask1 0xfffff +#define Sign_bit 0x80000000 +#define Log2P 1 +#define Tiny0 0 +#define Tiny1 1 +#define Quick_max 14 +#define Int_max 14 + +#ifndef Flt_Rounds +#ifdef FLT_ROUNDS +#define Flt_Rounds FLT_ROUNDS +#else +#define Flt_Rounds 1 +#endif +#endif /*Flt_Rounds*/ + +#define Rounding Flt_Rounds + +#define Big0 (Frac_mask1 | Exp_msk1*(DBL_MAX_EXP+Bias-1)) +#define Big1 0xffffffff + +/* Standard NaN used by sb_stdnan. */ + +#define NAN_WORD0 0x7ff80000 +#define NAN_WORD1 0 + +/* Bits of the representation of positive infinity. 
*/ + +#define POSINF_WORD0 0x7ff00000 +#define POSINF_WORD1 0 + +/* struct BCinfo is used to pass information from sb_strtod to bigcomp */ + +typedef struct BCinfo BCinfo; +struct +BCinfo { + int e0, nd, nd0, scale; +}; + +#define FFFFFFFF 0xffffffffUL + +#define Kmax 7 + +/* struct Bigint is used to represent arbitrary-precision integers. These + integers are stored in sign-magnitude format, with the magnitude stored as + an array of base 2**32 digits. Bigints are always normalized: if x is a + Bigint then x->wds >= 1, and either x->wds == 1 or x[wds-1] is nonzero. + + The Bigint fields are as follows: + + - next is a header used by Balloc and Bfree to keep track of lists + of freed Bigints; it's also used for the linked list of + powers of 5 of the form 5**2**i used by pow5mult. + - k indicates which pool this Bigint was allocated from + - maxwds is the maximum number of words space was allocated for + (usually maxwds == 2**k) + - sign is 1 for negative Bigints, 0 for positive. The sign is unused + (ignored on inputs, set to 0 on outputs) in almost all operations + involving Bigints: a notable exception is the diff function, which + ignores signs on inputs but sets the sign of the output correctly. + - wds is the actual number of significant words + - x contains the vector of words (digits) for this Bigint, from least + significant (x[0]) to most significant (x[wds-1]). +*/ + +struct +Bigint { + struct Bigint *next; + int k, maxwds, sign, wds; + ULong x[1]; +}; + +typedef struct Bigint Bigint; + +#ifndef Py_USING_MEMORY_DEBUGGER + +/* Memory management: memory is allocated from, and returned to, Kmax+1 pools + of memory, where pool k (0 <= k <= Kmax) is for Bigints b with b->maxwds == + 1 << k. These pools are maintained as linked lists, with freelist[k] + pointing to the head of the list for pool k. + + On allocation, if there's no free slot in the appropriate pool, MALLOC is + called to get more memory. 
This memory is not returned to the system until + Python quits. There's also a private memory pool that's allocated from + in preference to using MALLOC. + + For Bigints with more than (1 << Kmax) digits (which implies at least 1233 + decimal digits), memory is directly allocated using MALLOC, and freed using + FREE. + + XXX: it would be easy to bypass this memory-management system and + translate each call to Balloc into a call to PyMem_Malloc, and each + Bfree to PyMem_Free. Investigate whether this has any significant + performance on impact. */ + +static Bigint *freelist[Kmax+1]; + +/* Allocate space for a Bigint with up to 1<next; + else { + x = 1 << k; + len = (sizeof(Bigint) + (x-1)*sizeof(ULong) + sizeof(double) - 1) + /sizeof(double); + if (k <= Kmax && pmem_next - private_mem + len <= PRIVATE_mem) { + rv = (Bigint*)pmem_next; + pmem_next += len; + } + else { + rv = (Bigint*)MALLOC(len*sizeof(double)); + if (rv == NULL) + return NULL; + } + rv->k = k; + rv->maxwds = x; + } + rv->sign = rv->wds = 0; + return rv; +} + +/* Free a Bigint allocated with Balloc */ + +static void +Bfree(Bigint *v) +{ + if (v) { + if (v->k > Kmax) + FREE((void*)v); + else { + v->next = freelist[v->k]; + freelist[v->k] = v; + } + } +} + +#else + +/* Alternative versions of Balloc and Bfree that use PyMem_Malloc and + PyMem_Free directly in place of the custom memory allocation scheme above. + These are provided for the benefit of memory debugging tools like + Valgrind. */ + +/* Allocate space for a Bigint with up to 1<k = k; + rv->maxwds = x; + rv->sign = rv->wds = 0; + return rv; +} + +/* Free a Bigint allocated with Balloc */ + +static void +Bfree(Bigint *v) +{ + if (v) { + FREE((void*)v); + } +} + +#endif /* Py_USING_MEMORY_DEBUGGER */ + +#define Bcopy(x,y) memcpy((char *)&x->sign, (char *)&y->sign, \ + y->wds*sizeof(Long) + 2*sizeof(int)) + +/* Multiply a Bigint b by m and add a. 
Either modifies b in place and returns + a pointer to the modified b, or Bfrees b and returns a pointer to a copy. + On failure, return NULL. In this case, b will have been already freed. */ + +static Bigint * +multadd(Bigint *b, int m, int a) /* multiply by m and add a */ +{ + int i, wds; +#ifdef ULLong + ULong *x; + ULLong carry, y; +#else + ULong carry, *x, y; + ULong xi, z; +#endif + Bigint *b1; + + wds = b->wds; + x = b->x; + i = 0; + carry = a; + do { +#ifdef ULLong + y = *x * (ULLong)m + carry; + carry = y >> 32; + *x++ = (ULong)(y & FFFFFFFF); +#else + xi = *x; + y = (xi & 0xffff) * m + carry; + z = (xi >> 16) * m + (y >> 16); + carry = z >> 16; + *x++ = (z << 16) + (y & 0xffff); +#endif + } + while(++i < wds); + if (carry) { + if (wds >= b->maxwds) { + b1 = Balloc(b->k+1); + if (b1 == NULL){ + Bfree(b); + return NULL; + } + Bcopy(b1, b); + Bfree(b); + b = b1; + } + b->x[wds++] = (ULong)carry; + b->wds = wds; + } + return b; +} + +/* convert a string s containing nd decimal digits (possibly containing a + decimal separator at position nd0, which is ignored) to a Bigint. This + function carries on where the parsing code in sb_strtod leaves off: on + entry, y9 contains the result of converting the first 9 digits. Returns + NULL on failure. */ + +static Bigint * +s2b(const char *s, int nd0, int nd, ULong y9) +{ + Bigint *b; + int i, k; + Long x, y; + + x = (nd + 8) / 9; + for(k = 0, y = 1; x > y; y <<= 1, k++) ; + b = Balloc(k); + if (b == NULL) + return NULL; + b->x[0] = y9; + b->wds = 1; + + if (nd <= 9) + return b; + + s += 9; + for (i = 9; i < nd0; i++) { + b = multadd(b, 10, *s++ - '0'); + if (b == NULL) + return NULL; + } + s++; + for(; i < nd; i++) { + b = multadd(b, 10, *s++ - '0'); + if (b == NULL) + return NULL; + } + return b; +} + +/* count leading 0 bits in the 32-bit integer x. 
*/ + +static int +hi0bits(ULong x) +{ + int k = 0; + + if (!(x & 0xffff0000)) { + k = 16; + x <<= 16; + } + if (!(x & 0xff000000)) { + k += 8; + x <<= 8; + } + if (!(x & 0xf0000000)) { + k += 4; + x <<= 4; + } + if (!(x & 0xc0000000)) { + k += 2; + x <<= 2; + } + if (!(x & 0x80000000)) { + k++; + if (!(x & 0x40000000)) + return 32; + } + return k; +} + +/* count trailing 0 bits in the 32-bit integer y, and shift y right by that + number of bits. */ + +static int +lo0bits(ULong *y) +{ + int k; + ULong x = *y; + + if (x & 7) { + if (x & 1) + return 0; + if (x & 2) { + *y = x >> 1; + return 1; + } + *y = x >> 2; + return 2; + } + k = 0; + if (!(x & 0xffff)) { + k = 16; + x >>= 16; + } + if (!(x & 0xff)) { + k += 8; + x >>= 8; + } + if (!(x & 0xf)) { + k += 4; + x >>= 4; + } + if (!(x & 0x3)) { + k += 2; + x >>= 2; + } + if (!(x & 1)) { + k++; + x >>= 1; + if (!x) + return 32; + } + *y = x; + return k; +} + +/* convert a small nonnegative integer to a Bigint */ + +static Bigint * +i2b(int i) +{ + Bigint *b; + + b = Balloc(1); + if (b == NULL) + return NULL; + b->x[0] = i; + b->wds = 1; + return b; +} + +/* multiply two Bigints. Returns a new Bigint, or NULL on failure. Ignores + the signs of a and b. 
*/ + +static Bigint * +mult(Bigint *a, Bigint *b) +{ + Bigint *c; + int k, wa, wb, wc; + ULong *x, *xa, *xae, *xb, *xbe, *xc, *xc0; + ULong y; +#ifdef ULLong + ULLong carry, z; +#else + ULong carry, z; + ULong z2; +#endif + + if ((!a->x[0] && a->wds == 1) || (!b->x[0] && b->wds == 1)) { + c = Balloc(0); + if (c == NULL) + return NULL; + c->wds = 1; + c->x[0] = 0; + return c; + } + + if (a->wds < b->wds) { + c = a; + a = b; + b = c; + } + k = a->k; + wa = a->wds; + wb = b->wds; + wc = wa + wb; + if (wc > a->maxwds) + k++; + c = Balloc(k); + if (c == NULL) + return NULL; + for(x = c->x, xa = x + wc; x < xa; x++) + *x = 0; + xa = a->x; + xae = xa + wa; + xb = b->x; + xbe = xb + wb; + xc0 = c->x; +#ifdef ULLong + for(; xb < xbe; xc0++) { + if ((y = *xb++)) { + x = xa; + xc = xc0; + carry = 0; + do { + z = *x++ * (ULLong)y + *xc + carry; + carry = z >> 32; + *xc++ = (ULong)(z & FFFFFFFF); + } + while(x < xae); + *xc = (ULong)carry; + } + } +#else + for(; xb < xbe; xb++, xc0++) { + if (y = *xb & 0xffff) { + x = xa; + xc = xc0; + carry = 0; + do { + z = (*x & 0xffff) * y + (*xc & 0xffff) + carry; + carry = z >> 16; + z2 = (*x++ >> 16) * y + (*xc >> 16) + carry; + carry = z2 >> 16; + Storeinc(xc, z2, z); + } + while(x < xae); + *xc = carry; + } + if (y = *xb >> 16) { + x = xa; + xc = xc0; + carry = 0; + z2 = *xc; + do { + z = (*x & 0xffff) * y + (*xc >> 16) + carry; + carry = z >> 16; + Storeinc(xc, z, z2); + z2 = (*x++ >> 16) * y + (*xc & 0xffff) + carry; + carry = z2 >> 16; + } + while(x < xae); + *xc = z2; + } + } +#endif + for(xc0 = c->x, xc = xc0 + wc; wc > 0 && !*--xc; --wc) ; + c->wds = wc; + return c; +} + +#ifndef Py_USING_MEMORY_DEBUGGER + +/* p5s is a linked list of powers of 5 of the form 5**(2**i), i >= 2 */ + +static Bigint *p5s; + +/* multiply the Bigint b by 5**k. Returns a pointer to the result, or NULL on + failure; if the returned pointer is distinct from b then the original + Bigint b will have been Bfree'd. Ignores the sign of b. 
*/ + +static Bigint * +pow5mult(Bigint *b, int k) +{ + Bigint *b1, *p5, *p51; + int i; + static int p05[3] = { 5, 25, 125 }; + + if ((i = k & 3)) { + b = multadd(b, p05[i-1], 0); + if (b == NULL) + return NULL; + } + + if (!(k >>= 2)) + return b; + p5 = p5s; + if (!p5) { + /* first time */ + p5 = i2b(625); + if (p5 == NULL) { + Bfree(b); + return NULL; + } + p5s = p5; + p5->next = 0; + } + for(;;) { + if (k & 1) { + b1 = mult(b, p5); + Bfree(b); + b = b1; + if (b == NULL) + return NULL; + } + if (!(k >>= 1)) + break; + p51 = p5->next; + if (!p51) { + p51 = mult(p5,p5); + if (p51 == NULL) { + Bfree(b); + return NULL; + } + p51->next = 0; + p5->next = p51; + } + p5 = p51; + } + return b; +} + +#else + +/* Version of pow5mult that doesn't cache powers of 5. Provided for + the benefit of memory debugging tools like Valgrind. */ + +static Bigint * +pow5mult(Bigint *b, int k) +{ + Bigint *b1, *p5, *p51; + int i; + static int p05[3] = { 5, 25, 125 }; + + if ((i = k & 3)) { + b = multadd(b, p05[i-1], 0); + if (b == NULL) + return NULL; + } + + if (!(k >>= 2)) + return b; + p5 = i2b(625); + if (p5 == NULL) { + Bfree(b); + return NULL; + } + + for(;;) { + if (k & 1) { + b1 = mult(b, p5); + Bfree(b); + b = b1; + if (b == NULL) { + Bfree(p5); + return NULL; + } + } + if (!(k >>= 1)) + break; + p51 = mult(p5, p5); + Bfree(p5); + p5 = p51; + if (p5 == NULL) { + Bfree(b); + return NULL; + } + } + Bfree(p5); + return b; +} + +#endif /* Py_USING_MEMORY_DEBUGGER */ + +/* shift a Bigint b left by k bits. Return a pointer to the shifted result, + or NULL on failure. If the returned pointer is distinct from b then the + original b will have been Bfree'd. Ignores the sign of b. 
*/ + +static Bigint * +lshift(Bigint *b, int k) +{ + int i, k1, n, n1; + Bigint *b1; + ULong *x, *x1, *xe, z; + + if (!k || (!b->x[0] && b->wds == 1)) + return b; + + n = k >> 5; + k1 = b->k; + n1 = n + b->wds + 1; + for(i = b->maxwds; n1 > i; i <<= 1) + k1++; + b1 = Balloc(k1); + if (b1 == NULL) { + Bfree(b); + return NULL; + } + x1 = b1->x; + for(i = 0; i < n; i++) + *x1++ = 0; + x = b->x; + xe = x + b->wds; + if (k &= 0x1f) { + k1 = 32 - k; + z = 0; + do { + *x1++ = *x << k | z; + z = *x++ >> k1; + } + while(x < xe); + if ((*x1 = z)) + ++n1; + } + else do + *x1++ = *x++; + while(x < xe); + b1->wds = n1 - 1; + Bfree(b); + return b1; +} + +/* Do a three-way compare of a and b, returning -1 if a < b, 0 if a == b and + 1 if a > b. Ignores signs of a and b. */ + +static int +cmp(Bigint *a, Bigint *b) +{ + ULong *xa, *xa0, *xb, *xb0; + int i, j; + + i = a->wds; + j = b->wds; +#ifdef DEBUG + if (i > 1 && !a->x[i-1]) + Bug("cmp called with a->x[a->wds-1] == 0"); + if (j > 1 && !b->x[j-1]) + Bug("cmp called with b->x[b->wds-1] == 0"); +#endif + if (i -= j) + return i; + xa0 = a->x; + xa = xa0 + j; + xb0 = b->x; + xb = xb0 + j; + for(;;) { + if (*--xa != *--xb) + return *xa < *xb ? -1 : 1; + if (xa <= xa0) + break; + } + return 0; +} + +/* Take the difference of Bigints a and b, returning a new Bigint. Returns + NULL on failure. The signs of a and b are ignored, but the sign of the + result is set appropriately. 
*/
+/* Review notes for diff(), taken from the code below: if cmp(a, b) is 0 the
+   result is a one-word zero; if it is negative the operands are swapped and
+   c->sign is set to 1, otherwise c->sign is 0.  Returns NULL when Balloc()
+   fails. */
+static Bigint *
+diff(Bigint *a, Bigint *b)
+{
+    Bigint *c;
+    int i, wa, wb;
+    ULong *xa, *xae, *xb, *xbe, *xc;
+#ifdef ULLong
+    ULLong borrow, y;
+#else
+    ULong borrow, y;
+    ULong z;
+#endif
+
+    i = cmp(a,b);
+    if (!i) {
+        c = Balloc(0);
+        if (c == NULL)
+            return NULL;
+        c->wds = 1;
+        c->x[0] = 0;
+        return c;
+    }
+    if (i < 0) {
+        /* swap so that the subtraction below is always a - b with
+           cmp(a, b) > 0; i becomes the sign flag of the result */
+        c = a;
+        a = b;
+        b = c;
+        i = 1;
+    }
+    else
+        i = 0;
+    c = Balloc(a->k);
+    if (c == NULL)
+        return NULL;
+    c->sign = i;
+    wa = a->wds;
+    xa = a->x;
+    xae = xa + wa;
+    wb = b->wds;
+    xb = b->x;
+    xbe = xb + wb;
+    xc = c->x;
+    borrow = 0;
+#ifdef ULLong
+    /* subtract word by word over the words of b ... */
+    do {
+        y = (ULLong)*xa++ - *xb++ - borrow;
+        borrow = y >> 32 & (ULong)1;
+        *xc++ = (ULong)(y & FFFFFFFF);
+    }
+    while(xb < xbe);
+    /* ... then carry the borrow through the remaining words of a */
+    while(xa < xae) {
+        y = *xa++ - borrow;
+        borrow = y >> 32 & (ULong)1;
+        *xc++ = (ULong)(y & FFFFFFFF);
+    }
+#else
+    /* same two passes, working on 16-bit halves of each word */
+    do {
+        y = (*xa & 0xffff) - (*xb & 0xffff) - borrow;
+        borrow = (y & 0x10000) >> 16;
+        z = (*xa++ >> 16) - (*xb++ >> 16) - borrow;
+        borrow = (z & 0x10000) >> 16;
+        Storeinc(xc, z, y);
+    }
+    while(xb < xbe);
+    while(xa < xae) {
+        y = (*xa & 0xffff) - borrow;
+        borrow = (y & 0x10000) >> 16;
+        z = (*xa++ >> 16) - borrow;
+        borrow = (z & 0x10000) >> 16;
+        Storeinc(xc, z, y);
+    }
+#endif
+    while(!*--xc)   /* drop zero words from the top of the result */
+        wa--;
+    c->wds = wa;
+    return c;
+}
+
+/* Given a positive normal double x, return the difference between x and the
+   next double up.  Doesn't give correct results for subnormals.
+
+   The result is built directly from bit fields: the exponent field of x
+   lowered by (P-1)*Exp_msk1, with all other bits zero. */
+
+static double
+ulp(U *x)
+{
+    Long L;
+    U u;
+
+    L = (word0(x) & Exp_mask) - (P-1)*Exp_msk1;
+    word0(&u) = L;
+    word1(&u) = 0;
+    return dval(&u);
+}
+
+/* Convert a Bigint to a double plus an exponent.
+
+   The returned double has Exp_1 or'ed into its high word and takes its
+   fraction bits from the most significant words of a (lower-order bits are
+   simply dropped).  *e is set to 32 - hi0bits(top word of a).  The top word
+   of a is expected to be nonzero (checked only when DEBUG is defined). */
+
+static double
+b2d(Bigint *a, int *e)
+{
+    ULong *xa, *xa0, w, y, z;
+    int k;
+    U d;
+
+    xa0 = a->x;
+    xa = xa0 + a->wds;
+    y = *--xa;
+#ifdef DEBUG
+    if (!y) Bug("zero y in b2d");
+#endif
+    k = hi0bits(y);
+    *e = 32 - k;
+    if (k < Ebits) {
+        /* the top word alone overfills the high word's fraction field */
+        word0(&d) = Exp_1 | y >> (Ebits - k);
+        w = xa > xa0 ? *--xa : 0;
+        word1(&d) = y << ((32-Ebits) + k) | w >> (Ebits - k);
+        goto ret_d;
+    }
+    z = xa > xa0 ? *--xa : 0;
+    if (k -= Ebits) {
+        word0(&d) = Exp_1 | y << k | z >> (32 - k);
+        y = xa > xa0 ? *--xa : 0;
+        word1(&d) = z << k | y >> (32 - k);
+    }
+    else {
+        word0(&d) = Exp_1 | y;
+        word1(&d) = z;
+    }
+  ret_d:
+    return dval(&d);
+}
+
+/* Convert a scaled double to a Bigint plus an exponent.  Similar to d2b,
+   except that it accepts the scale parameter used in sb_strtod (which
+   should be either 0 or 2*P), and the normalization for the return value is
+   different (see below).  On input, d should be finite and nonnegative, and d
+   / 2**scale should be exactly representable as an IEEE 754 double.
+
+   Returns a Bigint b and an integer e such that
+
+     dval(d) / 2**scale = b * 2**e.
+
+   Unlike d2b, b is not necessarily odd: b and e are normalized so
+   that either 2**(P-1) <= b < 2**P and e >= Etiny, or b < 2**P
+   and e == Etiny.  This applies equally to an input of 0.0: in that
+   case the return values are b = 0 and e = Etiny.
+
+   The above normalization ensures that for all possible inputs d,
+   2**e gives ulp(d/2**scale).
+
+   Returns NULL on failure.
+*/
+
+static Bigint *
+sd2b(U *d, int scale, int *e)
+{
+    Bigint *b;
+
+    b = Balloc(1);
+    if (b == NULL)
+        return NULL;
+
+    /* First construct b and e assuming that scale == 0. */
+    b->wds = 2;
+    b->x[0] = word1(d);
+    b->x[1] = word0(d) & Frac_mask;
+    *e = Etiny - 1 + (int)((word0(d) & Exp_mask) >> Exp_shift);
+    if (*e < Etiny)
+        *e = Etiny;
+    else
+        b->x[1] |= Exp_msk1;
+
+    /* Now adjust for scale, provided that b != 0. */
+    if (scale && (b->x[0] || b->x[1])) {
+        *e -= scale;
+        if (*e < Etiny) {
+            scale = Etiny - *e;
+            *e = Etiny;
+            /* We can't shift more than P-1 bits without shifting out a 1. */
+            assert(0 < scale && scale <= P - 1);
+            if (scale >= 32) {
+                /* The bits shifted out should all be zero. */
+                assert(b->x[0] == 0);
+                b->x[0] = b->x[1];
+                b->x[1] = 0;
+                scale -= 32;
+            }
+            if (scale) {
+                /* The bits shifted out should all be zero. */
+                assert(b->x[0] << (32 - scale) == 0);
+                b->x[0] = (b->x[0] >> scale) | (b->x[1] << (32 - scale));
+                b->x[1] >>= scale;
+            }
+        }
+    }
+    /* Ensure b is normalized. */
+    if (!b->x[1])
+        b->wds = 1;
+
+    return b;
+}
+
+/* Convert a double to a Bigint plus an exponent.  Return NULL on failure.
+
+   Given a finite nonzero double d, return an odd Bigint b and exponent *e
+   such that fabs(d) = b * 2**e.  On return, *bits gives the number of
+   significant bits of b; that is, 2**(*bits-1) <= b < 2**(*bits).
+
+   If d is zero, then b == 0, *e == -1010, *bits = 0.
+
+   Side effect: the sign bit of *d is cleared in place.
+ */
+
+static Bigint *
+d2b(U *d, int *e, int *bits)
+{
+    Bigint *b;
+    int de, k;
+    ULong *x, y, z;
+    int i;
+
+    b = Balloc(1);
+    if (b == NULL)
+        return NULL;
+    x = b->x;
+
+    z = word0(d) & Frac_mask;
+    word0(d) &= 0x7fffffff;   /* clear sign bit, which we ignore */
+    if ((de = (int)(word0(d) >> Exp_shift)))
+        z |= Exp_msk1;   /* nonzero exponent field: add the implicit bit */
+    if ((y = word1(d))) {
+        if ((k = lo0bits(&y))) {
+            x[0] = y | z << (32 - k);
+            z >>= k;
+        }
+        else
+            x[0] = y;
+        i =
+            b->wds = (x[1] = z) ? 2 : 1;
+    }
+    else {
+        /* low word is zero: everything comes from the high word */
+        k = lo0bits(&z);
+        x[0] = z;
+        i =
+            b->wds = 1;
+        k += 32;
+    }
+    if (de) {
+        *e = de - Bias - (P-1) + k;
+        *bits = P - k;
+    }
+    else {
+        /* zero exponent field (subnormal or zero input) */
+        *e = de - Bias - (P-1) + 1 + k;
+        *bits = 32*i - hi0bits(x[i-1]);
+    }
+    return b;
+}
+
+/* Compute the ratio of two Bigints, as a double.  The result may have an
+   error of up to 2.5 ulps.
*/
+
+static double
+ratio(Bigint *a, Bigint *b)
+{
+    U da, db;
+    int k, ka, kb;
+
+    dval(&da) = b2d(a, &ka);
+    dval(&db) = b2d(b, &kb);
+    /* difference of the two b2d exponents, plus 32 bits per extra word */
+    k = ka - kb + 32*(a->wds - b->wds);
+    /* apply |k| to the exponent field of whichever operand it enlarges */
+    if (k > 0)
+        word0(&da) += k*Exp_msk1;
+    else {
+        k = -k;
+        word0(&db) += k*Exp_msk1;
+    }
+    return dval(&da) / dval(&db);
+}
+/* Powers of ten 1e0 through 1e22; tens[i] is 10**i. */
+static const double
+tens[] = {
+    1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
+    1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
+    1e20, 1e21, 1e22
+};
+/* bigtens[j] is 10**(16 * 2**j); tinytens[] holds the matching negative
+   powers, except for the last entry, which carries an extra scale factor
+   (see below). */
+static const double
+bigtens[] = { 1e16, 1e32, 1e64, 1e128, 1e256 };
+static const double tinytens[] = { 1e-16, 1e-32, 1e-64, 1e-128,
+                                   9007199254740992.*9007199254740992.e-256
+                                   /* = 2^106 * 1e-256 */
+};
+/* The factor of 2^106 in tinytens[4] helps us avoid setting the underflow */
+/* flag unnecessarily.  It leads to a song and dance at the end of strtod. */
+#define Scale_Bit 0x10   /* bit of (e1 >> 4) that selects tinytens[4] */
+#define n_bigtens 5      /* number of entries in bigtens[] and tinytens[] */
+
+#define ULbits 32
+#define kshift 5
+#define kmask 31
+
+/* dshift: returns (hi0bits(top word of b) - 4 - max(p2, 0)) & kmask, that
+   is, the value reduced to the range 0..31. */
+static int
+dshift(Bigint *b, int p2)
+{
+    int rv = hi0bits(b->x[b->wds-1]) - 4;
+    if (p2 > 0)
+        rv -= p2;
+    return rv & kmask;
+}
+
+/* special case of Bigint division.  The quotient is always in the range 0 <=
+   quotient < 10, and on entry the divisor S is normalized so that its top 4
+   bits (28--31) are zero and bit 27 is set.
*/
+/* Review notes for quorem(), taken from the code below: the quotient is
+   returned and b is overwritten in place with the remainder b - q*S.  If b
+   has fewer words than S, 0 is returned and b is left unchanged. */
+static int
+quorem(Bigint *b, Bigint *S)
+{
+    int n;
+    ULong *bx, *bxe, q, *sx, *sxe;
+#ifdef ULLong
+    ULLong borrow, carry, y, ys;
+#else
+    ULong borrow, carry, y, ys;
+    ULong si, z, zs;
+#endif
+
+    n = S->wds;
+#ifdef DEBUG
+    /*debug*/ if (b->wds > n)
+        /*debug*/       Bug("oversize b in quorem");
+#endif
+    if (b->wds < n)
+        return 0;
+    sx = S->x;
+    sxe = sx + --n;   /* n is now the index of the top word of S */
+    bx = b->x;
+    bxe = bx + n;
+    q = *bxe / (*sxe + 1);      /* ensure q <= true quotient */
+#ifdef DEBUG
+    /*debug*/ if (q > 9)
+        /*debug*/       Bug("oversized quotient in quorem");
+#endif
+    if (q) {
+        /* b -= q * S, one word at a time */
+        borrow = 0;
+        carry = 0;
+        do {
+#ifdef ULLong
+            ys = *sx++ * (ULLong)q + carry;
+            carry = ys >> 32;
+            y = *bx - (ys & FFFFFFFF) - borrow;
+            borrow = y >> 32 & (ULong)1;
+            *bx++ = (ULong)(y & FFFFFFFF);
+#else
+            si = *sx++;
+            ys = (si & 0xffff) * q + carry;
+            zs = (si >> 16) * q + (ys >> 16);
+            carry = zs >> 16;
+            y = (*bx & 0xffff) - (ys & 0xffff) - borrow;
+            borrow = (y & 0x10000) >> 16;
+            z = (*bx >> 16) - (zs & 0xffff) - borrow;
+            borrow = (z & 0x10000) >> 16;
+            Storeinc(bx, z, y);
+#endif
+        }
+        while(sx <= sxe);
+        if (!*bxe) {
+            /* top word became zero: shrink b->wds past the zero words */
+            bx = b->x;
+            while(--bxe > bx && !*bxe)
+                --n;
+            b->wds = n;
+        }
+    }
+    if (cmp(b, S) >= 0) {
+        /* the estimate was one too small: bump q and subtract S once more */
+        q++;
+        borrow = 0;
+        carry = 0;
+        bx = b->x;
+        sx = S->x;
+        do {
+#ifdef ULLong
+            ys = *sx++ + carry;
+            carry = ys >> 32;
+            y = *bx - (ys & FFFFFFFF) - borrow;
+            borrow = y >> 32 & (ULong)1;
+            *bx++ = (ULong)(y & FFFFFFFF);
+#else
+            si = *sx++;
+            ys = (si & 0xffff) + carry;
+            zs = (si >> 16) + (ys >> 16);
+            carry = zs >> 16;
+            y = (*bx & 0xffff) - (ys & 0xffff) - borrow;
+            borrow = (y & 0x10000) >> 16;
+            z = (*bx >> 16) - (zs & 0xffff) - borrow;
+            borrow = (z & 0x10000) >> 16;
+            Storeinc(bx, z, y);
+#endif
+        }
+        while(sx <= sxe);
+        bx = b->x;
+        bxe = bx + n;
+        if (!*bxe) {
+            while(--bxe > bx && !*bxe)
+                --n;
+            b->wds = n;
+        }
+    }
+    return q;
+}
+
+/* sulp(x) is a version of ulp(x) that takes bc.scale into account.
+ + Assuming that x is finite and nonnegative (positive zero is fine + here) and x / 2^bc.scale is exactly representable as a double, + sulp(x) is equivalent to 2^bc.scale * ulp(x / 2^bc.scale). */ + +static double +sulp(U *x, BCinfo *bc) +{ + U u; + + if (bc->scale && 2*P + 1 > (int)((word0(x) & Exp_mask) >> Exp_shift)) { + /* rv/2^bc->scale is subnormal */ + word0(&u) = (P+2)*Exp_msk1; + word1(&u) = 0; + return u.d; + } + else { + assert(word0(x) || word1(x)); /* x != 0.0 */ + return ulp(x); + } +} + +/* The bigcomp function handles some hard cases for strtod, for inputs + with more than STRTOD_DIGLIM digits. It's called once an initial + estimate for the double corresponding to the input string has + already been obtained by the code in sb_strtod. + + The bigcomp function is only called after sb_strtod has found a + double value rv such that either rv or rv + 1ulp represents the + correctly rounded value corresponding to the original string. It + determines which of these two values is the correct one by + computing the decimal digits of rv + 0.5ulp and comparing them with + the corresponding digits of s0. + + In the following, write dv for the absolute value of the number represented + by the input string. + + Inputs: + + s0 points to the first significant digit of the input string. + + rv is a (possibly scaled) estimate for the closest double value to the + value represented by the original input to sb_strtod. If + bc->scale is nonzero, then rv/2^(bc->scale) is the approximation to + the input value. + + bc is a struct containing information gathered during the parsing and + estimation steps of sb_strtod. Description of fields follows: + + bc->e0 gives the exponent of the input value, such that dv = (integer + given by the bd->nd digits of s0) * 10**e0 + + bc->nd gives the total number of significant digits of s0. It will + be at least 1. + + bc->nd0 gives the number of significant digits of s0 before the + decimal separator. 
If there's no decimal separator, bc->nd0 == + bc->nd. + + bc->scale is the value used to scale rv to avoid doing arithmetic with + subnormal values. It's either 0 or 2*P (=106). + + Outputs: + + On successful exit, rv/2^(bc->scale) is the closest double to dv. + + Returns 0 on success, -1 on failure (e.g., due to a failed malloc call). */ + +static int +bigcomp(U *rv, const char *s0, BCinfo *bc) +{ + Bigint *b, *d; + int b2, d2, dd, i, nd, nd0, odd, p2, p5; + + nd = bc->nd; + nd0 = bc->nd0; + p5 = nd + bc->e0; + b = sd2b(rv, bc->scale, &p2); + if (b == NULL) + return -1; + + /* record whether the lsb of rv/2^(bc->scale) is odd: in the exact halfway + case, this is used for round to even. */ + odd = b->x[0] & 1; + + /* left shift b by 1 bit and or a 1 into the least significant bit; + this gives us b * 2**p2 = rv/2^(bc->scale) + 0.5 ulp. */ + b = lshift(b, 1); + if (b == NULL) + return -1; + b->x[0] |= 1; + p2--; + + p2 -= p5; + d = i2b(1); + if (d == NULL) { + Bfree(b); + return -1; + } + /* Arrange for convenient computation of quotients: + * shift left if necessary so divisor has 4 leading 0 bits. + */ + if (p5 > 0) { + d = pow5mult(d, p5); + if (d == NULL) { + Bfree(b); + return -1; + } + } + else if (p5 < 0) { + b = pow5mult(b, -p5); + if (b == NULL) { + Bfree(d); + return -1; + } + } + if (p2 > 0) { + b2 = p2; + d2 = 0; + } + else { + b2 = 0; + d2 = -p2; + } + i = dshift(d, d2); + if ((b2 += i) > 0) { + b = lshift(b, b2); + if (b == NULL) { + Bfree(d); + return -1; + } + } + if ((d2 += i) > 0) { + d = lshift(d, d2); + if (d == NULL) { + Bfree(b); + return -1; + } + } + + /* Compare s0 with b/d: set dd to -1, 0, or 1 according as s0 < b/d, s0 == + * b/d, or s0 > b/d. Here the digits of s0 are thought of as representing + * a number in the range [0.1, 1). */ + if (cmp(b, d) >= 0) + /* b/d >= 1 */ + dd = -1; + else { + i = 0; + for(;;) { + b = multadd(b, 10, 0); + if (b == NULL) { + Bfree(d); + return -1; + } + dd = s0[i < nd0 ? 
i : i+1] - '0' - quorem(b, d); + i++; + + if (dd) + break; + if (!b->x[0] && b->wds == 1) { + /* b/d == 0 */ + dd = i < nd; + break; + } + if (!(i < nd)) { + /* b/d != 0, but digits of s0 exhausted */ + dd = -1; + break; + } + } + } + Bfree(b); + Bfree(d); + if (dd > 0 || (dd == 0 && odd)) + dval(rv) += sulp(rv, bc); + return 0; +} + +/* Return a 'standard' NaN value. + + There are exactly two quiet NaNs that don't arise by 'quieting' signaling + NaNs (see IEEE 754-2008, section 6.2.1). If sign == 0, return the one whose + sign bit is cleared. Otherwise, return the one whose sign bit is set. +*/ + +double +sb_stdnan(int sign) +{ + U rv; + word0(&rv) = NAN_WORD0; + word1(&rv) = NAN_WORD1; + if (sign) + word0(&rv) |= Sign_bit; + return dval(&rv); +} + +/* Return positive or negative infinity, according to the given sign (0 for + * positive infinity, 1 for negative infinity). */ + +double +sb_infinity(int sign) +{ + U rv; + word0(&rv) = POSINF_WORD0; + word1(&rv) = POSINF_WORD1; + return sign ? -dval(&rv) : dval(&rv); +} + +double +sb_strtod(const char *s00, char **se) +{ + int bb2, bb5, bbe, bd2, bd5, bs2, c, dsign, e, e1, error; + int esign, i, j, k, lz, nd, nd0, odd, sign; + const char *s, *s0, *s1; + double aadj, aadj1; + U aadj2, adj, rv, rv0; + ULong y, z, abs_exp; + Long L; + BCinfo bc; + Bigint *bb, *bb1, *bd, *bd0, *bs, *delta; + size_t ndigits, fraclen; + + dval(&rv) = 0.; + + /* Start parsing. */ + c = *(s = s00); + + /* Parse optional sign, if present. */ + sign = 0; + switch (c) { + case '-': + sign = 1; + /* no break */ + case '+': + c = *++s; + } + + /* Skip leading zeros: lz is true iff there were leading zeros. */ + s1 = s; + while (c == '0') + c = *++s; + lz = s != s1; + + /* Point s0 at the first nonzero digit (if any). fraclen will be the + number of digits between the decimal point and the end of the + digit string. ndigits will be the total number of digits ignoring + leading zeros. 
*/ + s0 = s1 = s; + while ('0' <= c && c <= '9') + c = *++s; + ndigits = s - s1; + fraclen = 0; + + /* Parse decimal point and following digits. */ + if (c == '.') { + c = *++s; + if (!ndigits) { + s1 = s; + while (c == '0') + c = *++s; + lz = lz || s != s1; + fraclen += (s - s1); + s0 = s; + } + s1 = s; + while ('0' <= c && c <= '9') + c = *++s; + ndigits += s - s1; + fraclen += s - s1; + } + + /* Now lz is true if and only if there were leading zero digits, and + ndigits gives the total number of digits ignoring leading zeros. A + valid input must have at least one digit. */ + if (!ndigits && !lz) { + if (se) + *se = (char *)s00; + goto parse_error; + } + + /* Range check ndigits and fraclen to make sure that they, and values + computed with them, can safely fit in an int. */ + if (ndigits > MAX_DIGITS || fraclen > MAX_DIGITS) { + if (se) + *se = (char *)s00; + goto parse_error; + } + nd = (int)ndigits; + nd0 = (int)ndigits - (int)fraclen; + + /* Parse exponent. */ + e = 0; + if (c == 'e' || c == 'E') { + s00 = s; + c = *++s; + + /* Exponent sign. */ + esign = 0; + switch (c) { + case '-': + esign = 1; + /* no break */ + case '+': + c = *++s; + } + + /* Skip zeros. lz is true iff there are leading zeros. */ + s1 = s; + while (c == '0') + c = *++s; + lz = s != s1; + + /* Get absolute value of the exponent. */ + s1 = s; + abs_exp = 0; + while ('0' <= c && c <= '9') { + abs_exp = 10*abs_exp + (c - '0'); + c = *++s; + } + + /* abs_exp will be correct modulo 2**32. But 10**9 < 2**32, so if + there are at most 9 significant exponent digits then overflow is + impossible. */ + if (s - s1 > 9 || abs_exp > MAX_ABS_EXP) + e = (int)MAX_ABS_EXP; + else + e = (int)abs_exp; + if (esign) + e = -e; + + /* A valid exponent must have at least one digit. */ + if (s == s1 && !lz) + s = s00; + } + + /* Adjust exponent to take into account position of the point. */ + e -= nd - nd0; + if (nd0 <= 0) + nd0 = nd; + + /* Finished parsing. 
Set se to indicate how far we parsed */ + if (se) + *se = (char *)s; + + /* If all digits were zero, exit with return value +-0.0. Otherwise, + strip trailing zeros: scan back until we hit a nonzero digit. */ + if (!nd) + goto ret; + for (i = nd; i > 0; ) { + --i; + if (s0[i < nd0 ? i : i+1] != '0') { + ++i; + break; + } + } + e += nd - i; + nd = i; + if (nd0 > nd) + nd0 = nd; + + /* Summary of parsing results. After parsing, and dealing with zero + * inputs, we have values s0, nd0, nd, e, sign, where: + * + * - s0 points to the first significant digit of the input string + * + * - nd is the total number of significant digits (here, and + * below, 'significant digits' means the set of digits of the + * significand of the input that remain after ignoring leading + * and trailing zeros). + * + * - nd0 indicates the position of the decimal point, if present; it + * satisfies 1 <= nd0 <= nd. The nd significant digits are in + * s0[0:nd0] and s0[nd0+1:nd+1] using the usual Python half-open slice + * notation. (If nd0 < nd, then s0[nd0] contains a '.' character; if + * nd0 == nd, then s0[nd0] could be any non-digit character.) + * + * - e is the adjusted exponent: the absolute value of the number + * represented by the original input string is n * 10**e, where + * n is the integer represented by the concatenation of + * s0[0:nd0] and s0[nd0+1:nd+1] + * + * - sign gives the sign of the input: 1 for negative, 0 for positive + * + * - the first and last significant digits are nonzero + */ + + /* put first DBL_DIG+1 digits into integer y and z. + * + * - y contains the value represented by the first min(9, nd) + * significant digits + * + * - if nd > 9, z contains the value represented by significant digits + * with indices in [9, min(16, nd)). So y * 10**(min(16, nd) - 9) + z + * gives the value represented by the first min(16, nd) sig. digits. + */ + + bc.e0 = e1 = e; + y = z = 0; + for (i = 0; i < nd; i++) { + if (i < 9) + y = 10*y + s0[i < nd0 ? 
i : i+1] - '0'; + else if (i < DBL_DIG+1) + z = 10*z + s0[i < nd0 ? i : i+1] - '0'; + else + break; + } + + k = nd < DBL_DIG + 1 ? nd : DBL_DIG + 1; + dval(&rv) = y; + if (k > 9) { + dval(&rv) = tens[k - 9] * dval(&rv) + z; + } + bd0 = 0; + if (nd <= DBL_DIG + && Flt_Rounds == 1 + ) { + if (!e) + goto ret; + if (e > 0) { + if (e <= Ten_pmax) { + dval(&rv) *= tens[e]; + goto ret; + } + i = DBL_DIG - nd; + if (e <= Ten_pmax + i) { + /* A fancier test would sometimes let us do + * this for larger i values. + */ + e -= i; + dval(&rv) *= tens[i]; + dval(&rv) *= tens[e]; + goto ret; + } + } + else if (e >= -Ten_pmax) { + dval(&rv) /= tens[-e]; + goto ret; + } + } + e1 += nd - k; + + bc.scale = 0; + + /* Get starting approximation = rv * 10**e1 */ + + if (e1 > 0) { + if ((i = e1 & 15)) + dval(&rv) *= tens[i]; + if (e1 &= ~15) { + if (e1 > DBL_MAX_10_EXP) + goto ovfl; + e1 >>= 4; + for(j = 0; e1 > 1; j++, e1 >>= 1) + if (e1 & 1) + dval(&rv) *= bigtens[j]; + /* The last multiplication could overflow. */ + word0(&rv) -= P*Exp_msk1; + dval(&rv) *= bigtens[j]; + if ((z = word0(&rv) & Exp_mask) + > Exp_msk1*(DBL_MAX_EXP+Bias-P)) + goto ovfl; + if (z > Exp_msk1*(DBL_MAX_EXP+Bias-1-P)) { + /* set to largest number */ + /* (Can't trust DBL_MAX) */ + word0(&rv) = Big0; + word1(&rv) = Big1; + } + else + word0(&rv) += P*Exp_msk1; + } + } + else if (e1 < 0) { + /* The input decimal value lies in [10**e1, 10**(e1+16)). + + If e1 <= -512, underflow immediately. + If e1 <= -256, set bc.scale to 2*P. + + So for input value < 1e-256, bc.scale is always set; + for input value >= 1e-240, bc.scale is never set. + For input values in [1e-256, 1e-240), bc.scale may or may + not be set. 
*/ + + e1 = -e1; + if ((i = e1 & 15)) + dval(&rv) /= tens[i]; + if (e1 >>= 4) { + if (e1 >= 1 << n_bigtens) + goto undfl; + if (e1 & Scale_Bit) + bc.scale = 2*P; + for(j = 0; e1 > 0; j++, e1 >>= 1) + if (e1 & 1) + dval(&rv) *= tinytens[j]; + if (bc.scale && (j = 2*P + 1 - ((word0(&rv) & Exp_mask) + >> Exp_shift)) > 0) { + /* scaled rv is denormal; clear j low bits */ + if (j >= 32) { + word1(&rv) = 0; + if (j >= 53) + word0(&rv) = (P+2)*Exp_msk1; + else + word0(&rv) &= 0xffffffff << (j-32); + } + else + word1(&rv) &= 0xffffffff << j; + } + if (!dval(&rv)) + goto undfl; + } + } + + /* Now the hard part -- adjusting rv to the correct value.*/ + + /* Put digits into bd: true value = bd * 10^e */ + + bc.nd = nd; + bc.nd0 = nd0; /* Only needed if nd > STRTOD_DIGLIM, but done here */ + /* to silence an erroneous warning about bc.nd0 */ + /* possibly not being initialized. */ + if (nd > STRTOD_DIGLIM) { + /* ASSERT(STRTOD_DIGLIM >= 18); 18 == one more than the */ + /* minimum number of decimal digits to distinguish double values */ + /* in IEEE arithmetic. */ + + /* Truncate input to 18 significant digits, then discard any trailing + zeros on the result by updating nd, nd0, e and y suitably. (There's + no need to update z; it's not reused beyond this point.) */ + for (i = 18; i > 0; ) { + /* scan back until we hit a nonzero digit. significant digit 'i' + is s0[i] if i < nd0, s0[i+1] if i >= nd0. */ + --i; + if (s0[i < nd0 ? i : i+1] != '0') { + ++i; + break; + } + } + e += nd - i; + nd = i; + if (nd0 > nd) + nd0 = nd; + if (nd < 9) { /* must recompute y */ + y = 0; + for(i = 0; i < nd0; ++i) + y = 10*y + s0[i] - '0'; + for(; i < nd; ++i) + y = 10*y + s0[i+1] - '0'; + } + } + bd0 = s2b(s0, nd0, nd, y); + if (bd0 == NULL) + goto failed_malloc; + + /* Notation for the comments below. Write: + + - dv for the absolute value of the number represented by the original + decimal input string. + + - if we've truncated dv, write tdv for the truncated value. 
+ Otherwise, set tdv == dv. + + - srv for the quantity rv/2^bc.scale; so srv is the current binary + approximation to tdv (and dv). It should be exactly representable + in an IEEE 754 double. + */ + + for(;;) { + + /* This is the main correction loop for sb_strtod. + + We've got a decimal value tdv, and a floating-point approximation + srv=rv/2^bc.scale to tdv. The aim is to determine whether srv is + close enough (i.e., within 0.5 ulps) to tdv, and to compute a new + approximation if not. + + To determine whether srv is close enough to tdv, compute integers + bd, bb and bs proportional to tdv, srv and 0.5 ulp(srv) + respectively, and then use integer arithmetic to determine whether + |tdv - srv| is less than, equal to, or greater than 0.5 ulp(srv). + */ + + bd = Balloc(bd0->k); + if (bd == NULL) { + Bfree(bd0); + goto failed_malloc; + } + Bcopy(bd, bd0); + bb = sd2b(&rv, bc.scale, &bbe); /* srv = bb * 2^bbe */ + if (bb == NULL) { + Bfree(bd); + Bfree(bd0); + goto failed_malloc; + } + /* Record whether lsb of bb is odd, in case we need this + for the round-to-even step later. */ + odd = bb->x[0] & 1; + + /* tdv = bd * 10**e; srv = bb * 2**bbe */ + bs = i2b(1); + if (bs == NULL) { + Bfree(bb); + Bfree(bd); + Bfree(bd0); + goto failed_malloc; + } + + if (e >= 0) { + bb2 = bb5 = 0; + bd2 = bd5 = e; + } + else { + bb2 = bb5 = -e; + bd2 = bd5 = 0; + } + if (bbe >= 0) + bb2 += bbe; + else + bd2 -= bbe; + bs2 = bb2; + bb2++; + bd2++; + + /* At this stage bd5 - bb5 == e == bd2 - bb2 + bbe, bb2 - bs2 == 1, + and bs == 1, so: + + tdv == bd * 10**e = bd * 2**(bbe - bb2 + bd2) * 5**(bd5 - bb5) + srv == bb * 2**bbe = bb * 2**(bbe - bb2 + bb2) + 0.5 ulp(srv) == 2**(bbe-1) = bs * 2**(bbe - bb2 + bs2) + + It follows that: + + M * tdv = bd * 2**bd2 * 5**bd5 + M * srv = bb * 2**bb2 * 5**bb5 + M * 0.5 ulp(srv) = bs * 2**bs2 * 5**bb5 + + for some constant M. (Actually, M == 2**(bb2 - bbe) * 5**bb5, but + this fact is not needed below.) 
+ */ + + /* Remove factor of 2**i, where i = min(bb2, bd2, bs2). */ + i = bb2 < bd2 ? bb2 : bd2; + if (i > bs2) + i = bs2; + if (i > 0) { + bb2 -= i; + bd2 -= i; + bs2 -= i; + } + + /* Scale bb, bd, bs by the appropriate powers of 2 and 5. */ + if (bb5 > 0) { + bs = pow5mult(bs, bb5); + if (bs == NULL) { + Bfree(bb); + Bfree(bd); + Bfree(bd0); + goto failed_malloc; + } + bb1 = mult(bs, bb); + Bfree(bb); + bb = bb1; + if (bb == NULL) { + Bfree(bs); + Bfree(bd); + Bfree(bd0); + goto failed_malloc; + } + } + if (bb2 > 0) { + bb = lshift(bb, bb2); + if (bb == NULL) { + Bfree(bs); + Bfree(bd); + Bfree(bd0); + goto failed_malloc; + } + } + if (bd5 > 0) { + bd = pow5mult(bd, bd5); + if (bd == NULL) { + Bfree(bb); + Bfree(bs); + Bfree(bd0); + goto failed_malloc; + } + } + if (bd2 > 0) { + bd = lshift(bd, bd2); + if (bd == NULL) { + Bfree(bb); + Bfree(bs); + Bfree(bd0); + goto failed_malloc; + } + } + if (bs2 > 0) { + bs = lshift(bs, bs2); + if (bs == NULL) { + Bfree(bb); + Bfree(bd); + Bfree(bd0); + goto failed_malloc; + } + } + + /* Now bd, bb and bs are scaled versions of tdv, srv and 0.5 ulp(srv), + respectively. Compute the difference |tdv - srv|, and compare + with 0.5 ulp(srv). */ + + delta = diff(bb, bd); + if (delta == NULL) { + Bfree(bb); + Bfree(bs); + Bfree(bd); + Bfree(bd0); + goto failed_malloc; + } + dsign = delta->sign; + delta->sign = 0; + i = cmp(delta, bs); + if (bc.nd > nd && i <= 0) { + if (dsign) + break; /* Must use bigcomp(). */ + + /* Here rv overestimates the truncated decimal value by at most + 0.5 ulp(rv). Hence rv either overestimates the true decimal + value by <= 0.5 ulp(rv), or underestimates it by some small + amount (< 0.1 ulp(rv)); either way, rv is within 0.5 ulps of + the true decimal value, so it's possible to exit. 
+ + Exception: if scaled rv is a normal exact power of 2, but not + DBL_MIN, then rv - 0.5 ulp(rv) takes us all the way down to the + next double, so the correctly rounded result is either rv - 0.5 + ulp(rv) or rv; in this case, use bigcomp to distinguish. */ + + if (!word1(&rv) && !(word0(&rv) & Bndry_mask)) { + /* rv can't be 0, since it's an overestimate for some + nonzero value. So rv is a normal power of 2. */ + j = (int)(word0(&rv) & Exp_mask) >> Exp_shift; + /* rv / 2^bc.scale = 2^(j - 1023 - bc.scale); use bigcomp if + rv / 2^bc.scale >= 2^-1021. */ + if (j - bc.scale >= 2) { + dval(&rv) -= 0.5 * sulp(&rv, &bc); + break; /* Use bigcomp. */ + } + } + + { + bc.nd = nd; + i = -1; /* Discarded digits make delta smaller. */ + } + } + + if (i < 0) { + /* Error is less than half an ulp -- check for + * special case of mantissa a power of two. + */ + if (dsign || word1(&rv) || word0(&rv) & Bndry_mask + || (word0(&rv) & Exp_mask) <= (2*P+1)*Exp_msk1 + ) { + break; + } + if (!delta->x[0] && delta->wds <= 1) { + /* exact result */ + break; + } + delta = lshift(delta,Log2P); + if (delta == NULL) { + Bfree(bb); + Bfree(bs); + Bfree(bd); + Bfree(bd0); + goto failed_malloc; + } + if (cmp(delta, bs) > 0) + goto drop_down; + break; + } + if (i == 0) { + /* exactly half-way between */ + if (dsign) { + if ((word0(&rv) & Bndry_mask1) == Bndry_mask1 + && word1(&rv) == ( + (bc.scale && + (y = word0(&rv) & Exp_mask) <= 2*P*Exp_msk1) ? 
+ (0xffffffff & (0xffffffff << (2*P+1-(y>>Exp_shift)))) : + 0xffffffff)) { + /*boundary case -- increment exponent*/ + word0(&rv) = (word0(&rv) & Exp_mask) + + Exp_msk1 + ; + word1(&rv) = 0; + /* dsign = 0; */ + break; + } + } + else if (!(word0(&rv) & Bndry_mask) && !word1(&rv)) { + drop_down: + /* boundary case -- decrement exponent */ + if (bc.scale) { + L = word0(&rv) & Exp_mask; + if (L <= (2*P+1)*Exp_msk1) { + if (L > (P+2)*Exp_msk1) + /* round even ==> */ + /* accept rv */ + break; + /* rv = smallest denormal */ + if (bc.nd > nd) + break; + goto undfl; + } + } + L = (word0(&rv) & Exp_mask) - Exp_msk1; + word0(&rv) = L | Bndry_mask1; + word1(&rv) = 0xffffffff; + break; + } + if (!odd) + break; + if (dsign) + dval(&rv) += sulp(&rv, &bc); + else { + dval(&rv) -= sulp(&rv, &bc); + if (!dval(&rv)) { + if (bc.nd >nd) + break; + goto undfl; + } + } + /* dsign = 1 - dsign; */ + break; + } + if ((aadj = ratio(delta, bs)) <= 2.) { + if (dsign) + aadj = aadj1 = 1.; + else if (word1(&rv) || word0(&rv) & Bndry_mask) { + if (word1(&rv) == Tiny1 && !word0(&rv)) { + if (bc.nd >nd) + break; + goto undfl; + } + aadj = 1.; + aadj1 = -1.; + } + else { + /* special case -- power of FLT_RADIX to be */ + /* rounded down... */ + + if (aadj < 2./FLT_RADIX) + aadj = 1./FLT_RADIX; + else + aadj *= 0.5; + aadj1 = -aadj; + } + } + else { + aadj *= 0.5; + aadj1 = dsign ? 
aadj : -aadj; + if (Flt_Rounds == 0) + aadj1 += 0.5; + } + y = word0(&rv) & Exp_mask; + + /* Check for overflow */ + + if (y == Exp_msk1*(DBL_MAX_EXP+Bias-1)) { + dval(&rv0) = dval(&rv); + word0(&rv) -= P*Exp_msk1; + adj.d = aadj1 * ulp(&rv); + dval(&rv) += adj.d; + if ((word0(&rv) & Exp_mask) >= + Exp_msk1*(DBL_MAX_EXP+Bias-P)) { + if (word0(&rv0) == Big0 && word1(&rv0) == Big1) { + Bfree(bb); + Bfree(bd); + Bfree(bs); + Bfree(bd0); + Bfree(delta); + goto ovfl; + } + word0(&rv) = Big0; + word1(&rv) = Big1; + goto cont; + } + else + word0(&rv) += P*Exp_msk1; + } + else { + if (bc.scale && y <= 2*P*Exp_msk1) { + if (aadj <= 0x7fffffff) { + if ((z = (ULong)aadj) <= 0) + z = 1; + aadj = z; + aadj1 = dsign ? aadj : -aadj; + } + dval(&aadj2) = aadj1; + word0(&aadj2) += (2*P+1)*Exp_msk1 - y; + aadj1 = dval(&aadj2); + } + adj.d = aadj1 * ulp(&rv); + dval(&rv) += adj.d; + } + z = word0(&rv) & Exp_mask; + if (bc.nd == nd) { + if (!bc.scale) + if (y == z) { + /* Can we stop now? */ + L = (Long)aadj; + aadj -= L; + /* The tolerances below are conservative. */ + if (dsign || word1(&rv) || word0(&rv) & Bndry_mask) { + if (aadj < .4999999 || aadj > .5000001) + break; + } + else if (aadj < .4999999/FLT_RADIX) + break; + } + } + cont: + Bfree(bb); + Bfree(bd); + Bfree(bs); + Bfree(delta); + } + Bfree(bb); + Bfree(bd); + Bfree(bs); + Bfree(bd0); + Bfree(delta); + if (bc.nd > nd) { + error = bigcomp(&rv, s0, &bc); + if (error) + goto failed_malloc; + } + + if (bc.scale) { + word0(&rv0) = Exp_1 - 2*P*Exp_msk1; + word1(&rv0) = 0; + dval(&rv) *= dval(&rv0); + } + + ret: + return sign ? -dval(&rv) : dval(&rv); + + parse_error: + return 0.0; + + failed_malloc: + errno = ENOMEM; + return -1.0; + + undfl: + return sign ? -0.0 : 0.0; + + ovfl: + errno = ERANGE; + /* Can't trust HUGE_VAL */ + word0(&rv) = Exp_mask; + word1(&rv) = 0; + return sign ? 
-dval(&rv) : dval(&rv); + +} + +static char * +rv_alloc(int i) +{ + int j, k, *r; + + j = sizeof(ULong); + for(k = 0; + sizeof(Bigint) - sizeof(ULong) - sizeof(int) + j <= (unsigned)i; + j <<= 1) + k++; + r = (int*)Balloc(k); + if (r == NULL) + return NULL; + *r = k; + return (char *)(r+1); +} + +static char * +nrv_alloc(char *s, char **rve, int n) +{ + char *rv, *t; + + rv = rv_alloc(n); + if (rv == NULL) + return NULL; + t = rv; + while((*t = *s++)) t++; + if (rve) + *rve = t; + return rv; +} + +/* freedtoa(s) must be used to free values s returned by dtoa + * when MULTIPLE_THREADS is #defined. It should be used in all cases, + * but for consistency with earlier versions of dtoa, it is optional + * when MULTIPLE_THREADS is not defined. + */ + +void +sb_freedtoa(char *s) +{ + Bigint *b = (Bigint *)((int *)s - 1); + b->maxwds = 1 << (b->k = *(int*)b); + Bfree(b); +} + +/* dtoa for IEEE arithmetic (dmg): convert double to ASCII string. + * + * Inspired by "How to Print Floating-Point Numbers Accurately" by + * Guy L. Steele, Jr. and Jon L. White [Proc. ACM SIGPLAN '90, pp. 112-126]. + * + * Modifications: + * 1. Rather than iterating, we use a simple numeric overestimate + * to determine k = floor(log10(d)). We scale relevant + * quantities using O(log2(k)) rather than O(k) multiplications. + * 2. For some modes > 2 (corresponding to ecvt and fcvt), we don't + * try to generate digits strictly left to right. Instead, we + * compute with fewer bits and propagate the carry if necessary + * when rounding the final digit up. This is often faster. + * 3. Under the assumption that input will be rounded nearest, + * mode 0 renders 1e23 as 1e23 rather than 9.999999999999999e22. + * That is, we allow equality in stopping tests when the + * round-nearest rule will give the same floating-point value + * as would satisfaction of the stopping test with strict + * inequality. + * 4. We remove common factors of powers of 2 from relevant + * quantities. + * 5. 
When converting floating-point integers less than 1e16, + * we use floating-point arithmetic rather than resorting + * to multiple-precision integers. + * 6. When asked to produce fewer than 15 digits, we first try + * to get by with floating-point arithmetic; we resort to + * multiple-precision integer arithmetic only if we cannot + * guarantee that the floating-point calculation has given + * the correctly rounded result. For k requested digits and + * "uniformly" distributed input, the probability is + * something like 10^(k-15) that we must resort to the Long + * calculation. + */ + +/* Additional notes (METD): (1) returns NULL on failure. (2) to avoid memory + leakage, a successful call to sb_dtoa should always be matched by a + call to sb_freedtoa. */ + +char * +sb_dtoa(double dd, int mode, int ndigits, + int *decpt, int *sign, char **rve) +{ + /* Arguments ndigits, decpt, sign are similar to those + of ecvt and fcvt; trailing zeros are suppressed from + the returned string. If not null, *rve is set to point + to the end of the return value. If d is +-Infinity or NaN, + then *decpt is set to 9999. + + mode: + 0 ==> shortest string that yields d when read in + and rounded to nearest. + 1 ==> like 0, but with Steele & White stopping rule; + e.g. with IEEE P754 arithmetic , mode 0 gives + 1e23 whereas mode 1 gives 9.999999999999999e22. + 2 ==> max(1,ndigits) significant digits. This gives a + return value similar to that of ecvt, except + that trailing zeros are suppressed. + 3 ==> through ndigits past the decimal point. This + gives a return value similar to that from fcvt, + except that trailing zeros are suppressed, and + ndigits can be negative. + 4,5 ==> similar to 2 and 3, respectively, but (in + round-nearest mode) with the tests of mode 0 to + possibly return a shorter string that rounds to d. + With IEEE arithmetic and compilation with + -DHonor_FLT_ROUNDS, modes 4 and 5 behave the same + as modes 2 and 3 when FLT_ROUNDS != 1. 
+ 6-9 ==> Debugging modes similar to mode - 4: don't try + fast floating-point estimate (if applicable). + + Values of mode other than 0-9 are treated as mode 0. + + Sufficient space is allocated to the return value + to hold the suppressed trailing zeros. + */ + + int bbits, b2, b5, be, dig, i, ieps, ilim, ilim0, ilim1, + j, j1, k, k0, k_check, leftright, m2, m5, s2, s5, + spec_case, try_quick; + Long L; + int denorm; + ULong x; + Bigint *b, *b1, *delta, *mlo, *mhi, *S; + U d2, eps, u; + double ds; + char *s, *s0; + + /* set pointers to NULL, to silence gcc compiler warnings and make + cleanup easier on error */ + mlo = mhi = S = 0; + s0 = 0; + + u.d = dd; + if (word0(&u) & Sign_bit) { + /* set sign for everything, including 0's and NaNs */ + *sign = 1; + word0(&u) &= ~Sign_bit; /* clear sign bit */ + } + else + *sign = 0; + + /* quick return for Infinities, NaNs and zeros */ + if ((word0(&u) & Exp_mask) == Exp_mask) + { + /* Infinity or NaN */ + *decpt = 9999; + if (!word1(&u) && !(word0(&u) & 0xfffff)) + return nrv_alloc("Infinity", rve, 8); + return nrv_alloc("NaN", rve, 3); + } + if (!dval(&u)) { + *decpt = 1; + return nrv_alloc("0", rve, 1); + } + + /* compute k = floor(log10(d)). The computation may leave k + one too large, but should never leave k too small. */ + b = d2b(&u, &be, &bbits); + if (b == NULL) + goto failed_malloc; + if ((i = (int)(word0(&u) >> Exp_shift1 & (Exp_mask>>Exp_shift1)))) { + dval(&d2) = dval(&u); + word0(&d2) &= Frac_mask1; + word0(&d2) |= Exp_11; + + /* log(x) ~=~ log(1.5) + (x-1.5)/1.5 + * log10(x) = log(x) / log(10) + * ~=~ log(1.5)/log(10) + (x-1.5)/(1.5*log(10)) + * log10(d) = (i-Bias)*log(2)/log(10) + log10(d2) + * + * This suggests computing an approximation k to log10(d) by + * + * k = (i - Bias)*0.301029995663981 + * + ( (d2-1.5)*0.289529654602168 + 0.176091259055681 ); + * + * We want k to be too large rather than too small. 
+ * The error in the first-order Taylor series approximation + * is in our favor, so we just round up the constant enough + * to compensate for any error in the multiplication of + * (i - Bias) by 0.301029995663981; since |i - Bias| <= 1077, + * and 1077 * 0.30103 * 2^-52 ~=~ 7.2e-14, + * adding 1e-13 to the constant term more than suffices. + * Hence we adjust the constant term to 0.1760912590558. + * (We could get a more accurate k by invoking log10, + * but this is probably not worthwhile.) + */ + + i -= Bias; + denorm = 0; + } + else { + /* d is denormalized */ + + i = bbits + be + (Bias + (P-1) - 1); + x = i > 32 ? word0(&u) << (64 - i) | word1(&u) >> (i - 32) + : word1(&u) << (32 - i); + dval(&d2) = x; + word0(&d2) -= 31*Exp_msk1; /* adjust exponent */ + i -= (Bias + (P-1) - 1) + 1; + denorm = 1; + } + ds = (dval(&d2)-1.5)*0.289529654602168 + 0.1760912590558 + + i*0.301029995663981; + k = (int)ds; + if (ds < 0. && ds != k) + k--; /* want k = floor(ds) */ + k_check = 1; + if (k >= 0 && k <= Ten_pmax) { + if (dval(&u) < tens[k]) + k--; + k_check = 0; + } + j = bbits - i - 1; + if (j >= 0) { + b2 = 0; + s2 = j; + } + else { + b2 = -j; + s2 = 0; + } + if (k >= 0) { + b5 = 0; + s5 = k; + s2 += k; + } + else { + b2 -= k; + b5 = -k; + s5 = 0; + } + if (mode < 0 || mode > 9) + mode = 0; + + try_quick = 1; + + if (mode > 5) { + mode -= 4; + try_quick = 0; + } + leftright = 1; + ilim = ilim1 = -1; /* Values for cases 0 and 1; done here to */ + /* silence erroneous "gcc -Wall" warning. 
*/ + switch(mode) { + case 0: + case 1: + i = 18; + ndigits = 0; + break; + case 2: + leftright = 0; + /* no break */ + case 4: + if (ndigits <= 0) + ndigits = 1; + ilim = ilim1 = i = ndigits; + break; + case 3: + leftright = 0; + /* no break */ + case 5: + i = ndigits + k + 1; + ilim = i; + ilim1 = i - 1; + if (i <= 0) + i = 1; + } + s0 = rv_alloc(i); + if (s0 == NULL) + goto failed_malloc; + s = s0; + + + if (ilim >= 0 && ilim <= Quick_max && try_quick) { + + /* Try to get by with floating-point arithmetic. */ + + i = 0; + dval(&d2) = dval(&u); + k0 = k; + ilim0 = ilim; + ieps = 2; /* conservative */ + if (k > 0) { + ds = tens[k&0xf]; + j = k >> 4; + if (j & Bletch) { + /* prevent overflows */ + j &= Bletch - 1; + dval(&u) /= bigtens[n_bigtens-1]; + ieps++; + } + for(; j; j >>= 1, i++) + if (j & 1) { + ieps++; + ds *= bigtens[i]; + } + dval(&u) /= ds; + } + else if ((j1 = -k)) { + dval(&u) *= tens[j1 & 0xf]; + for(j = j1 >> 4; j; j >>= 1, i++) + if (j & 1) { + ieps++; + dval(&u) *= bigtens[i]; + } + } + if (k_check && dval(&u) < 1. && ilim > 0) { + if (ilim1 <= 0) + goto fast_failed; + ilim = ilim1; + k--; + dval(&u) *= 10.; + ieps++; + } + dval(&eps) = ieps*dval(&u) + 7.; + word0(&eps) -= (P-1)*Exp_msk1; + if (ilim == 0) { + S = mhi = 0; + dval(&u) -= 5.; + if (dval(&u) > dval(&eps)) + goto one_digit; + if (dval(&u) < -dval(&eps)) + goto no_digits; + goto fast_failed; + } + if (leftright) { + /* Use Steele & White method of only + * generating digits needed. + */ + dval(&eps) = 0.5/tens[ilim-1] - dval(&eps); + for(i = 0;;) { + L = (Long)dval(&u); + dval(&u) -= L; + *s++ = '0' + (int)L; + if (dval(&u) < dval(&eps)) + goto ret1; + if (1. - dval(&u) < dval(&eps)) + goto bump_up; + if (++i >= ilim) + break; + dval(&eps) *= 10.; + dval(&u) *= 10.; + } + } + else { + /* Generate ilim digits, then fix them up. */ + dval(&eps) *= tens[ilim-1]; + for(i = 1;; i++, dval(&u) *= 10.) 
{ + L = (Long)(dval(&u)); + if (!(dval(&u) -= L)) + ilim = i; + *s++ = '0' + (int)L; + if (i == ilim) { + if (dval(&u) > 0.5 + dval(&eps)) + goto bump_up; + else if (dval(&u) < 0.5 - dval(&eps)) { + while(*--s == '0'); + s++; + goto ret1; + } + break; + } + } + } + fast_failed: + s = s0; + dval(&u) = dval(&d2); + k = k0; + ilim = ilim0; + } + + /* Do we have a "small" integer? */ + + if (be >= 0 && k <= Int_max) { + /* Yes. */ + ds = tens[k]; + if (ndigits < 0 && ilim <= 0) { + S = mhi = 0; + if (ilim < 0 || dval(&u) <= 5*ds) + goto no_digits; + goto one_digit; + } + for(i = 1;; i++, dval(&u) *= 10.) { + L = (Long)(dval(&u) / ds); + dval(&u) -= L*ds; + *s++ = '0' + (int)L; + if (!dval(&u)) { + break; + } + if (i == ilim) { + dval(&u) += dval(&u); + if (dval(&u) > ds || (dval(&u) == ds && L & 1)) { + bump_up: + while(*--s == '9') + if (s == s0) { + k++; + *s = '0'; + break; + } + ++*s++; + } + break; + } + } + goto ret1; + } + + m2 = b2; + m5 = b5; + if (leftright) { + i = + denorm ? be + (Bias + (P-1) - 1 + 1) : + 1 + P - bbits; + b2 += i; + s2 += i; + mhi = i2b(1); + if (mhi == NULL) + goto failed_malloc; + } + if (m2 > 0 && s2 > 0) { + i = m2 < s2 ? m2 : s2; + b2 -= i; + m2 -= i; + s2 -= i; + } + if (b5 > 0) { + if (leftright) { + if (m5 > 0) { + mhi = pow5mult(mhi, m5); + if (mhi == NULL) + goto failed_malloc; + b1 = mult(mhi, b); + Bfree(b); + b = b1; + if (b == NULL) + goto failed_malloc; + } + if ((j = b5 - m5)) { + b = pow5mult(b, j); + if (b == NULL) + goto failed_malloc; + } + } + else { + b = pow5mult(b, b5); + if (b == NULL) + goto failed_malloc; + } + } + S = i2b(1); + if (S == NULL) + goto failed_malloc; + if (s5 > 0) { + S = pow5mult(S, s5); + if (S == NULL) + goto failed_malloc; + } + + /* Check for special case that d is a normalized power of 2. 
*/ + + spec_case = 0; + if ((mode < 2 || leftright) + ) { + if (!word1(&u) && !(word0(&u) & Bndry_mask) + && word0(&u) & (Exp_mask & ~Exp_msk1) + ) { + /* The special case */ + b2 += Log2P; + s2 += Log2P; + spec_case = 1; + } + } + + /* Arrange for convenient computation of quotients: + * shift left if necessary so divisor has 4 leading 0 bits. + * + * Perhaps we should just compute leading 28 bits of S once + * and for all and pass them and a shift to quorem, so it + * can do shifts and ors to compute the numerator for q. + */ +#define iInc 28 + i = dshift(S, s2); + b2 += i; + m2 += i; + s2 += i; + if (b2 > 0) { + b = lshift(b, b2); + if (b == NULL) + goto failed_malloc; + } + if (s2 > 0) { + S = lshift(S, s2); + if (S == NULL) + goto failed_malloc; + } + if (k_check) { + if (cmp(b,S) < 0) { + k--; + b = multadd(b, 10, 0); /* we botched the k estimate */ + if (b == NULL) + goto failed_malloc; + if (leftright) { + mhi = multadd(mhi, 10, 0); + if (mhi == NULL) + goto failed_malloc; + } + ilim = ilim1; + } + } + if (ilim <= 0 && (mode == 3 || mode == 5)) { + if (ilim < 0) { + /* no digits, fcvt style */ + no_digits: + k = -1 - ndigits; + goto ret; + } + else { + S = multadd(S, 5, 0); + if (S == NULL) + goto failed_malloc; + if (cmp(b, S) <= 0) + goto no_digits; + } + one_digit: + *s++ = '1'; + k++; + goto ret; + } + if (leftright) { + if (m2 > 0) { + mhi = lshift(mhi, m2); + if (mhi == NULL) + goto failed_malloc; + } + + /* Compute mlo -- check for special case + * that d is a normalized power of 2. + */ + + mlo = mhi; + if (spec_case) { + mhi = Balloc(mhi->k); + if (mhi == NULL) + goto failed_malloc; + Bcopy(mhi, mlo); + mhi = lshift(mhi, Log2P); + if (mhi == NULL) + goto failed_malloc; + } + + for(i = 1;;i++) { + dig = quorem(b,S) + '0'; + /* Do we yet have the shortest decimal string + * that will round to d? + */ + j = cmp(b, mlo); + delta = diff(S, mhi); + if (delta == NULL) + goto failed_malloc; + j1 = delta->sign ? 
1 : cmp(b, delta); + Bfree(delta); + if (j1 == 0 && mode != 1 && !(word1(&u) & 1) + ) { + if (dig == '9') + goto round_9_up; + if (j > 0) + dig++; + *s++ = dig; + goto ret; + } + if (j < 0 || (j == 0 && mode != 1 + && !(word1(&u) & 1) + )) { + if (!b->x[0] && b->wds <= 1) { + goto accept_dig; + } + if (j1 > 0) { + b = lshift(b, 1); + if (b == NULL) + goto failed_malloc; + j1 = cmp(b, S); + if ((j1 > 0 || (j1 == 0 && dig & 1)) + && dig++ == '9') + goto round_9_up; + } + accept_dig: + *s++ = dig; + goto ret; + } + if (j1 > 0) { + if (dig == '9') { /* possible if i == 1 */ + round_9_up: + *s++ = '9'; + goto roundoff; + } + *s++ = dig + 1; + goto ret; + } + *s++ = dig; + if (i == ilim) + break; + b = multadd(b, 10, 0); + if (b == NULL) + goto failed_malloc; + if (mlo == mhi) { + mlo = mhi = multadd(mhi, 10, 0); + if (mlo == NULL) + goto failed_malloc; + } + else { + mlo = multadd(mlo, 10, 0); + if (mlo == NULL) + goto failed_malloc; + mhi = multadd(mhi, 10, 0); + if (mhi == NULL) + goto failed_malloc; + } + } + } + else + for(i = 1;; i++) { + *s++ = dig = quorem(b,S) + '0'; + if (!b->x[0] && b->wds <= 1) { + goto ret; + } + if (i >= ilim) + break; + b = multadd(b, 10, 0); + if (b == NULL) + goto failed_malloc; + } + + /* Round off last digit */ + + b = lshift(b, 1); + if (b == NULL) + goto failed_malloc; + j = cmp(b, S); + if (j > 0 || (j == 0 && dig & 1)) { + roundoff: + while(*--s == '9') + if (s == s0) { + k++; + *s++ = '1'; + goto ret; + } + ++*s++; + } + else { + while(*--s == '0'); + s++; + } + ret: + Bfree(S); + if (mhi) { + if (mlo && mlo != mhi) + Bfree(mlo); + Bfree(mhi); + } + ret1: + Bfree(b); + *s = 0; + *decpt = k + 1; + if (rve) + *rve = s; + return s0; + failed_malloc: + if (S) + Bfree(S); + if (mlo && mlo != mhi) + Bfree(mlo); + if (mhi) + Bfree(mhi); + if (b) + Bfree(b); + if (s0) + sb_freedtoa(s0); + return NULL; +} +#ifdef __cplusplus +} +#endif diff --git a/media/sphinxbase/src/libsphinxbase/util/err.c 
b/media/sphinxbase/src/libsphinxbase/util/err.c new file mode 100644 index 000000000..1a498b639 --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/util/err.c @@ -0,0 +1,297 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * ==================================================================== + * + */ +/** + * @file err.c + * @brief Somewhat antiquated logging and error interface. + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include +#include +#include +#include + +#include "sphinxbase/err.h" +#include "sphinxbase/prim_type.h" +#include "sphinxbase/filename.h" +#include "sphinxbase/ckd_alloc.h" + +static FILE* logfp = NULL; +static int logfp_disabled = FALSE; + +static int sphinx_debug_level; + +#if defined(__ANDROID__) +#include +static void +err_logcat_cb(void* user_data, err_lvl_t level, const char *fmt, ...); +#elif defined(_WIN32_WCE) +#include +#define vsnprintf _vsnprintf +static void +err_wince_cb(void* user_data, err_lvl_t level, const char *fmt, ...); +#endif + +#if defined(__ANDROID__) +static err_cb_f err_cb = err_logcat_cb; +#elif defined(_WIN32_WCE) +static err_cb_f err_cb = err_wince_cb; +#else +static err_cb_f err_cb = err_logfp_cb; +#endif +static void* err_user_data; + +void +err_msg(err_lvl_t lvl, const char *path, long ln, const char *fmt, ...) +{ + static const char *err_prefix[ERR_MAX] = { + "DEBUG", "INFO", "INFOCONT", "WARN", "ERROR", "FATAL" + }; + + char msg[1024]; + va_list ap; + + if (!err_cb) + return; + + va_start(ap, fmt); + vsnprintf(msg, sizeof(msg), fmt, ap); + va_end(ap); + + if (path) { + const char *fname = path2basename(path); + if (lvl == ERR_INFOCONT) + err_cb(err_user_data, lvl, "%s(%ld): %s", fname, ln, msg); + else if (lvl == ERR_INFO) + err_cb(err_user_data, lvl, "%s: %s(%ld): %s", err_prefix[lvl], fname, ln, msg); + else + err_cb(err_user_data, lvl, "%s: \"%s\", line %ld: %s", err_prefix[lvl], fname, ln, msg); + } else { + err_cb(err_user_data, lvl, "%s", msg); + } +} + +#ifdef _WIN32_WCE /* No strerror for WinCE, so a separate implementation */ +void +err_msg_system(err_lvl_t lvl, const char *path, long ln, const char *fmt, ...) 
+{ + static const char *err_prefix[ERR_MAX] = { + "DEBUG", "INFO", "INFOCONT", "WARN", "ERROR", "FATAL" + }; + + va_list ap; + LPVOID error_wstring; + DWORD error; + char msg[1024]; + char error_string[1024]; + + if (!err_cb) + return; + + error = GetLastError(); + FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER | + FORMAT_MESSAGE_FROM_SYSTEM | + FORMAT_MESSAGE_IGNORE_INSERTS, + NULL, + error, + 0, // Default language + (LPTSTR) &error_wstring, + 0, + NULL); + wcstombs(error_string, error_wstring, 1023); + LocalFree(error_wstring); + + va_start(ap, fmt); + vsnprintf(msg, sizeof(msg), fmt, ap); + va_end(ap); + + if (path) { + const char *fname = path2basename(path); + if (lvl == ERR_INFOCONT) + err_cb(err_user_data, lvl, "%s(%ld): %s: %s\n", fname, ln, msg, error_string); + else if (lvl == ERR_INFO) + err_cb(err_user_data, lvl, "%s: %s(%ld): %s: %s\n", err_prefix[lvl], fname, ln, msg, error_string); + else + err_cb(err_user_data, lvl, "%s: \"%s\", line %ld: %s: %s\n", err_prefix[lvl], fname, ln, msg, error_string); + } else { + err_cb(err_user_data, lvl, "%s: %s\n", msg, error_string); + } +} +#else +void +err_msg_system(err_lvl_t lvl, const char *path, long ln, const char *fmt, ...) 
+{ + int local_errno = errno; + + static const char *err_prefix[ERR_MAX] = { + "DEBUG", "INFO", "INFOCONT", "WARN", "ERROR", "FATAL" + }; + + char msg[1024]; + va_list ap; + + if (!err_cb) + return; + + va_start(ap, fmt); + vsnprintf(msg, sizeof(msg), fmt, ap); + va_end(ap); + + if (path) { + const char *fname = path2basename(path); + if (lvl == ERR_INFOCONT) + err_cb(err_user_data, lvl, "%s(%ld): %s: %s\n", fname, ln, msg, strerror(local_errno)); + else if (lvl == ERR_INFO) + err_cb(err_user_data, lvl, "%s: %s(%ld): %s: %s\n", err_prefix[lvl], fname, ln, msg, strerror(local_errno)); + else + err_cb(err_user_data, lvl, "%s: \"%s\", line %ld: %s: %s\n", err_prefix[lvl], fname, ln, msg, strerror(local_errno)); + } else { + err_cb(err_user_data, lvl, "%s: %s\n", msg, strerror(local_errno)); + } +} +#endif + +#if defined(__ANDROID__) +static void +err_logcat_cb(void *user_data, err_lvl_t lvl, const char *fmt, ...) +{ + static const int android_level[ERR_MAX] = {ANDROID_LOG_DEBUG, ANDROID_LOG_INFO, + ANDROID_LOG_INFO, ANDROID_LOG_WARN, ANDROID_LOG_ERROR, ANDROID_LOG_ERROR}; + + va_list ap; + va_start(ap, fmt); + __android_log_vprint(android_level[lvl], "cmusphinx", fmt, ap); + va_end(ap); +} +#elif defined(_WIN32_WCE) +static void +err_wince_cb(void *user_data, err_lvl_t lvl, const char *fmt, ...) +{ + char msg[1024]; + WCHAR *wmsg; + size_t size; + va_list ap; + + va_start(ap, fmt); + _vsnprintf(msg, sizeof(msg), fmt, ap); + va_end(ap); + + size = mbstowcs(NULL, msg, 0) + 1; + wmsg = ckd_calloc(size, sizeof(*wmsg)); + mbstowcs(wmsg, msg, size); + + OutputDebugStringW(wmsg); + ckd_free(wmsg); +} +#else +void +err_logfp_cb(void *user_data, err_lvl_t lvl, const char *fmt, ...) 
+{ + va_list ap; + FILE *fp = err_get_logfp(); + + if (!fp) + return; + + va_start(ap, fmt); + vfprintf(fp, fmt, ap); + va_end(ap); +} +#endif + +int +err_set_logfile(const char *path) +{ + FILE *newfp, *oldfp; + + if ((newfp = fopen(path, "a")) == NULL) + return -1; + oldfp = err_get_logfp(); + err_set_logfp(newfp); + if (oldfp != NULL && oldfp != stdout && oldfp != stderr) + fclose(oldfp); + return 0; +} + +void +err_set_logfp(FILE *stream) +{ + if (stream == NULL) { + logfp_disabled = TRUE; + logfp = NULL; + return; + } + logfp_disabled = FALSE; + logfp = stream; + return; +} + +FILE * +err_get_logfp(void) +{ + if (logfp_disabled) + return NULL; + if (logfp == NULL) + return stderr; + + return logfp; +} + +int +err_set_debug_level(int level) +{ + int prev = sphinx_debug_level; + sphinx_debug_level = level; + return prev; +} + +int +err_get_debug_level(void) +{ + return sphinx_debug_level; +} + +void +err_set_callback(err_cb_f cb, void* user_data) +{ + err_cb = cb; + err_user_data= user_data; +} diff --git a/media/sphinxbase/src/libsphinxbase/util/errno.c b/media/sphinxbase/src/libsphinxbase/util/errno.c new file mode 100644 index 000000000..844b6f538 --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/util/errno.c @@ -0,0 +1,51 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2007 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * ==================================================================== + */ +/********************************************************************* + * + * File: errno.c + * + * Description: functions and variables missing from Windows CE standard + * library + * + * Author: Silvio Moioli + * + *********************************************************************/ + +#include + +#if defined(_WIN32_WCE) +int errno; +#endif diff --git a/media/sphinxbase/src/libsphinxbase/util/f2c_lite.c b/media/sphinxbase/src/libsphinxbase/util/f2c_lite.c new file mode 100644 index 000000000..58fbc4ee6 --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/util/f2c_lite.c @@ -0,0 +1,551 @@ +#include +#include +#include +#include +#include + +#include "sphinxbase/f2c.h" + +#ifdef _MSC_VER +#pragma warning (disable: 4244) +#endif + + +extern void +s_wsfe(cilist * f) +{; +} +extern void +e_wsfe(void) +{; +} +extern void +do_fio(integer * c, char *s, ftnlen l) +{; +} + +/* You'll want this if you redo the *_lite.c files with the -C option + * to f2c for checking array subscripts. (It's not suggested you do that + * for production use, of course.) */ +extern int +s_rnge(char *var, int index, char *routine, int lineno) +{ + fprintf(stderr, + "array index out-of-bounds for %s[%d] in routine %s:%d\n", var, + index, routine, lineno); + fflush(stderr); + assert(2+2 == 5); + return 0; +} + + +#ifdef KR_headers +extern double sqrt(); +float +f__cabs(real, imag) +float real, imag; +#else +#undef abs + +float +f__cabs(float real, float imag) +#endif +{ + float temp; + + if (real < 0) + real = -real; + if (imag < 0) + imag = -imag; + if (imag > real) { + temp = real; + real = imag; + imag = temp; + } + if ((imag + real) == real) + return ((float) real); + + temp = imag / real; + temp = real * sqrt(1.0 + temp * temp); /*overflow!! 
*/ + return (temp); +} + + +VOID +#ifdef KR_headers +s_cnjg(r, z) +complex *r, *z; +#else +s_cnjg(complex * r, complex * z) +#endif +{ + r->r = z->r; + r->i = -z->i; +} + + +#ifdef KR_headers +float +r_imag(z) +complex *z; +#else +float +r_imag(complex * z) +#endif +{ + return (z->i); +} + + +#define log10e 0.43429448190325182765 + +#ifdef KR_headers +double log(); +float +r_lg10(x) +real *x; +#else +#undef abs + +float +r_lg10(real * x) +#endif +{ + return (log10e * log(*x)); +} + + +#ifdef KR_headers +float +r_sign(a, b) +real *a, *b; +#else +float +r_sign(real * a, real * b) +#endif +{ + float x; + x = (*a >= 0 ? *a : -*a); + return (*b >= 0 ? x : -x); +} + + +#ifdef KR_headers +double floor(); +integer +i_dnnt(x) +real *x; +#else +#undef abs + +integer +i_dnnt(real * x) +#endif +{ + return ((*x) >= 0 ? floor(*x + .5) : -floor(.5 - *x)); +} + + +#ifdef KR_headers +double pow(); +double +pow_dd(ap, bp) +doublereal *ap, *bp; +#else +#undef abs + +double +pow_dd(doublereal * ap, doublereal * bp) +#endif +{ + return (pow(*ap, *bp)); +} + + +#ifdef KR_headers +float +pow_ri(ap, bp) +real *ap; +integer *bp; +#else +float +pow_ri(real * ap, integer * bp) +#endif +{ + float pow, x; + integer n; + unsigned long u; + + pow = 1; + x = *ap; + n = *bp; + + if (n != 0) { + if (n < 0) { + n = -n; + x = 1 / x; + } + for (u = n;;) { + if (u & 01) + pow *= x; + if (u >>= 1) + x *= x; + else + break; + } + } + return (pow); +} + +/* Unless compiled with -DNO_OVERWRITE, this variant of s_cat allows the + * target of a concatenation to appear on its right-hand side (contrary + * to the Fortran 77 Standard, but in accordance with Fortran 90). 
+ */ +#define NO_OVERWRITE + + +#ifndef NO_OVERWRITE + +#undef abs +#ifdef KR_headers +extern char *F77_aloc(); +extern void free(); +extern void exit_(); +#else + +extern char *F77_aloc(ftnlen, char *); +#endif + +#endif /* NO_OVERWRITE */ + +VOID +#ifdef KR_headers +s_cat(lp, rpp, rnp, np, ll) +char *lp, *rpp[]; +ftnlen rnp[], *np, ll; +#else +s_cat(char *lp, char *rpp[], ftnlen rnp[], ftnlen * np, ftnlen ll) +#endif +{ + ftnlen i, nc; + char *rp; + ftnlen n = *np; +#ifndef NO_OVERWRITE + ftnlen L, m; + char *lp0, *lp1; + + lp0 = 0; + lp1 = lp; + L = ll; + i = 0; + while (i < n) { + rp = rpp[i]; + m = rnp[i++]; + if (rp >= lp1 || rp + m <= lp) { + if ((L -= m) <= 0) { + n = i; + break; + } + lp1 += m; + continue; + } + lp0 = lp; + lp = lp1 = F77_aloc(L = ll, "s_cat"); + break; + } + lp1 = lp; +#endif /* NO_OVERWRITE */ + for (i = 0; i < n; ++i) { + nc = ll; + if (rnp[i] < nc) + nc = rnp[i]; + ll -= nc; + rp = rpp[i]; + while (--nc >= 0) + *lp++ = *rp++; + } + while (--ll >= 0) + *lp++ = ' '; +#ifndef NO_OVERWRITE + if (lp0) { + memmove(lp0, lp1, L); + free(lp1); + } +#endif +} + + +/* compare two strings */ + +#ifdef KR_headers +integer +s_cmp(a0, b0, la, lb) +char *a0, *b0; +ftnlen la, lb; +#else +integer +s_cmp(char *a0, char *b0, ftnlen la, ftnlen lb) +#endif +{ + register unsigned char *a, *aend, *b, *bend; + a = (unsigned char *) a0; + b = (unsigned char *) b0; + aend = a + la; + bend = b + lb; + + if (la <= lb) { + while (a < aend) + if (*a != *b) + return (*a - *b); + else { + ++a; + ++b; + } + + while (b < bend) + if (*b != ' ') + return (' ' - *b); + else + ++b; + } + + else { + while (b < bend) + if (*a == *b) { + ++a; + ++b; + } + else + return (*a - *b); + while (a < aend) + if (*a != ' ') + return (*a - ' '); + else + ++a; + } + return (0); +} + +/* Unless compiled with -DNO_OVERWRITE, this variant of s_copy allows the + * target of an assignment to appear on its right-hand side (contrary + * to the Fortran 77 Standard, but in accordance with Fortran 
90), + * as in a(2:5) = a(4:7) . + */ + + + +/* assign strings: a = b */ + +#ifdef KR_headers +VOID +s_copy(a, b, la, lb) +register char *a, *b; +ftnlen la, lb; +#else +void +s_copy(register char *a, register char *b, ftnlen la, ftnlen lb) +#endif +{ + register char *aend, *bend; + + aend = a + la; + + if (la <= lb) +#ifndef NO_OVERWRITE + if (a <= b || a >= b + la) +#endif + while (a < aend) + *a++ = *b++; +#ifndef NO_OVERWRITE + else + for (b += la; a < aend;) + *--aend = *--b; +#endif + + else { + bend = b + lb; +#ifndef NO_OVERWRITE + if (a <= b || a >= bend) +#endif + while (b < bend) + *a++ = *b++; +#ifndef NO_OVERWRITE + else { + a += lb; + while (b < bend) + *--a = *--bend; + a += lb; + } +#endif + while (a < aend) + *a++ = ' '; + } +} + + +#ifdef KR_headers +float f__cabs(); +float +z_abs(z) +complex *z; +#else +float f__cabs(float, float); +float +z_abs(complex * z) +#endif +{ + return (f__cabs(z->r, z->i)); +} + + +#ifdef KR_headers +extern void sig_die(); +VOID +z_div(c, a, b) +complex *a, *b, *c; +#else +extern void sig_die(char *, int); +void +z_div(complex * c, complex * a, complex * b) +#endif +{ + float ratio, den; + float abr, abi; + + if ((abr = b->r) < 0.) + abr = -abr; + if ((abi = b->i) < 0.) + abi = -abi; + if (abr <= abi) { + /*Let IEEE Infinties handle this ;( */ + /*if(abi == 0) + sig_die("complex division by zero", 1); */ + ratio = b->r / b->i; + den = b->i * (1 + ratio * ratio); + c->r = (a->r * ratio + a->i) / den; + c->i = (a->i * ratio - a->r) / den; + } + + else { + ratio = b->i / b->r; + den = b->r * (1 + ratio * ratio); + c->r = (a->r + a->i * ratio) / den; + c->i = (a->i - a->r * ratio) / den; + } + +} + + +#ifdef KR_headers +double sqrt(); +double f__cabs(); +VOID +z_sqrt(r, z) +complex *r, *z; +#else +#undef abs + +extern float f__cabs(float, float); +void +z_sqrt(complex * r, complex * z) +#endif +{ + float mag; + + if ((mag = f__cabs(z->r, z->i)) == 0.) 
+ r->r = r->i = 0.; + else if (z->r > 0) { + r->r = sqrt(0.5 * (mag + z->r)); + r->i = z->i / r->r / 2; + } + else { + r->i = sqrt(0.5 * (mag - z->r)); + if (z->i < 0) + r->i = -r->i; + r->r = z->i / r->i / 2; + } +} + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef KR_headers + integer pow_ii(ap, bp) integer *ap, *bp; +#else + integer pow_ii(integer * ap, integer * bp) +#endif + { + integer pow, x, n; + unsigned long u; + + x = *ap; + n = *bp; + + if (n <= 0) { + if (n == 0 || x == 1) + return 1; + if (x != -1) + return x != 0 ? 1 / x : 0; + n = -n; + } u = n; + for (pow = 1;;) { + if (u & 01) + pow *= x; + if (u >>= 1) + x *= x; + else + break; + } + return (pow); + } +#ifdef __cplusplus +} +#endif + +#ifdef KR_headers +extern void f_exit(); +VOID +s_stop(s, n) +char *s; +ftnlen n; +#else +#undef abs +#undef min +#undef max +#ifdef __cplusplus +extern "C" { +#endif +#ifdef __cplusplus + extern "C" { +#endif + void f_exit(void); + + int s_stop(char *s, ftnlen n) +#endif + { + int i; + + if (n > 0) { + fprintf(stderr, "STOP "); + for (i = 0; i < n; ++i) + putc(*s++, stderr); + fprintf(stderr, " statement executed\n"); + } +#ifdef NO_ONEXIT + f_exit(); +#endif + exit(0); + +/* We cannot avoid (useless) compiler diagnostics here: */ +/* some compilers complain if there is no return statement, */ +/* and others complain that this one cannot be reached. */ + + return 0; /* NOT REACHED */ + } +#ifdef __cplusplus + } +#endif +#ifdef __cplusplus +} +#endif diff --git a/media/sphinxbase/src/libsphinxbase/util/filename.c b/media/sphinxbase/src/libsphinxbase/util/filename.c new file mode 100644 index 000000000..3f4ae4750 --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/util/filename.c @@ -0,0 +1,120 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * filename.c -- File and path name operations. 
/* True when c separates directory components: '/' everywhere, plus '\\'
 * on Windows and Cygwin builds.  Shared by the three helpers below so
 * that they agree on what a separator is. */
static int
is_path_sep(char c)
{
#if defined(_WIN32) || defined(__CYGWIN__)
    return c == '/' || c == '\\';
#else
    return c == '/';
#endif
}

/*
 * Return a pointer to the last component of path: the text after the
 * final directory separator, or path itself when there is none.  The
 * result points into path; nothing is copied.
 *
 * On Windows/Cygwin this used to look for '\\' only, so "a/b.c" came
 * back whole even though path2dirname() split it at the '/'.
 */
const char *
path2basename(const char *path)
{
    const char *base = path;
    const char *p;

    for (p = path; *p != '\0'; ++p) {
        if (is_path_sep(*p))
            base = p + 1;
    }
    return base;
}

/*
 * Return all leading pathname components.
 *
 * Copies everything before the last separator of path into dir, which
 * must have room for strlen(path) + 1 bytes (at least 2).  Results:
 *   "a/b/c" -> "a/b"
 *   "/c"    -> "/"   (was "."; index 0 was never examined)
 *   "c", "" -> "."   ("" used to index path[(size_t)-1])
 */
void
path2dirname(const char *path, char *dir)
{
    size_t i, l;

    l = strlen(path);
    /* l - 1 would wrap around for the empty string. */
    i = (l > 0) ? l - 1 : 0;
    while (i > 0 && !is_path_sep(path[i]))
        --i;

    if (i > 0) {
        memcpy(dir, path, i);
        dir[i] = '\0';
    }
    else if (l > 0 && is_path_sep(path[0])) {
        /* The only separator is the leading one: the parent is the root. */
        dir[0] = path[0];
        dir[1] = '\0';
    }
    else {
        dir[0] = '.';
        dir[1] = '\0';
    }
}


/*
 * Strip off the shortest trailing .xyz suffix.
 *
 * Copies path into root without its last ".suffix" and always
 * terminates root (the old strncpy() call left it unterminated whenever
 * a suffix was removed).  root needs room for strlen(path) + 1 bytes.
 *
 * The copy is unchanged when the last component holds no '.', or only a
 * leading one at index 0 (".bashrc").  A '.' that belongs to a directory
 * name ("dir.d/file") is no longer mistaken for an extension.
 */
void
strip_fileext(const char *path, char *root)
{
    size_t len = strlen(path);
    size_t cut = len;           /* default: keep the whole string */
    size_t i;

    /* Scan indices len-1 .. 1; index 0 is deliberately never tested. */
    for (i = len; i > 1; ) {
        --i;
        if (path[i] == '.') {
            cut = i;
            break;
        }
        if (is_path_sep(path[i]))
            break;              /* left the last component: no suffix */
    }

    memcpy(root, path, cut);
    root[cut] = '\0';
}
/*
 * Test if this path is absolute.
 *
 * Returns 1 for an absolute path and 0 otherwise, using the rule of the
 * build target:
 *   - Windows CE: the first character is '\\' or '/';
 *   - other Windows: an ASCII letter, then ':', then '/' or '\\';
 *   - anything else (assumed Unix): the first character is '/'.
 * path must be a valid string; NULL is not checked.
 */
int
path_is_absolute(const char *path)
{
#if defined(_WIN32_WCE)
    const char lead = path[0];

    return lead == '\\' || lead == '/';
#elif defined(_WIN32) /* FIXME: Also SymbianOS */
    int drive_ok;

    /* Need at least "X:\" before the individual characters are examined. */
    if (strlen(path) < 3)
        return 0;

    drive_ok = (path[0] >= 'A' && path[0] <= 'Z')
        || (path[0] >= 'a' && path[0] <= 'z');
    if (!drive_ok || path[1] != ':')
        return 0;

    return path[2] == '/' || path[2] == '\\';
#else /* Assume Unix */
    return path[0] == '/';
#endif
}
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + A C-program for MT19937, with initialization improved 2002/1/26. + Coded by Takuji Nishimura and Makoto Matsumoto. + + Before using, initialize the state by using init_genrand(seed) + or init_by_array(init_key, key_length). + + Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura, + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright +` notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. The names of its contributors may not be used to endorse or promote + products derived from this software without specific prior written + permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + + Any feedback is very welcome. + http://www.math.keio.ac.jp/matumoto/emt.html + email: matumoto@math.keio.ac.jp +*/ + + +/* + * randgen.c : a portable random generator + * + * + * ********************************************** + * CMU ARPA Speech Project + * + * Copyright (c) 1999 Carnegie Mellon University. + * ALL RIGHTS RESERVED. + * ********************************************** + * + * HISTORY + * $Log: genrand.c,v $ + * Revision 1.2 2005/06/22 03:01:50 arthchan2003 + * Added keyword + * + * Revision 1.3 2005/03/30 01:22:48 archan + * Fixed mistakes in last updates. Add + * + * + * 18-Nov-04 ARCHAN (archan@cs.cmu.edu) at Carnegie Mellon University + * First incorporated from the Mersenne Twister Random + * Number Generator package. It was chosen because it is + * in BSD-license and its performance is quite + * reasonable. Of course if you look at the inventors's + * page. This random generator can actually gives + * 19937-bits period. This is already far from we need. + * This will possibly good enough for the next 10 years. + * + * I also downgrade the code a little bit to avoid Sphinx's + * developers misused it. 
+ */ + + + +#include + +#include "sphinxbase/genrand.h" + +/* Period parameters */ +#define N 624 +#define M 397 +#define MATRIX_A 0x9908b0dfUL /* constant vector a */ +#define UPPER_MASK 0x80000000UL /* most significant w-r bits */ +#define LOWER_MASK 0x7fffffffUL /* least significant r bits */ + +void init_genrand(unsigned long s); + +void +genrand_seed(unsigned long s) +{ + init_genrand(s); +} + + +static unsigned long mt[N]; /* the array for the state vector */ +static int mti = N + 1; /* mti==N+1 means mt[N] is not initialized */ + +/* initializes mt[N] with a seed */ +void +init_genrand(unsigned long s) +{ + mt[0] = s & 0xffffffffUL; + for (mti = 1; mti < N; mti++) { + mt[mti] = + (1812433253UL * (mt[mti - 1] ^ (mt[mti - 1] >> 30)) + mti); + /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */ + /* In the previous versions, MSBs of the seed affect */ + /* only MSBs of the array mt[]. */ + /* 2002/01/09 modified by Makoto Matsumoto */ + mt[mti] &= 0xffffffffUL; + /* for >32 bit machines */ + } +} + +/* generates a random number on [0,0xffffffff]-interval */ +unsigned long +genrand_int32(void) +{ + unsigned long y; + static unsigned long mag01[2] = { 0x0UL, MATRIX_A }; + /* mag01[x] = x * MATRIX_A for x=0,1 */ + + if (mti >= N) { /* generate N words at one time */ + int kk; + + if (mti == N + 1) /* if init_genrand() has not been called, */ + init_genrand(5489UL); /* a default initial seed is used */ + + for (kk = 0; kk < N - M; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + M] ^ (y >> 1) ^ mag01[y & 0x1UL]; + } + for (; kk < N - 1; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + (M - N)] ^ (y >> 1) ^ mag01[y & 0x1UL]; + } + y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); + mt[N - 1] = mt[M - 1] ^ (y >> 1) ^ mag01[y & 0x1UL]; + + mti = 0; + } + + y = mt[mti++]; + + /* Tempering */ + y ^= (y >> 11); + y ^= (y << 7) & 0x9d2c5680UL; + y ^= (y << 15) & 0xefc60000UL; + y ^= (y >> 18); 
+ + return y; +} + +/* generates a random number on [0,0x7fffffff]-interval */ +long +genrand_int31(void) +{ + return (long) (genrand_int32() >> 1); +} + +/* generates a random number on [0,1]-real-interval */ +double +genrand_real1(void) +{ + return genrand_int32() * (1.0 / 4294967295.0); + /* divided by 2^32-1 */ +} + +/* generates a random number on [0,1)-real-interval */ +double +genrand_real2(void) +{ + return genrand_int32() * (1.0 / 4294967296.0); + /* divided by 2^32 */ +} + +/* generates a random number on (0,1)-real-interval */ +double +genrand_real3(void) +{ + return (((double) genrand_int32()) + 0.5) * (1.0 / 4294967296.0); + /* divided by 2^32 */ +} + +/* generates a random number on [0,1) with 53-bit resolution*/ +double +genrand_res53(void) +{ + unsigned long a = genrand_int32() >> 5, b = genrand_int32() >> 6; + return (a * 67108864.0 + b) * (1.0 / 9007199254740992.0); +} + +/* These real versions are due to Isaku Wada, 2002/01/09 added */ diff --git a/media/sphinxbase/src/libsphinxbase/util/glist.c b/media/sphinxbase/src/libsphinxbase/util/glist.c new file mode 100644 index 000000000..a97e71978 --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/util/glist.c @@ -0,0 +1,271 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * glist.h -- Module for maintaining a generic, linear linked-list structure. + * + * ********************************************** + * CMU ARPA Speech Project + * + * Copyright (c) 1999 Carnegie Mellon University. + * ALL RIGHTS RESERVED. + * ********************************************** + * + * HISTORY + * $Log: glist.c,v $ + * Revision 1.8 2005/06/22 03:02:51 arthchan2003 + * 1, Fixed doxygen documentation, 2, add keyword. + * + * Revision 1.3 2005/03/30 01:22:48 archan + * Fixed mistakes in last updates. Add + * + * + * 09-Mar-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Added glist_chkdup_*(). + * + * 13-Feb-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Created from earlier version. 
+ */ + + +#include +#include +#include +#include + +#include "sphinxbase/glist.h" +#include "sphinxbase/ckd_alloc.h" + + +glist_t +glist_add_ptr(glist_t g, void *ptr) +{ + gnode_t *gn; + + gn = (gnode_t *) ckd_calloc(1, sizeof(gnode_t)); + gn->data.ptr = ptr; + gn->next = g; + return ((glist_t) gn); /* Return the new head of the list */ +} + + +glist_t +glist_add_int32(glist_t g, int32 val) +{ + gnode_t *gn; + + gn = (gnode_t *) ckd_calloc(1, sizeof(gnode_t)); + gn->data.i = (long)val; + gn->next = g; + return ((glist_t) gn); /* Return the new head of the list */ +} + + +glist_t +glist_add_uint32(glist_t g, uint32 val) +{ + gnode_t *gn; + + gn = (gnode_t *) ckd_calloc(1, sizeof(gnode_t)); + gn->data.ui = (unsigned long)val; + gn->next = g; + return ((glist_t) gn); /* Return the new head of the list */ +} + + +glist_t +glist_add_float32(glist_t g, float32 val) +{ + gnode_t *gn; + + gn = (gnode_t *) ckd_calloc(1, sizeof(gnode_t)); + gn->data.fl = (double)val; + gn->next = g; + return ((glist_t) gn); /* Return the new head of the list */ +} + + +glist_t +glist_add_float64(glist_t g, float64 val) +{ + gnode_t *gn; + + gn = (gnode_t *) ckd_calloc(1, sizeof(gnode_t)); + gn->data.fl = (double)val; + gn->next = g; + return ((glist_t) gn); /* Return the new head of the list */ +} + +void +glist_free(glist_t g) +{ + gnode_t *gn; + + while (g) { + gn = g; + g = gn->next; + ckd_free((void *) gn); + } +} + +int32 +glist_count(glist_t g) +{ + gnode_t *gn; + int32 n; + + for (gn = g, n = 0; gn; gn = gn->next, n++); + return n; +} + + +gnode_t * +glist_tail(glist_t g) +{ + gnode_t *gn; + + if (!g) + return NULL; + + for (gn = g; gn->next; gn = gn->next); + return gn; +} + + +glist_t +glist_reverse(glist_t g) +{ + gnode_t *gn, *nextgn; + gnode_t *rev; + + rev = NULL; + for (gn = g; gn; gn = nextgn) { + nextgn = gn->next; + + gn->next = rev; + rev = gn; + } + + return rev; +} + + +gnode_t * +glist_insert_ptr(gnode_t * gn, void *ptr) +{ + gnode_t *newgn; + + newgn = (gnode_t *) 
ckd_calloc(1, sizeof(gnode_t)); + newgn->data.ptr = ptr; + newgn->next = gn->next; + gn->next = newgn; + + return newgn; +} + + +gnode_t * +glist_insert_int32(gnode_t * gn, int32 val) +{ + gnode_t *newgn; + + newgn = (gnode_t *) ckd_calloc(1, sizeof(gnode_t)); + newgn->data.i = val; + newgn->next = gn->next; + gn->next = newgn; + + return newgn; +} + + +gnode_t * +glist_insert_uint32(gnode_t * gn, uint32 val) +{ + gnode_t *newgn; + + newgn = (gnode_t *) ckd_calloc(1, sizeof(gnode_t)); + newgn->data.ui = val; + newgn->next = gn->next; + + gn->next = newgn; + + return newgn; +} + + +gnode_t * +glist_insert_float32(gnode_t * gn, float32 val) +{ + gnode_t *newgn; + + newgn = (gnode_t *) ckd_calloc(1, sizeof(gnode_t)); + newgn->data.fl = (double)val; + newgn->next = gn->next; + gn->next = newgn; + + return newgn; +} + + +gnode_t * +glist_insert_float64(gnode_t * gn, float64 val) +{ + gnode_t *newgn; + + newgn = (gnode_t *) ckd_calloc(1, sizeof(gnode_t)); + newgn->data.fl = (double)val; + newgn->next = gn->next; + gn->next = newgn; + + return newgn; +} + +gnode_t * +gnode_free(gnode_t * gn, gnode_t * pred) +{ + gnode_t *next; + + next = gn->next; + if (pred) { + assert(pred->next == gn); + + pred->next = next; + } + + ckd_free((char *) gn); + + return next; +} diff --git a/media/sphinxbase/src/libsphinxbase/util/hash_table.c b/media/sphinxbase/src/libsphinxbase/util/hash_table.c new file mode 100644 index 000000000..eaadc7884 --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/util/hash_table.c @@ -0,0 +1,713 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * hash.c -- Hash table module. + * + * ********************************************** + * CMU ARPA Speech Project + * + * Copyright (c) 1999 Carnegie Mellon University. + * ALL RIGHTS RESERVED. + * ********************************************** + * + * HISTORY + * $Log: hash.c,v $ + * Revision 1.5 2005/06/22 03:04:01 arthchan2003 + * 1, Implemented hash_delete and hash_display, 2, Fixed doxygen documentation, 3, Added keyword. 
+ * + * Revision 1.9 2005/05/25 06:17:53 archan + * Delete the test code in cmd_ln.c and fixed platform specific code of hash.c + * + * Revision 1.8 2005/05/24 01:10:54 archan + * Fix a bug when the value only appear in the hash but there is no chain. Also make sure that prev was initialized to NULL. All success cases were tested, but not tested with the deletion is tested. + * + * Revision 1.6 2005/05/24 00:00:45 archan + * Added basic functionalities to hash_t: 1, display and 2, delete a key from a hash. \n + * + * Revision 1.5 2005/05/11 07:01:38 archan + * Added comments on the usage of the current implementation of hash tables. + * + * Revision 1.4 2005/05/03 04:09:11 archan + * Implemented the heart of word copy search. For every ci-phone, every word end, a tree will be allocated to preserve its pathscore. This is different from 3.5 or below, only the best score for a particular ci-phone, regardless of the word-ends will be preserved at every frame. The graph propagation will not collect unused word tree at this point. srch_WST_propagate_wd_lv2 is also as the most stupid in the century. But well, after all, everything needs a start. I will then really get the results from the search and see how it looks. + * + * Revision 1.3 2005/03/30 01:22:48 archan + * Fixed mistakes in last updates. Add + * + * + * 05-May-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon + * Removed hash_key2hash(). Added hash_enter_bkey() and hash_lookup_bkey(), + * and len attribute to hash_entry_t. + * + * 30-Apr-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon + * Added hash_key2hash(). + * + * 18-Jun-97 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon + * Included case sensitive/insensitive option. Removed local, static + * maintenance of all hash tables. + * + * 31-Jul-95 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon + * Created. 
+ */ + + +#include +#include +#include +#include + +#ifdef _MSC_VER +#pragma warning (disable: 4018) +#endif + +#include "sphinxbase/hash_table.h" +#include "sphinxbase/err.h" +#include "sphinxbase/ckd_alloc.h" +#include "sphinxbase/case.h" + + +#if 0 +static void +prime_sieve(int32 max) +{ + char *notprime; + int32 p, pp; + + notprime = (char *) ckd_calloc(max + 1, 1); + p = 2; + for (;;) { + printf("%d\n", p); + for (pp = p + p; pp <= max; pp += p) + notprime[pp] = 1; + for (++p; (p <= max) && notprime[p]; p++); + if (p > max) + break; + } +} +#endif + + +/* + * HACK!! Initial hash table size is restricted by this set of primes. (Of course, + * collision resolution by chaining will accommodate more entries indefinitely, but + * efficiency will drop.) + */ +const int32 prime[] = { + 101, 211, 307, 401, 503, 601, 701, 809, 907, + 1009, 1201, 1601, 2003, 2411, 3001, 4001, 5003, 6007, 7001, 8009, + 9001, + 10007, 12007, 16001, 20011, 24001, 30011, 40009, 50021, 60013, + 70001, 80021, 90001, + 100003, 120011, 160001, 200003, 240007, 300007, 400009, 500009, + 600011, 700001, 800011, 900001, + -1 +}; + + +/** + * This function returns a very large prime. + */ +static int32 +prime_size(int32 size) +{ + int32 i; + + for (i = 0; (prime[i] > 0) && (prime[i] < size); i++); + if (prime[i] <= 0) { + E_WARN("Very large hash table requested (%d entries)\n", size); + --i; + } + return (prime[i]); +} + + +hash_table_t * +hash_table_new(int32 size, int32 casearg) +{ + hash_table_t *h; + + h = (hash_table_t *) ckd_calloc(1, sizeof(hash_table_t)); + h->size = prime_size(size + (size >> 1)); + h->nocase = (casearg == HASH_CASE_NO); + h->table = (hash_entry_t *) ckd_calloc(h->size, sizeof(hash_entry_t)); + /* The above calloc clears h->table[*].key and .next to NULL, i.e. an empty table */ + + return h; +} + + +/* + * Compute hash value for given key string. + * Somewhat tuned for English text word strings. 
+ */ +static uint32 +key2hash(hash_table_t * h, const char *key) +{ + + register const char *cp; + + /** ARCHAN 20050712: + [1236322] libutil\str2words special character bgu + HACK Apply suggested hack of fixing the hash table such that + it can work with extended ascii code . This is a hack because + the best way to solve it is to make sure all character + representation is unsigned character in the first place. (or + better unicode.) + **/ + + /*register char c; */ + register unsigned char c; + register int32 s; + register uint32 hash; + + hash = 0; + s = 0; + + if (h->nocase) { + for (cp = key; *cp; cp++) { + c = *cp; + c = UPPER_CASE(c); + hash += c << s; + s += 5; + if (s >= 25) + s -= 24; + } + } + else { + for (cp = key; *cp; cp++) { + hash += (*cp) << s; + s += 5; + if (s >= 25) + s -= 24; + } + } + + return (hash % h->size); +} + + +static char * +makekey(uint8 * data, size_t len, char *key) +{ + size_t i, j; + + if (!key) + key = (char *) ckd_calloc(len * 2 + 1, sizeof(char)); + + for (i = 0, j = 0; i < len; i++, j += 2) { + key[j] = 'A' + (data[i] & 0x000f); + key[j + 1] = 'J' + ((data[i] >> 4) & 0x000f); + } + key[j] = '\0'; + + return key; +} + + +static int32 +keycmp_nocase(hash_entry_t * entry, const char *key) +{ + char c1, c2; + int32 i; + const char *str; + + str = entry->key; + for (i = 0; i < entry->len; i++) { + c1 = *(str++); + c1 = UPPER_CASE(c1); + c2 = *(key++); + c2 = UPPER_CASE(c2); + if (c1 != c2) + return (c1 - c2); + } + + return 0; +} + + +static int32 +keycmp_case(hash_entry_t * entry, const char *key) +{ + char c1, c2; + int32 i; + const char *str; + + str = entry->key; + for (i = 0; i < entry->len; i++) { + c1 = *(str++); + c2 = *(key++); + if (c1 != c2) + return (c1 - c2); + } + + return 0; +} + + +/* + * Lookup entry with hash-value hash in table h for given key + * Return value: hash_entry_t for key + */ +static hash_entry_t * +lookup(hash_table_t * h, uint32 hash, const char *key, size_t len) +{ + hash_entry_t *entry; + + entry 
= &(h->table[hash]); + if (entry->key == NULL) + return NULL; + + if (h->nocase) { + while (entry && ((entry->len != len) + || (keycmp_nocase(entry, key) != 0))) + entry = entry->next; + } + else { + while (entry && ((entry->len != len) + || (keycmp_case(entry, key) != 0))) + entry = entry->next; + } + + return entry; +} + + +int32 +hash_table_lookup(hash_table_t * h, const char *key, void ** val) +{ + hash_entry_t *entry; + uint32 hash; + size_t len; + + hash = key2hash(h, key); + len = strlen(key); + + entry = lookup(h, hash, key, len); + if (entry) { + if (val) + *val = entry->val; + return 0; + } + else + return -1; +} + +int32 +hash_table_lookup_int32(hash_table_t * h, const char *key, int32 *val) +{ + void *vval; + int32 rv; + + rv = hash_table_lookup(h, key, &vval); + if (rv != 0) + return rv; + if (val) + *val = (int32)(long)vval; + return 0; +} + + +int32 +hash_table_lookup_bkey(hash_table_t * h, const char *key, size_t len, void ** val) +{ + hash_entry_t *entry; + uint32 hash; + char *str; + + str = makekey((uint8 *) key, len, NULL); + hash = key2hash(h, str); + ckd_free(str); + + entry = lookup(h, hash, key, len); + if (entry) { + if (val) + *val = entry->val; + return 0; + } + else + return -1; +} + +int32 +hash_table_lookup_bkey_int32(hash_table_t * h, const char *key, size_t len, int32 *val) +{ + void *vval; + int32 rv; + + rv = hash_table_lookup_bkey(h, key, len, &vval); + if (rv != 0) + return rv; + if (val) + *val = (int32)(long)vval; + return 0; +} + + +static void * +enter(hash_table_t * h, uint32 hash, const char *key, size_t len, void *val, int32 replace) +{ + hash_entry_t *cur, *new; + + if ((cur = lookup(h, hash, key, len)) != NULL) { + void *oldval; + /* Key already exists. 
*/ + oldval = cur->val; + if (replace) { + /* Replace the pointer if replacement is requested, + * because this might be a different instance of the same + * string (this verges on magic, sorry) */ + cur->key = key; + cur->val = val; + } + return oldval; + } + + cur = &(h->table[hash]); + if (cur->key == NULL) { + /* Empty slot at hashed location; add this entry */ + cur->key = key; + cur->len = len; + cur->val = val; + + /* Added by ARCHAN at 20050515. This allows deletion could work. */ + cur->next = NULL; + + } + else { + /* Key collision; create new entry and link to hashed location */ + new = (hash_entry_t *) ckd_calloc(1, sizeof(hash_entry_t)); + new->key = key; + new->len = len; + new->val = val; + new->next = cur->next; + cur->next = new; + } + ++h->inuse; + + return val; +} + +/* 20050523 Added by ARCHAN to delete a key from a hash table */ +static void * +delete(hash_table_t * h, uint32 hash, const char *key, size_t len) +{ + hash_entry_t *entry, *prev; + void *val; + + prev = NULL; + entry = &(h->table[hash]); + if (entry->key == NULL) + return NULL; + + if (h->nocase) { + while (entry && ((entry->len != len) + || (keycmp_nocase(entry, key) != 0))) { + prev = entry; + entry = entry->next; + } + } + else { + while (entry && ((entry->len != len) + || (keycmp_case(entry, key) != 0))) { + prev = entry; + entry = entry->next; + } + } + + if (entry == NULL) + return NULL; + + /* At this point, entry will be the one required to be deleted, prev + will contain the previous entry + */ + val = entry->val; + + if (prev == NULL) { + /* That is to say the entry in the hash table (not the chain) matched the key. */ + /* We will then copy the things from the next entry to the hash table */ + prev = entry; + if (entry->next) { /* There is a next entry, great, copy it. 
*/ + entry = entry->next; + prev->key = entry->key; + prev->len = entry->len; + prev->val = entry->val; + prev->next = entry->next; + ckd_free(entry); + } + else { /* There is not a next entry, just set the key to null */ + prev->key = NULL; + prev->len = 0; + prev->next = NULL; + } + + } + else { /* This case is simple */ + prev->next = entry->next; + ckd_free(entry); + } + + /* Do wiring and free the entry */ + + --h->inuse; + + return val; +} + +void +hash_table_empty(hash_table_t *h) +{ + hash_entry_t *e, *e2; + int32 i; + + for (i = 0; i < h->size; i++) { + /* Free collision lists. */ + for (e = h->table[i].next; e; e = e2) { + e2 = e->next; + ckd_free((void *) e); + } + memset(&h->table[i], 0, sizeof(h->table[i])); + } + h->inuse = 0; +} + + +void * +hash_table_enter(hash_table_t * h, const char *key, void *val) +{ + uint32 hash; + size_t len; + + hash = key2hash(h, key); + len = strlen(key); + return (enter(h, hash, key, len, val, 0)); +} + +void * +hash_table_replace(hash_table_t * h, const char *key, void *val) +{ + uint32 hash; + size_t len; + + hash = key2hash(h, key); + len = strlen(key); + return (enter(h, hash, key, len, val, 1)); +} + +void * +hash_table_delete(hash_table_t * h, const char *key) +{ + uint32 hash; + size_t len; + + hash = key2hash(h, key); + len = strlen(key); + + return (delete(h, hash, key, len)); +} + +void * +hash_table_enter_bkey(hash_table_t * h, const char *key, size_t len, void *val) +{ + uint32 hash; + char *str; + + str = makekey((uint8 *) key, len, NULL); + hash = key2hash(h, str); + ckd_free(str); + + return (enter(h, hash, key, len, val, 0)); +} + +void * +hash_table_replace_bkey(hash_table_t * h, const char *key, size_t len, void *val) +{ + uint32 hash; + char *str; + + str = makekey((uint8 *) key, len, NULL); + hash = key2hash(h, str); + ckd_free(str); + + return (enter(h, hash, key, len, val, 1)); +} + +void * +hash_table_delete_bkey(hash_table_t * h, const char *key, size_t len) +{ + uint32 hash; + char *str; + + str 
= makekey((uint8 *) key, len, NULL); + hash = key2hash(h, str); + ckd_free(str); + + return (delete(h, hash, key, len)); +} + +void +hash_table_display(hash_table_t * h, int32 showdisplay) +{ + hash_entry_t *e; + int i, j; + j = 0; + + printf("Hash with chaining representation of the hash table\n"); + + for (i = 0; i < h->size; i++) { + e = &(h->table[i]); + if (e->key != NULL) { + printf("|key:"); + if (showdisplay) + printf("%s", e->key); + else + printf("%p", e->key); + + printf("|len:%zd|val=%ld|->", e->len, (long)e->val); + if (e->next == NULL) { + printf("NULL\n"); + } + j++; + + for (e = e->next; e; e = e->next) { + printf("|key:"); + if (showdisplay) + printf("%s", e->key); + + printf("|len:%zd|val=%ld|->", e->len, (long)e->val); + if (e->next == NULL) { + printf("NULL\n"); + } + j++; + } + } + } + + printf("The total number of keys =%d\n", j); +} + + +glist_t +hash_table_tolist(hash_table_t * h, int32 * count) +{ + glist_t g; + hash_entry_t *e; + int32 i, j; + + g = NULL; + + j = 0; + for (i = 0; i < h->size; i++) { + e = &(h->table[i]); + + if (e->key != NULL) { + g = glist_add_ptr(g, (void *) e); + j++; + + for (e = e->next; e; e = e->next) { + g = glist_add_ptr(g, (void *) e); + j++; + } + } + } + + if (count) + *count = j; + + return g; +} + +hash_iter_t * +hash_table_iter(hash_table_t *h) +{ + hash_iter_t *itor; + + itor = ckd_calloc(1, sizeof(*itor)); + itor->ht = h; + return hash_table_iter_next(itor); +} + +hash_iter_t * +hash_table_iter_next(hash_iter_t *itor) +{ + /* If there is an entry, walk down its list. */ + if (itor->ent) + itor->ent = itor->ent->next; + /* If we got to the end of the chain, or we had no entry, scan + * forward in the table to find the next non-empty bucket. */ + if (itor->ent == NULL) { + while (itor->idx < itor->ht->size + && itor->ht->table[itor->idx].key == NULL) + ++itor->idx; + /* If we did not find one then delete the iterator and + * return NULL. 
*/ + if (itor->idx == itor->ht->size) { + hash_table_iter_free(itor); + return NULL; + } + /* Otherwise use this next entry. */ + itor->ent = itor->ht->table + itor->idx; + /* Increase idx for the next time around. */ + ++itor->idx; + } + return itor; +} + +void +hash_table_iter_free(hash_iter_t *itor) +{ + ckd_free(itor); +} + +void +hash_table_free(hash_table_t * h) +{ + hash_entry_t *e, *e2; + int32 i; + + if (h == NULL) + return; + + /* Free additional entries created for key collision cases */ + for (i = 0; i < h->size; i++) { + for (e = h->table[i].next; e; e = e2) { + e2 = e->next; + ckd_free((void *) e); + } + } + + ckd_free((void *) h->table); + ckd_free((void *) h); +} diff --git a/media/sphinxbase/src/libsphinxbase/util/heap.c b/media/sphinxbase/src/libsphinxbase/util/heap.c new file mode 100644 index 000000000..2209a0393 --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/util/heap.c @@ -0,0 +1,292 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. 
+ * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * heap.c -- Generic heap structure for inserting in any and popping in sorted + * order. + * + * ********************************************** + * CMU ARPA Speech Project + * + * Copyright (c) 1999 Carnegie Mellon University. + * ALL RIGHTS RESERVED. + * ********************************************** + * + * HISTORY + * $Log: heap.c,v $ + * Revision 1.4 2005/06/22 03:05:49 arthchan2003 + * 1, Fixed doxygen documentation, 2, Add keyword. + * + * Revision 1.3 2005/03/30 01:22:48 archan + * Fixed mistakes in last updates. Add + * + * + * 05-Mar-99 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Fixed bug in heap_destroy() (in while loop exit condition). + * + * 23-Dec-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Started. 
+ */ + + +#include +#include +#include +#include + +#include "sphinxbase/heap.h" +#include "sphinxbase/err.h" +#include "sphinxbase/ckd_alloc.h" + +/** + * One node on the heap + */ +typedef struct heapnode_s { + void *data; /**< Application data at this node */ + int32 val; /**< Associated with above application data; according to which + heap is sorted (in ascending order) */ + int32 nl, nr; /**< #left/right descendants of this node (for balancing heap) */ + struct heapnode_s *l; /**< Root of left descendant heap */ + struct heapnode_s *r; /**< Root of right descendant heap */ +} heapnode_t; + +/** + * Internal heap data structure. + */ +struct heap_s { + heapnode_t *top; +}; + + +#if 0 +static void +heap_dump(heapnode_t * top, int32 level) +{ + int32 i; + + if (!top) + return; + + for (i = 0; i < level; i++) + printf(" "); + /* print top info */ + heap_dump(top->l, level + 1); + heap_dump(top->r, level + 1); +} +#endif + + +heap_t * +heap_new(void) +{ + heap_t *h = ckd_calloc(1, sizeof(*h)); + return h; +} + + +static heapnode_t * +subheap_insert(heapnode_t * root, void *data, int32 val) +{ + heapnode_t *h; + void *tmpdata; + int32 tmpval; + + if (!root) { + h = (heapnode_t *) ckd_calloc(1, sizeof(heapnode_t)); + h->data = data; + h->val = val; + h->l = h->r = NULL; + h->nl = h->nr = 0; + return h; + } + + /* Root already exists; if new value is less, replace root node */ + if (root->val > val) { + tmpdata = root->data; + tmpval = root->val; + root->data = data; + root->val = val; + data = tmpdata; + val = tmpval; + } + + /* Insert new or old (replaced) node in right or left subtree; keep them balanced */ + if (root->nl > root->nr) { + root->r = subheap_insert(root->r, data, val); + root->nr++; + } + else { + root->l = subheap_insert(root->l, data, val); + root->nl++; + } + + return root; +} + + +int +heap_insert(heap_t *heap, void *data, int32 val) +{ + heap->top = subheap_insert(heap->top, data, val); + return 0; +} + + +static heapnode_t * 
+subheap_pop(heapnode_t * root) +{ + heapnode_t *l, *r; + + /* Propagate best value from below into root, if any */ + l = root->l; + r = root->r; + + if (!l) { + if (!r) { + ckd_free((char *) root); + return NULL; + } + else { + root->data = r->data; + root->val = r->val; + root->r = subheap_pop(r); + root->nr--; + } + } + else { + if ((!r) || (l->val < r->val)) { + root->data = l->data; + root->val = l->val; + root->l = subheap_pop(l); + root->nl--; + } + else { + root->data = r->data; + root->val = r->val; + root->r = subheap_pop(r); + root->nr--; + } + } + + return root; +} + + +int +heap_pop(heap_t *heap, void **data, int32 * val) +{ + if (heap->top == NULL) + return 0; + *data = heap->top->data; + *val = heap->top->val; + heap->top = subheap_pop(heap->top); + return 1; +} + + +int +heap_top(heap_t *heap, void **data, int32 * val) +{ + if (heap->top == NULL) + return 0; + *data = heap->top->data; + *val = heap->top->val; + return 1; +} + +static int +heap_remove_one(heap_t *heap, heapnode_t *top, void *data) +{ + if (top == NULL) + return -1; + else if (top->data == data) { + assert(top == heap->top); + heap->top = subheap_pop(heap->top); + return 0; + } + if (top->l) { + if (top->l->data == data) { + top->l = subheap_pop(top->l); + --top->nl; + return 0; + } + else if (heap_remove_one(heap, top->l, data) == 0) { + --top->nl; + return 0; + } + } + if (top->r) { + if (top->r->data == data) { + top->r = subheap_pop(top->r); + --top->nr; + return 0; + } + else if (heap_remove_one(heap, top->r, data) == 0) { + --top->nr; + return 0; + } + } + return -1; +} + +int +heap_remove(heap_t *heap, void *data) +{ + return heap_remove_one(heap, heap->top, data); +} + + +size_t +heap_size(heap_t *heap) +{ + if (heap->top == NULL) + return 0; + return heap->top->nl + heap->top->nr + 1; +} + +int +heap_destroy(heap_t *heap) +{ + void *data; + int32 val; + + /* Empty the heap and free it */ + while (heap_pop(heap, &data, &val) > 0) + ; + ckd_free(heap); + + return 0; +} diff 
--git a/media/sphinxbase/src/libsphinxbase/util/huff_code.c b/media/sphinxbase/src/libsphinxbase/util/huff_code.c new file mode 100644 index 000000000..dd3fb582d --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/util/huff_code.c @@ -0,0 +1,651 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2009 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +#include + +#include "sphinxbase/huff_code.h" +#include "sphinxbase/ckd_alloc.h" +#include "sphinxbase/hash_table.h" +#include "sphinxbase/byteorder.h" +#include "sphinxbase/heap.h" +#include "sphinxbase/pio.h" +#include "sphinxbase/err.h" + +typedef struct huff_node_s { + int nbits; + struct huff_node_s *l; + union { + int32 ival; + char *sval; + struct huff_node_s *r; + } r; +} huff_node_t; + +typedef struct huff_codeword_s { + union { + int32 ival; + char *sval; + } r; + uint32 nbits, codeword; +} huff_codeword_t; + +enum { + HUFF_CODE_INT, + HUFF_CODE_STR +}; + +struct huff_code_s { + int16 refcount; + uint8 maxbits; + uint8 type; + uint32 *firstcode; + uint32 *numl; + huff_codeword_t **syms; + hash_table_t *codewords; + FILE *fh; + bit_encode_t *be; + int boff; +}; + +static huff_node_t * +huff_node_new_int(int32 val) +{ + huff_node_t *hn = ckd_calloc(1, sizeof(*hn)); + hn->r.ival = val; + return hn; +} + +static huff_node_t * +huff_node_new_str(char const *val) +{ + huff_node_t *hn = ckd_calloc(1, sizeof(*hn)); + hn->r.sval = ckd_salloc(val); + return hn; +} + +static huff_node_t * +huff_node_new_parent(huff_node_t *l, huff_node_t *r) +{ + huff_node_t *hn = ckd_calloc(1, sizeof(*hn)); + hn->l = l; + hn->r.r = r; + /* Propagate maximum bit length. 
*/ + if (r->nbits > l->nbits) + hn->nbits = r->nbits + 1; + else + hn->nbits = l->nbits + 1; + return hn; +} + +static void +huff_node_free_int(huff_node_t *root) +{ + if (root->l) { + huff_node_free_int(root->l); + huff_node_free_int(root->r.r); + } + ckd_free(root); +} + +static void +huff_node_free_str(huff_node_t *root, int freestr) +{ + if (root->l) { + huff_node_free_str(root->l, freestr); + huff_node_free_str(root->r.r, freestr); + } + else { + if (freestr) + ckd_free(root->r.sval); + } + ckd_free(root); +} + +static huff_node_t * +huff_code_build_tree(heap_t *q) +{ + huff_node_t *root = NULL; + int32 rf; + + while (heap_size(q) > 1) { + huff_node_t *l, *r, *p; + int32 lf, rf; + + heap_pop(q, (void *)&l, &lf); + heap_pop(q, (void *)&r, &rf); + p = huff_node_new_parent(l, r); + heap_insert(q, p, lf + rf); + } + heap_pop(q, (void **)&root, &rf); + return root; +} + +static void +huff_code_canonicalize(huff_code_t *hc, huff_node_t *root) +{ + glist_t agenda; + uint32 *nextcode; + int i, ncw; + + hc->firstcode = ckd_calloc(hc->maxbits+1, sizeof(*hc->firstcode)); + hc->syms = ckd_calloc(hc->maxbits+1, sizeof(*hc->syms)); + hc->numl = ckd_calloc(hc->maxbits+1, sizeof(*nextcode)); + nextcode = ckd_calloc(hc->maxbits+1, sizeof(*nextcode)); + + /* Traverse the tree, annotating it with the actual bit + * lengths, and histogramming them in numl. */ + root->nbits = 0; + ncw = 0; + agenda = glist_add_ptr(NULL, root); + while (agenda) { + huff_node_t *node = gnode_ptr(agenda); + agenda = gnode_free(agenda, NULL); + if (node->l) { + node->l->nbits = node->nbits + 1; + agenda = glist_add_ptr(agenda, node->l); + node->r.r->nbits = node->nbits + 1; + agenda = glist_add_ptr(agenda, node->r.r); + } + else { + hc->numl[node->nbits]++; + ncw++; + } + } + /* Create starting codes and symbol tables for each bit length. 
*/ + hc->syms[hc->maxbits] = ckd_calloc(hc->numl[hc->maxbits], sizeof(**hc->syms)); + for (i = hc->maxbits - 1; i > 0; --i) { + hc->firstcode[i] = (hc->firstcode[i+1] + hc->numl[i+1]) / 2; + hc->syms[i] = ckd_calloc(hc->numl[i], sizeof(**hc->syms)); + } + memcpy(nextcode, hc->firstcode, (hc->maxbits + 1) * sizeof(*nextcode)); + /* Traverse the tree again to produce the codebook itself. */ + hc->codewords = hash_table_new(ncw, HASH_CASE_YES); + agenda = glist_add_ptr(NULL, root); + while (agenda) { + huff_node_t *node = gnode_ptr(agenda); + agenda = gnode_free(agenda, NULL); + if (node->l) { + agenda = glist_add_ptr(agenda, node->l); + agenda = glist_add_ptr(agenda, node->r.r); + } + else { + /* Initialize codebook entry, which also retains symbol pointer. */ + huff_codeword_t *cw; + uint32 codeword = nextcode[node->nbits] & ((1 << node->nbits) - 1); + cw = hc->syms[node->nbits] + (codeword - hc->firstcode[node->nbits]); + cw->nbits = node->nbits; + cw->r.sval = node->r.sval; /* Will copy ints too... */ + cw->codeword = codeword; + if (hc->type == HUFF_CODE_INT) { + hash_table_enter_bkey(hc->codewords, + (char const *)&cw->r.ival, + sizeof(cw->r.ival), + (void *)cw); + } + else { + hash_table_enter(hc->codewords, cw->r.sval, (void *)cw); + } + ++nextcode[node->nbits]; + } + } + ckd_free(nextcode); +} + +huff_code_t * +huff_code_build_int(int32 const *values, int32 const *frequencies, int nvals) +{ + huff_code_t *hc; + huff_node_t *root; + heap_t *q; + int i; + + hc = ckd_calloc(1, sizeof(*hc)); + hc->refcount = 1; + hc->type = HUFF_CODE_INT; + + /* Initialize the heap with nodes for each symbol. */ + q = heap_new(); + for (i = 0; i < nvals; ++i) { + heap_insert(q, + huff_node_new_int(values[i]), + frequencies[i]); + } + + /* Now build the tree, which gives us codeword lengths. 
*/ + root = huff_code_build_tree(q); + heap_destroy(q); + if (root == NULL || root->nbits > 32) { + E_ERROR("Huffman trees currently limited to 32 bits\n"); + huff_node_free_int(root); + huff_code_free(hc); + return NULL; + } + + /* Build a canonical codebook. */ + hc->maxbits = root->nbits; + huff_code_canonicalize(hc, root); + + /* Tree no longer needed. */ + huff_node_free_int(root); + + return hc; +} + +huff_code_t * +huff_code_build_str(char * const *values, int32 const *frequencies, int nvals) +{ + huff_code_t *hc; + huff_node_t *root; + heap_t *q; + int i; + + hc = ckd_calloc(1, sizeof(*hc)); + hc->refcount = 1; + hc->type = HUFF_CODE_STR; + + /* Initialize the heap with nodes for each symbol. */ + q = heap_new(); + for (i = 0; i < nvals; ++i) { + heap_insert(q, + huff_node_new_str(values[i]), + frequencies[i]); + } + + /* Now build the tree, which gives us codeword lengths. */ + root = huff_code_build_tree(q); + heap_destroy(q); + if (root == NULL || root->nbits > 32) { + E_ERROR("Huffman trees currently limited to 32 bits\n"); + huff_node_free_str(root, TRUE); + huff_code_free(hc); + return NULL; + } + + /* Build a canonical codebook. */ + hc->maxbits = root->nbits; + huff_code_canonicalize(hc, root); + + /* Tree no longer needed (note we retain pointers to its strings). */ + huff_node_free_str(root, FALSE); + + return hc; +} + +huff_code_t * +huff_code_read(FILE *infh) +{ + huff_code_t *hc; + int i, j; + + hc = ckd_calloc(1, sizeof(*hc)); + hc->refcount = 1; + + hc->maxbits = fgetc(infh); + hc->type = fgetc(infh); + + /* Two bytes of padding. */ + fgetc(infh); + fgetc(infh); + + /* Allocate stuff. */ + hc->firstcode = ckd_calloc(hc->maxbits + 1, sizeof(*hc->firstcode)); + hc->numl = ckd_calloc(hc->maxbits + 1, sizeof(*hc->numl)); + hc->syms = ckd_calloc(hc->maxbits + 1, sizeof(*hc->syms)); + + /* Read the symbol tables. 
*/ + hc->codewords = hash_table_new(hc->maxbits, HASH_CASE_YES); + for (i = 1; i <= hc->maxbits; ++i) { + if (fread(&hc->firstcode[i], 4, 1, infh) != 1) + goto error_out; + SWAP_BE_32(&hc->firstcode[i]); + if (fread(&hc->numl[i], 4, 1, infh) != 1) + goto error_out; + SWAP_BE_32(&hc->numl[i]); + hc->syms[i] = ckd_calloc(hc->numl[i], sizeof(**hc->syms)); + for (j = 0; j < hc->numl[i]; ++j) { + huff_codeword_t *cw = &hc->syms[i][j]; + cw->nbits = i; + cw->codeword = hc->firstcode[i] + j; + if (hc->type == HUFF_CODE_INT) { + if (fread(&cw->r.ival, 4, 1, infh) != 1) + goto error_out; + SWAP_BE_32(&cw->r.ival); + hash_table_enter_bkey(hc->codewords, + (char const *)&cw->r.ival, + sizeof(cw->r.ival), + (void *)cw); + } + else { + size_t len; + cw->r.sval = fread_line(infh, &len); + cw->r.sval[len-1] = '\0'; + hash_table_enter(hc->codewords, cw->r.sval, (void *)cw); + } + } + } + + return hc; +error_out: + huff_code_free(hc); + return NULL; +} + +int +huff_code_write(huff_code_t *hc, FILE *outfh) +{ + int i, j; + + /* Maximum codeword length */ + fputc(hc->maxbits, outfh); + /* Symbol type */ + fputc(hc->type, outfh); + /* Two extra bytes (for future use and alignment) */ + fputc(0, outfh); + fputc(0, outfh); + /* For each codeword length: */ + for (i = 1; i <= hc->maxbits; ++i) { + uint32 val; + + /* Starting code, number of codes. */ + val = hc->firstcode[i]; + /* Canonically big-endian (like the data itself) */ + SWAP_BE_32(&val); + fwrite(&val, 4, 1, outfh); + val = hc->numl[i]; + SWAP_BE_32(&val); + fwrite(&val, 4, 1, outfh); + + /* Symbols for each code (FIXME: Should compress these too) */ + for (j = 0; j < hc->numl[i]; ++j) { + if (hc->type == HUFF_CODE_INT) { + int32 val = hc->syms[i][j].r.ival; + SWAP_BE_32(&val); + fwrite(&val, 4, 1, outfh); + } + else { + /* Write them all separated by newlines, so that + * fgets() will read them for us. 
*/ + fprintf(outfh, "%s\n", hc->syms[i][j].r.sval); + } + } + } + return 0; +} + +int +huff_code_dump_codebits(FILE *dumpfh, uint32 nbits, uint32 codeword) +{ + uint32 i; + + for (i = 0; i < nbits; ++i) + fputc((codeword & (1<<(nbits-i-1))) ? '1' : '0', dumpfh); + return 0; +} + +int +huff_code_dump(huff_code_t *hc, FILE *dumpfh) +{ + int i, j; + + /* Print out all codewords. */ + fprintf(dumpfh, "Maximum codeword length: %d\n", hc->maxbits); + fprintf(dumpfh, "Symbols are %s\n", (hc->type == HUFF_CODE_STR) ? "strings" : "ints"); + fprintf(dumpfh, "Codewords:\n"); + for (i = 1; i <= hc->maxbits; ++i) { + for (j = 0; j < hc->numl[i]; ++j) { + if (hc->type == HUFF_CODE_STR) + fprintf(dumpfh, "%-30s", hc->syms[i][j].r.sval); + else + fprintf(dumpfh, "%-30d", hc->syms[i][j].r.ival); + huff_code_dump_codebits(dumpfh, hc->syms[i][j].nbits, + hc->syms[i][j].codeword); + fprintf(dumpfh, "\n"); + } + } + return 0; +} + +huff_code_t * +huff_code_retain(huff_code_t *hc) +{ + ++hc->refcount; + return hc; +} + +int +huff_code_free(huff_code_t *hc) +{ + int i; + + if (hc == NULL) + return 0; + if (--hc->refcount > 0) + return hc->refcount; + for (i = 0; i <= hc->maxbits; ++i) { + int j; + for (j = 0; j < hc->numl[i]; ++j) { + if (hc->type == HUFF_CODE_STR) + ckd_free(hc->syms[i][j].r.sval); + } + ckd_free(hc->syms[i]); + } + ckd_free(hc->firstcode); + ckd_free(hc->numl); + ckd_free(hc->syms); + hash_table_free(hc->codewords); + ckd_free(hc); + return 0; +} + +FILE * +huff_code_attach(huff_code_t *hc, FILE *fh, char const *mode) +{ + FILE *oldfh = huff_code_detach(hc); + + hc->fh = fh; + if (mode[0] == 'w') + hc->be = bit_encode_attach(hc->fh); + return oldfh; +} + +FILE * +huff_code_detach(huff_code_t *hc) +{ + FILE *oldfh = hc->fh; + + if (hc->be) { + bit_encode_flush(hc->be); + bit_encode_free(hc->be); + hc->be = NULL; + } + hc->fh = NULL; + return oldfh; +} + +int +huff_code_encode_int(huff_code_t *hc, int32 sym, uint32 *outcw) +{ + huff_codeword_t *cw; + + if 
(hash_table_lookup_bkey(hc->codewords, + (char const *)&sym, + sizeof(sym), + (void **)&cw) < 0) + return 0; + if (hc->be) + bit_encode_write_cw(hc->be, cw->codeword, cw->nbits); + if (outcw) *outcw = cw->codeword; + return cw->nbits; +} + +int +huff_code_encode_str(huff_code_t *hc, char const *sym, uint32 *outcw) +{ + huff_codeword_t *cw; + + if (hash_table_lookup(hc->codewords, + sym, + (void **)&cw) < 0) + return 0; + if (hc->be) + bit_encode_write_cw(hc->be, cw->codeword, cw->nbits); + if (outcw) *outcw = cw->codeword; + return cw->nbits; +} + +static huff_codeword_t * +huff_code_decode_data(huff_code_t *hc, char const **inout_data, + size_t *inout_data_len, int *inout_offset) +{ + char const *data = *inout_data; + char const *end = data + *inout_data_len; + int offset = *inout_offset; + uint32 cw; + int cwlen; + int byte; + + if (data == end) + return NULL; + byte = *data++; + cw = !!(byte & (1 << (7-offset++))); + cwlen = 1; + /* printf("%.*x ", cwlen, cw); */ + while (cwlen <= hc->maxbits && cw < hc->firstcode[cwlen]) { + ++cwlen; + cw <<= 1; + if (offset > 7) { + if (data == end) + return NULL; + byte = *data++; + offset = 0; + } + cw |= !!(byte & (1 << (7-offset++))); + /* printf("%.*x ", cwlen, cw); */ + } + if (cwlen > hc->maxbits) /* FAIL: invalid data */ + return NULL; + + /* Put the last byte back if there are bits left over. 
*/ + if (offset < 8) + --data; + else + offset = 0; + + /* printf("%.*x\n", cwlen, cw); */ + *inout_data_len = end - data; + *inout_data = data; + *inout_offset = offset; + return hc->syms[cwlen] + (cw - hc->firstcode[cwlen]); +} + +static huff_codeword_t * +huff_code_decode_fh(huff_code_t *hc) +{ + uint32 cw; + int cwlen; + int byte; + + if ((byte = fgetc(hc->fh)) == EOF) + return NULL; + cw = !!(byte & (1 << (7-hc->boff++))); + cwlen = 1; + /* printf("%.*x ", cwlen, cw); */ + while (cwlen <= hc->maxbits && cw < hc->firstcode[cwlen]) { + ++cwlen; + cw <<= 1; + if (hc->boff > 7) { + if ((byte = fgetc(hc->fh)) == EOF) + return NULL; + hc->boff = 0; + } + cw |= !!(byte & (1 << (7-hc->boff++))); + /* printf("%.*x ", cwlen, cw); */ + } + if (cwlen > hc->maxbits) /* FAIL: invalid data */ + return NULL; + + /* Put the last byte back if there are bits left over. */ + if (hc->boff < 8) + ungetc(byte, hc->fh); + else + hc->boff = 0; + + /* printf("%.*x\n", cwlen, cw); */ + return hc->syms[cwlen] + (cw - hc->firstcode[cwlen]); +} + +int +huff_code_decode_int(huff_code_t *hc, int *outval, + char const **inout_data, + size_t *inout_data_len, int *inout_offset) +{ + huff_codeword_t *cw; + + if (inout_data) + cw = huff_code_decode_data(hc, inout_data, inout_data_len, inout_offset); + else if (hc->fh) + cw = huff_code_decode_fh(hc); + else + return -1; + + if (cw == NULL) + return -1; + if (outval) + *outval = cw->r.ival; + + return 0; +} + +char const * +huff_code_decode_str(huff_code_t *hc, + char const **inout_data, + size_t *inout_data_len, int *inout_offset) +{ + huff_codeword_t *cw; + + if (inout_data) + cw = huff_code_decode_data(hc, inout_data, inout_data_len, inout_offset); + else if (hc->fh) + cw = huff_code_decode_fh(hc); + else + return NULL; + + if (cw == NULL) + return NULL; + + return cw->r.sval; +} diff --git a/media/sphinxbase/src/libsphinxbase/util/listelem_alloc.c b/media/sphinxbase/src/libsphinxbase/util/listelem_alloc.c new file mode 100644 index 
000000000..354c4767c --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/util/listelem_alloc.c @@ -0,0 +1,294 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * ==================================================================== + * + */ + +#include +#include + +#include "sphinxbase/err.h" +#include "sphinxbase/ckd_alloc.h" +#include "sphinxbase/listelem_alloc.h" +#include "sphinxbase/glist.h" + +/** + * Fast linked list allocator. + * + * We keep a separate linked list for each element-size. Element-size + * must be a multiple of pointer-size. + * + * Initially a block of empty elements is allocated, where the first + * machine word in each element points to the next available element. + * To allocate, we use this pointer to move the freelist to the next + * element, then return the current element. + * + * The last element in the list starts with a NULL pointer, which is + * used as a signal to allocate a new block of elements. + * + * In order to be able to actually release the memory allocated, we + * have to add a linked list of block pointers. This shouldn't create + * much overhead since we never access it except when freeing the + * allocator. + */ +struct listelem_alloc_s { + char **freelist; /**< ptr to first element in freelist */ + glist_t blocks; /**< Linked list of blocks allocated. */ + glist_t blocksize; /**< Number of elements in each block */ + size_t elemsize; /**< Number of (char *) in element */ + size_t blk_alloc; /**< Number of alloc operations before increasing blocksize */ + size_t n_blocks; + size_t n_alloc; + size_t n_freed; +}; + +#define MIN_ALLOC 50 /**< Minimum number of elements to allocate in one block */ +#define BLKID_SHIFT 16 /**< Bit position of block number in element ID */ +#define BLKID_MASK ((1<freelist = NULL; + list->blocks = NULL; + list->elemsize = elemsize; + /* Intent of this is to increase block size once we allocate + * 256KiB (i.e. 1<<18). If somehow the element size is big enough + * to overflow that, just fail, people should use malloc anyway. 
*/ + list->blk_alloc = (1 << 18) / (MIN_ALLOC * elemsize); + if (list->blk_alloc <= 0) { + E_ERROR("Element size * block size exceeds 256k, use malloc instead.\n"); + ckd_free(list); + return NULL; + } + list->n_alloc = 0; + list->n_freed = 0; + + /* Allocate an initial block to minimize latency. */ + listelem_add_block(list, __FILE__, __LINE__); + return list; +} + +void +listelem_alloc_free(listelem_alloc_t *list) +{ + gnode_t *gn; + if (list == NULL) + return; + for (gn = list->blocks; gn; gn = gnode_next(gn)) + ckd_free(gnode_ptr(gn)); + glist_free(list->blocks); + glist_free(list->blocksize); + ckd_free(list); +} + +static void +listelem_add_block(listelem_alloc_t *list, char *caller_file, int caller_line) +{ + char **cpp, *cp; + size_t j; + int32 blocksize; + + blocksize = list->blocksize ? gnode_int32(list->blocksize) : MIN_ALLOC; + /* Check if block size should be increased (if many requests for this size) */ + if (list->blk_alloc == 0) { + /* See above. No sense in allocating blocks bigger than + * 256KiB (well, actually, there might be, but we'll worry + * about that later). */ + blocksize <<= 1; + if (blocksize * list->elemsize > (1 << 18)) + blocksize = (1 << 18) / list->elemsize; + list->blk_alloc = (1 << 18) / (blocksize * list->elemsize); + } + + /* Allocate block */ + cpp = list->freelist = + (char **) __ckd_calloc__(blocksize, list->elemsize, + caller_file, caller_line); + list->blocks = glist_add_ptr(list->blocks, cpp); + list->blocksize = glist_add_int32(list->blocksize, blocksize); + cp = (char *) cpp; + /* Link up the blocks via their first machine word. 
*/ + for (j = blocksize - 1; j > 0; --j) { + cp += list->elemsize; + *cpp = cp; + cpp = (char **) cp; + } + /* Make sure the last element's forward pointer is NULL */ + *cpp = NULL; + --list->blk_alloc; + ++list->n_blocks; +} + + +void * +__listelem_malloc__(listelem_alloc_t *list, char *caller_file, int caller_line) +{ + char **ptr; + + /* Allocate a new block if list empty */ + if (list->freelist == NULL) + listelem_add_block(list, caller_file, caller_line); + + /* Unlink and return first element in freelist */ + ptr = list->freelist; + list->freelist = (char **) (*(list->freelist)); + (list->n_alloc)++; + + return (void *)ptr; +} + +void * +__listelem_malloc_id__(listelem_alloc_t *list, char *caller_file, + int caller_line, int32 *out_id) +{ + char **ptr; + + /* Allocate a new block if list empty */ + if (list->freelist == NULL) + listelem_add_block(list, caller_file, caller_line); + + /* Unlink and return first element in freelist */ + ptr = list->freelist; + list->freelist = (char **) (*(list->freelist)); + (list->n_alloc)++; + + if (out_id) { + int32 blksize, blkidx, ptridx; + gnode_t *gn, *gn2; + char **block; + + gn2 = list->blocksize; + block = NULL; + blkidx = 0; + for (gn = list->blocks; gn; gn = gnode_next(gn)) { + block = gnode_ptr(gn); + blksize = gnode_int32(gn2) * list->elemsize / sizeof(*block); + if (ptr >= block && ptr < block + blksize) + break; + gn2 = gnode_next(gn2); + ++blkidx; + } + if (gn == NULL) { + E_ERROR("Failed to find block index for pointer %p!\n", ptr); + } + ptridx = (ptr - block) / (list->elemsize / sizeof(*block)); + E_DEBUG(4,("ptr %p block %p blkidx %d ptridx %d\n", + ptr, block, list->n_blocks - blkidx - 1, ptridx)); + *out_id = ((list->n_blocks - blkidx - 1) << BLKID_SHIFT) | ptridx; + } + + return ptr; +} + +void * +listelem_get_item(listelem_alloc_t *list, int32 id) +{ + int32 blkidx, ptridx, i; + gnode_t *gn; + + blkidx = (id >> BLKID_SHIFT) & BLKID_MASK; + ptridx = id & BLKID_MASK; + + i = 0; + blkidx = list->n_blocks - 
blkidx; + for (gn = list->blocks; gn; gn = gnode_next(gn)) { + if (++i == blkidx) + break; + } + if (gn == NULL) { + E_ERROR("Failed to find block index %d\n", blkidx); + return NULL; + } + + return (void *)((char **)gnode_ptr(gn) + + ptridx * (list->elemsize / sizeof(void *))); +} + +void +__listelem_free__(listelem_alloc_t *list, void *elem, + char *caller_file, int caller_line) +{ + char **cpp; + + /* + * Insert freed item at head of list. + */ + cpp = (char **) elem; + *cpp = (char *) list->freelist; + list->freelist = cpp; + (list->n_freed)++; +} + + +void +listelem_stats(listelem_alloc_t *list) +{ + gnode_t *gn, *gn2; + char **cpp; + size_t n; + + E_INFO("Linklist stats:\n"); + for (n = 0, cpp = list->freelist; cpp; + cpp = (char **) (*cpp), n++); + E_INFO + ("elemsize %lu, #alloc %lu, #freed %lu, #freelist %lu\n", + (unsigned long)list->elemsize, + (unsigned long)list->n_alloc, + (unsigned long)list->n_freed, + (unsigned long)n); + E_INFO("Allocated blocks:\n"); + gn2 = list->blocksize; + for (gn = list->blocks; gn; gn = gnode_next(gn)) { + E_INFO("%p (%d * %d bytes)\n", gnode_ptr(gn), gnode_int32(gn2), list->elemsize); + gn2 = gnode_next(gn2); + } +} diff --git a/media/sphinxbase/src/libsphinxbase/util/logmath.c b/media/sphinxbase/src/libsphinxbase/util/logmath.c new file mode 100644 index 000000000..8702e0ed6 --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/util/logmath.c @@ -0,0 +1,483 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2007 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +#include +#include +#include + +#include "sphinxbase/logmath.h" +#include "sphinxbase/err.h" +#include "sphinxbase/ckd_alloc.h" +#include "sphinxbase/mmio.h" +#include "sphinxbase/bio.h" +#include "sphinxbase/strfuncs.h" + +struct logmath_s { + logadd_t t; + int refcount; + mmio_file_t *filemap; + float64 base; + float64 log_of_base; + float64 log10_of_base; + float64 inv_log_of_base; + float64 inv_log10_of_base; + int32 zero; +}; + +logmath_t * +logmath_init(float64 base, int shift, int use_table) +{ + logmath_t *lmath; + uint32 maxyx, i; + float64 byx; + int width; + + /* Check that the base is correct. 
*/ + if (base <= 1.0) { + E_ERROR("Base must be greater than 1.0\n"); + return NULL; + } + + /* Set up various necessary constants. */ + lmath = ckd_calloc(1, sizeof(*lmath)); + lmath->refcount = 1; + lmath->base = base; + lmath->log_of_base = log(base); + lmath->log10_of_base = log10(base); + lmath->inv_log_of_base = 1.0/lmath->log_of_base; + lmath->inv_log10_of_base = 1.0/lmath->log10_of_base; + lmath->t.shift = shift; + /* Shift this sufficiently that overflows can be avoided. */ + lmath->zero = MAX_NEG_INT32 >> (shift + 2); + + if (!use_table) + return lmath; + + /* Create a logadd table with the appropriate width */ + maxyx = (uint32) (log(2.0) / log(base) + 0.5) >> shift; + /* Poor man's log2 */ + if (maxyx < 256) width = 1; + else if (maxyx < 65536) width = 2; + else width = 4; + + lmath->t.width = width; + /* Figure out size of add table required. */ + byx = 1.0; /* Maximum possible base^{y-x} value - note that this implies that y-x == 0 */ + for (i = 0;; ++i) { + float64 lobyx = log(1.0 + byx) * lmath->inv_log_of_base; /* log_{base}(1 + base^{y-x}); */ + int32 k = (int32) (lobyx + 0.5 * (1<> shift; /* Round to shift */ + + /* base^{y-x} has reached the smallest representable value. */ + if (k <= 0) + break; + + /* This table is indexed by -(y-x), so we multiply byx by + * base^{-1} here which is equivalent to subtracting one from + * (y-x). */ + byx /= base; + } + i >>= shift; + + /* Never produce a table smaller than 256 entries. */ + if (i < 255) i = 255; + + lmath->t.table = ckd_calloc(i+1, width); + lmath->t.table_size = i + 1; + /* Create the add table (see above). */ + byx = 1.0; + for (i = 0;; ++i) { + float64 lobyx = log(1.0 + byx) * lmath->inv_log_of_base; + int32 k = (int32) (lobyx + 0.5 * (1<> shift; /* Round to shift */ + uint32 prev = 0; + + /* Check any previous value - if there is a shift, we want to + * only store the highest one. 
*/ + switch (width) { + case 1: + prev = ((uint8 *)lmath->t.table)[i >> shift]; + break; + case 2: + prev = ((uint16 *)lmath->t.table)[i >> shift]; + break; + case 4: + prev = ((uint32 *)lmath->t.table)[i >> shift]; + break; + } + if (prev == 0) { + switch (width) { + case 1: + ((uint8 *)lmath->t.table)[i >> shift] = (uint8) k; + break; + case 2: + ((uint16 *)lmath->t.table)[i >> shift] = (uint16) k; + break; + case 4: + ((uint32 *)lmath->t.table)[i >> shift] = (uint32) k; + break; + } + } + if (k <= 0) + break; + + /* Decay base^{y-x} exponentially according to base. */ + byx /= base; + } + + return lmath; +} +

/**
 * Load a log-add table from a file in the format produced by
 * logmath_write().
 *
 * The table is memory-mapped when the data is natively ordered and
 * aligned on a width-byte boundary; otherwise (or if mapping fails) it
 * is read into a heap buffer.
 *
 * @param file_name Path of the table file.
 * @return A new logmath_t with a reference count of 1, or NULL on error.
 */
logmath_t *
logmath_read(const char *file_name)
{
    logmath_t *lmath;
    char **argname, **argval;
    int32 byteswap, i;
    int chksum_present, do_mmap;
    uint32 chksum;
    long pos;
    FILE *fp;

    E_INFO("Reading log table file '%s'\n", file_name);
    if ((fp = fopen(file_name, "rb")) == NULL) {
        E_ERROR_SYSTEM("Failed to open log table file '%s' for reading", file_name);
        return NULL;
    }

    /* Read header, including argument-value info and 32-bit byteorder magic */
    if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0) {
        E_ERROR("Failed to read the header from the file '%s'\n", file_name);
        fclose(fp);
        return NULL;
    }

    lmath = ckd_calloc(1, sizeof(*lmath));
    /* Same initial reference count as logmath_init(), so that a
     * retain/free pair does not release the caller's object. */
    lmath->refcount = 1;
    /* Default values. */
    lmath->t.shift = 0;
    lmath->t.width = 2;
    lmath->base = 1.0001;

    /* Parse argument-value list */
    chksum_present = 0;
    for (i = 0; argname[i]; i++) {
        if (strcmp(argname[i], "version") == 0) {
        }
        else if (strcmp(argname[i], "chksum0") == 0) {
            if (strcmp(argval[i], "yes") == 0)
                chksum_present = 1;
        }
        else if (strcmp(argname[i], "width") == 0) {
            lmath->t.width = atoi(argval[i]);
        }
        else if (strcmp(argname[i], "shift") == 0) {
            lmath->t.shift = atoi(argval[i]);
        }
        else if (strcmp(argname[i], "logbase") == 0) {
            lmath->base = atof_c(argval[i]);
        }
    }
    bio_hdrarg_free(argname, argval);
    chksum = 0;

    /* Reject header values the rest of this file cannot handle:
     * logmath_add() only knows widths 1, 2 and 4 (and the alignment
     * mask below needs a power of two), and logmath_init() refuses
     * bases <= 1.0 because log(base) would be zero or negative. */
    if (lmath->t.width != 1 && lmath->t.width != 2 && lmath->t.width != 4) {
        E_ERROR("%s: Unsupported table width %d\n", file_name, lmath->t.width);
        goto error_out;
    }
    if (lmath->base <= 1.0) {
        E_ERROR("Base must be greater than 1.0\n");
        goto error_out;
    }

    /* Set up various necessary constants. */
    lmath->log_of_base = log(lmath->base);
    lmath->log10_of_base = log10(lmath->base);
    lmath->inv_log_of_base = 1.0/lmath->log_of_base;
    lmath->inv_log10_of_base = 1.0/lmath->log10_of_base;
    /* Shift this sufficiently that overflows can be avoided. */
    lmath->zero = MAX_NEG_INT32 >> (lmath->t.shift + 2);

    /* #Values to follow */
    if (bio_fread(&lmath->t.table_size, sizeof(int32), 1, fp, byteswap, &chksum) != 1) {
        E_ERROR("Failed to read values from the file '%s'", file_name);
        goto error_out;
    }

    /* Check alignment constraints for memory mapping */
    do_mmap = 1;
    pos = ftell(fp);
    if (pos & ((long)lmath->t.width - 1)) {
        E_WARN("%s: Data start %ld is not aligned on %d-byte boundary, will not memory map\n",
               file_name, pos, lmath->t.width);
        do_mmap = 0;
    }
    /* Check byte order for memory mapping */
    if (byteswap) {
        E_WARN("%s: Data is wrong-endian, will not memory map\n", file_name);
        do_mmap = 0;
    }

    if (do_mmap) {
        lmath->filemap = mmio_file_read(file_name);
        if (lmath->filemap == NULL) {
            /* Mapping can fail (or be unimplemented on the platform);
             * fall back to reading the table into memory. */
            E_WARN("%s: Failed to memory map, reading table instead\n", file_name);
            do_mmap = 0;
        }
        else {
            lmath->t.table = (char *)mmio_file_ptr(lmath->filemap) + pos;
        }
    }
    if (!do_mmap) {
        lmath->t.table = ckd_calloc(lmath->t.table_size, lmath->t.width);
        if (bio_fread(lmath->t.table, lmath->t.width, lmath->t.table_size,
                      fp, byteswap, &chksum) != lmath->t.table_size) {
            E_ERROR("Failed to read data (%d x %d bytes) from the file '%s' failed",
                    lmath->t.table_size, lmath->t.width, file_name);
            goto error_out;
        }
        if (chksum_present)
            bio_verify_chksum(fp, byteswap, chksum);

        if (fread(&i, 1, 1, fp) == 1) {
            E_ERROR("%s: More data than expected\n", file_name);
            goto error_out;
        }
    }
    fclose(fp);

    return lmath;

error_out:
    /* The stream is still open on every path that reaches here. */
    fclose(fp);
    logmath_free(lmath);
    return NULL;
}

/**
 * Write the log-add table of lmath to a file readable by logmath_read().
 *
 * @return 0 on success, -1 if there is no table or on I/O failure.
 */
int32
logmath_write(logmath_t *lmath, const char *file_name)
{
    FILE *fp;
    long pos;
    uint32 chksum;

    if (lmath->t.table == NULL) {
        E_ERROR("No log table to write!\n");
        return -1;
    }

    E_INFO("Writing log table file '%s'\n", file_name);
    if ((fp = fopen(file_name, "wb")) == NULL) {
        E_ERROR_SYSTEM("Failed to open logtable file '%s' for writing", file_name);
        return -1;
    }

    /* For whatever reason, we have to do this manually at the
     * moment. */
    fprintf(fp, "s3\nversion 1.0\nchksum0 yes\n");
    fprintf(fp, "width %d\n", lmath->t.width);
    fprintf(fp, "shift %d\n", lmath->t.shift);
    fprintf(fp, "logbase %f\n", lmath->base);
    /* Pad it out to ensure alignment. */
    pos = ftell(fp) + strlen("endhdr\n");
    if (pos & ((long)lmath->t.width - 1)) {
        size_t align = lmath->t.width - (pos & ((long)lmath->t.width - 1));
        assert(lmath->t.width <= 8);
        /* The literal must really hold 8 characters: up to width-1
         * (at most 7) of them are written. */
        fwrite("        " /* 8 spaces */, 1, align, fp);
    }
    fprintf(fp, "endhdr\n");

    /* Now write the binary data. */
    chksum = (uint32)BYTE_ORDER_MAGIC;
    fwrite(&chksum, sizeof(uint32), 1, fp);
    chksum = 0;
    /* #Values to follow */
    if (bio_fwrite(&lmath->t.table_size, sizeof(uint32),
                   1, fp, 0, &chksum) != 1) {
        E_ERROR("Failed to write data to a file '%s'", file_name);
        goto error_out;
    }

    if (bio_fwrite(lmath->t.table, lmath->t.width, lmath->t.table_size,
                   fp, 0, &chksum) != lmath->t.table_size) {
        E_ERROR("Failed to write data (%d x %d bytes) to the file '%s'",
                lmath->t.table_size, lmath->t.width, file_name);
        goto error_out;
    }
    if (bio_fwrite(&chksum, sizeof(uint32), 1, fp, 0, NULL) != 1) {
        E_ERROR("Failed to write checksum to the file '%s'", file_name);
        goto error_out;
    }

    fclose(fp);
    return 0;

error_out:
    fclose(fp);
    return -1;
}

/** Take an additional reference to lmath and return it. */
logmath_t *
logmath_retain(logmath_t *lmath)
{
    ++lmath->refcount;
    return lmath;
}

/**
 * Drop one reference; release the table (unmapping it if it was
 * memory-mapped) and the object when none remain.
 *
 * @return The remaining reference count, or 0 if freed or lmath is NULL.
 */
int
logmath_free(logmath_t *lmath)
{
    if (lmath == NULL)
        return 0;
    if (--lmath->refcount > 0)
        return lmath->refcount;
    if (lmath->filemap)
        mmio_file_unmap(lmath->filemap);
    else
        ckd_free(lmath->t.table);
    ckd_free(lmath);
    return 0;
}

/**
 * Report the table geometry through the non-NULL output pointers.
 *
 * @return Table size in bytes (entries * width).
 */
int32
logmath_get_table_shape(logmath_t *lmath, uint32 *out_size,
                        uint32 *out_width, uint32 *out_shift)
{
    if (out_size) *out_size = lmath->t.table_size;
    if (out_width) *out_width = lmath->t.width;
    if (out_shift) *out_shift = lmath->t.shift;

    return lmath->t.table_size * lmath->t.width;
}

/** Return the logarithm base. */
float64
logmath_get_base(logmath_t *lmath)
{
    return lmath->base;
}

/** Return the value used to represent log(0). */
int
logmath_get_zero(logmath_t *lmath)
{
    return lmath->zero;
}

/** Return the width in bytes of one table entry. */
int
logmath_get_width(logmath_t *lmath)
{
    return lmath->t.width;
}

/** Return the right shift applied to log values. */
int
logmath_get_shift(logmath_t *lmath)
{
    return lmath->t.shift;
}

/**
 * Add two probabilities given as log values, using the lookup table
 * when one exists and logmath_add_exact() otherwise.
 */
int
logmath_add(logmath_t *lmath, int logb_x, int logb_y)
{
    logadd_t *t = LOGMATH_TABLE(lmath);
    int d, r;

    /* handle 0 + x = x case. */
    if (logb_x <= lmath->zero)
        return logb_y;
    if (logb_y <= lmath->zero)
        return logb_x;

    if (t->table == NULL)
        return logmath_add_exact(lmath, logb_x, logb_y);

    /* d must be positive, obviously. */
    if (logb_x > logb_y) {
        d = (logb_x - logb_y);
        r = logb_x;
    }
    else {
        d = (logb_y - logb_x);
        r = logb_y;
    }

    if (d < 0) {
        /* Some kind of overflow has occurred, fail gracefully. */
        return r;
    }
    if ((size_t)d >= t->table_size) {
        /* If this happens, it's not actually an error, because the
         * last entry in the logadd table is guaranteed to be zero.
         * Therefore we just return the larger of the two values. */
        return r;
    }

    switch (t->width) {
    case 1:
        return r + (((uint8 *)t->table)[d]);
    case 2:
        return r + (((uint16 *)t->table)[d]);
    case 4:
        return r + (((uint32 *)t->table)[d]);
    }
    return r;
}

/** Add two log values by converting to linear, summing, and converting back. */
int
logmath_add_exact(logmath_t *lmath, int logb_p, int logb_q)
{
    return logmath_log(lmath,
                       logmath_exp(lmath, logb_p)
                       + logmath_exp(lmath, logb_q));
}

/** Convert a linear value to a shifted integer log; p <= 0 maps to zero. */
int
logmath_log(logmath_t *lmath, float64 p)
{
    if (p <= 0) {
        return lmath->zero;
    }
    return (int)(log(p) * lmath->inv_log_of_base) >> lmath->t.shift;
}

/** Convert a shifted integer log back to a linear value. */
float64
logmath_exp(logmath_t *lmath, int logb_p)
{
    return pow(lmath->base, (float64)(logb_p << lmath->t.shift));
}

/** Convert a natural log to a shifted integer log in this base. */
int
logmath_ln_to_log(logmath_t *lmath, float64 log_p)
{
    return (int)(log_p * lmath->inv_log_of_base) >> lmath->t.shift;
}

/** Convert a shifted integer log in this base to a natural log. */
float64
logmath_log_to_ln(logmath_t *lmath, int logb_p)
{
    return (float64)(logb_p << lmath->t.shift) * lmath->log_of_base;
}

/** Convert a base-10 log to a shifted integer log in this base. */
int
logmath_log10_to_log(logmath_t *lmath, float64 log_p)
{
    return (int)(log_p * lmath->inv_log10_of_base) >> lmath->t.shift;
}

/** Convert a shifted integer log in this base to a base-10 log. */
float64
logmath_log_to_log10(logmath_t *lmath, int logb_p)
{
    return (float64)(logb_p << lmath->t.shift) * lmath->log10_of_base;
}
diff --git a/media/sphinxbase/src/libsphinxbase/util/matrix.c b/media/sphinxbase/src/libsphinxbase/util/matrix.c new file mode 100644 index 
000000000..27ba08f68 --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/util/matrix.c @@ -0,0 +1,313 @@ +/* -*- c-basic-offset: 4 -*- */ +/* ==================================================================== + * Copyright (c) 1997-2006 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * ==================================================================== + * + */ +#include +#include + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "sphinxbase/clapack_lite.h" +#include "sphinxbase/matrix.h" +#include "sphinxbase/err.h" +#include "sphinxbase/ckd_alloc.h" + +void +norm_3d(float32 ***arr, + uint32 d1, + uint32 d2, + uint32 d3) +{ + uint32 i, j, k; + float64 s; + + for (i = 0; i < d1; i++) { + for (j = 0; j < d2; j++) { + + /* compute sum (i, j) as over all k */ + for (k = 0, s = 0; k < d3; k++) { + s += arr[i][j][k]; + } + + /* do 1 floating point divide */ + s = 1.0 / s; + + /* divide all k by sum over k */ + for (k = 0; k < d3; k++) { + arr[i][j][k] *= s; + } + } + } +} + +void +accum_3d(float32 ***out, + float32 ***in, + uint32 d1, + uint32 d2, + uint32 d3) +{ + uint32 i, j, k; + + for (i = 0; i < d1; i++) { + for (j = 0; j < d2; j++) { + for (k = 0; k < d3; k++) { + out[i][j][k] += in[i][j][k]; + } + } + } +} + +void +floor_nz_3d(float32 ***m, + uint32 d1, + uint32 d2, + uint32 d3, + float32 floor) +{ + uint32 i, j, k; + + for (i = 0; i < d1; i++) { + for (j = 0; j < d2; j++) { + for (k = 0; k < d3; k++) { + if ((m[i][j][k] != 0) && (m[i][j][k] < floor)) + m[i][j][k] = floor; + } + } + } +} +void +floor_nz_1d(float32 *v, + uint32 d1, + float32 floor) +{ + uint32 i; + + for (i = 0; i < d1; i++) { + if ((v[i] != 0) && (v[i] < floor)) + v[i] = floor; + } +} + +void +band_nz_1d(float32 *v, + uint32 d1, + float32 band) +{ + uint32 i; + + for (i = 0; i < d1; i++) { + if (v[i] != 0) { + if ((v[i] > 0) && (v[i] < band)) { + v[i] = band; + } + else if ((v[i] < 0) && (v[i] > -band)) { + v[i] = -band; + } + } + } +} + +#ifndef WITH_LAPACK +float64 +determinant(float32 **a, int32 n) +{ + E_FATAL("No LAPACK library available, cannot compute determinant (FIXME)\n"); + return 0.0; +} +int32 +invert(float32 **ainv, float32 **a, int32 n) +{ + E_FATAL("No LAPACK library available, cannot compute matrix inverse (FIXME)\n"); + return 0; +} 
+int32 +solve(float32 **a, float32 *b, float32 *out_x, int32 n) +{ + E_FATAL("No LAPACK library available, cannot solve linear equations (FIXME)\n"); + return 0; +} + +void +matrixmultiply(float32 ** c, float32 ** a, float32 ** b, int32 n) +{ + int32 i, j, k; + + memset(c[0], 0, n*n*sizeof(float32)); + for (i = 0; i < n; ++i) { + for (j = 0; j < n; ++j) { + for (k = 0; k < n; ++k) { + c[i][k] += a[i][j] * b[j][k]; + } + } + } +} +#else /* WITH_LAPACK */ +/* Find determinant through LU decomposition. */ +float64 +determinant(float32 ** a, int32 n) +{ + float32 **tmp_a; + float64 det; + char uplo; + int32 info, i; + + /* a is assumed to be symmetric, so we don't need to switch the + * ordering of the data. But we do need to copy it since it is + * overwritten by LAPACK. */ + tmp_a = (float32 **)ckd_calloc_2d(n, n, sizeof(float32)); + memcpy(tmp_a[0], a[0], n*n*sizeof(float32)); + + uplo = 'L'; + spotrf_(&uplo, &n, tmp_a[0], &n, &info); + det = tmp_a[0][0]; + /* det = prod(diag(l))^2 */ + for (i = 1; i < n; ++i) + det *= tmp_a[i][i]; + ckd_free_2d((void **)tmp_a); + if (info > 0) + return -1.0; /* Generic "not positive-definite" answer */ + else + return det * det; +} + +int32 +solve(float32 **a, /*Input : an n*n matrix A */ + float32 *b, /*Input : a n dimesion vector b */ + float32 *out_x, /*Output : a n dimesion vector x */ + int32 n) +{ + char uplo; + float32 **tmp_a; + int32 info, nrhs; + + /* a is assumed to be symmetric, so we don't need to switch the + * ordering of the data. But we do need to copy it since it is + * overwritten by LAPACK. */ + tmp_a = (float32 **)ckd_calloc_2d(n, n, sizeof(float32)); + memcpy(tmp_a[0], a[0], n*n*sizeof(float32)); + memcpy(out_x, b, n*sizeof(float32)); + uplo = 'L'; + nrhs = 1; + sposv_(&uplo, &n, &nrhs, tmp_a[0], &n, out_x, &n, &info); + ckd_free_2d((void **)tmp_a); + + if (info != 0) + return -1; + else + return info; +} + +/* Find inverse by solving AX=I. 
*/ +int32 +invert(float32 ** ainv, float32 ** a, int32 n) +{ + char uplo; + float32 **tmp_a; + int32 info, nrhs, i; + + /* Construct an identity matrix. */ + memset(ainv[0], 0, sizeof(float32) * n * n); + for (i = 0; i < n; i++) + ainv[i][i] = 1.0; + /* a is assumed to be symmetric, so we don't need to switch the + * ordering of the data. But we do need to copy it since it is + * overwritten by LAPACK. */ + tmp_a = (float32 **)ckd_calloc_2d(n, n, sizeof(float32)); + memcpy(tmp_a[0], a[0], n*n*sizeof(float32)); + uplo = 'L'; + nrhs = n; + sposv_(&uplo, &n, &nrhs, tmp_a[0], &n, ainv[0], &n, &info); + ckd_free_2d((void **)tmp_a); + + if (info != 0) + return -1; + else + return info; +} + +void +matrixmultiply(float32 ** c, float32 ** a, float32 ** b, int32 n) +{ + char side, uplo; + float32 alpha; + + side = 'L'; + uplo = 'L'; + alpha = 1.0; + ssymm_(&side, &uplo, &n, &n, &alpha, a[0], &n, b[0], &n, &alpha, c[0], &n); +} + +#endif /* WITH_LAPACK */ + +void +outerproduct(float32 ** a, float32 * x, float32 * y, int32 len) +{ + int32 i, j; + + for (i = 0; i < len; ++i) { + a[i][i] = x[i] * y[i]; + for (j = i + 1; j < len; ++j) { + a[i][j] = x[i] * y[j]; + a[j][i] = x[j] * y[i]; + } + } +} + +void +scalarmultiply(float32 ** a, float32 x, int32 n) +{ + int32 i, j; + + for (i = 0; i < n; ++i) { + a[i][i] *= x; + for (j = i+1; j < n; ++j) { + a[i][j] *= x; + a[j][i] *= x; + } + } +} + +void +matrixadd(float32 ** a, float32 ** b, int32 n) +{ + int32 i, j; + + for (i = 0; i < n; ++i) + for (j = 0; j < n; ++j) + a[i][j] += b[i][j]; +} diff --git a/media/sphinxbase/src/libsphinxbase/util/mmio.c b/media/sphinxbase/src/libsphinxbase/util/mmio.c new file mode 100644 index 000000000..0b2315b71 --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/util/mmio.c @@ -0,0 +1,257 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2005 Carnegie Mellon University. All rights + * reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * ==================================================================== + * + */ +/********************************************************************* + * + * File: mmio.c + * + * Description: mmap() wrappers for Unix/Windows + * + * Author: David Huggins-Daines + * + *********************************************************************/ + +#include +#include + + +#ifdef GNUWINCE +# include +# include +# include +# include +#elif defined(__SYMBIAN32__) /* SYMBIAN32 must be before WIN32 since Symbian SDK defines WIN32 as well */ +# include +# include +# include +# include +#elif defined(_WIN32) +# include +#else +# include +# include +# include +# include +# include +#endif + +#include "sphinxbase/prim_type.h" +#include "sphinxbase/err.h" +#include "sphinxbase/mmio.h" +#include "sphinxbase/ckd_alloc.h" + +#if defined(_WIN32_WCE) || defined(GNUWINCE) +struct mmio_file_s { + int dummy; +}; + +mmio_file_t * +mmio_file_read(const char *filename) +{ + HANDLE ffm, fd; + WCHAR *wfilename; + void *rv; + int len; + + len = mbstowcs(NULL, filename, 0) + 1; + wfilename = malloc(len * sizeof(WCHAR)); + mbstowcs(wfilename, filename, len); + + if ((ffm = + CreateFileForMappingW(wfilename, GENERIC_READ, FILE_SHARE_READ, + NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, + NULL)) == INVALID_HANDLE_VALUE) { + E_ERROR("Failed to create mapping for the file '%s': %08x\n", filename, + GetLastError()); + return NULL; + } + if ((fd = + CreateFileMappingW(ffm, NULL, PAGE_READONLY, 0, 0, NULL)) == NULL) { + E_ERROR("Failed to CreateFileMapping: %08x\n", GetLastError()); + CloseHandle(ffm); + return NULL; + } + rv = MapViewOfFile(fd, FILE_MAP_READ, 0, 0, 0); + free(wfilename); + CloseHandle(ffm); + CloseHandle(fd); + + return (mmio_file_t *) rv; +} + +void +mmio_file_unmap(mmio_file_t *mf) +{ + if (!UnmapViewOfFile((void *)mf)) { + E_ERROR("Failed to UnmapViewOfFile: %08x\n", GetLastError()); + } +} + +void * +mmio_file_ptr(mmio_file_t *mf) +{ + return (void *)mf; +} + +#elif 
defined(_WIN32) && !defined(_WIN32_WP) /* !WINCE */ +struct mmio_file_s { + int dummy; +}; + +mmio_file_t * +mmio_file_read(const char *filename) +{ + HANDLE ffm, fd; + void *rv; + + if ((ffm = CreateFile(filename, GENERIC_READ, FILE_SHARE_READ, + NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, + NULL)) == INVALID_HANDLE_VALUE) { + E_ERROR("Failed to create file '%s': %08x\n", + filename, GetLastError()); + return NULL; + } + if ((fd = CreateFileMapping(ffm, NULL, + PAGE_READONLY, 0, 0, NULL)) == NULL) { + E_ERROR("Failed to CreateFileMapping: %08x\n", GetLastError()); + CloseHandle(ffm); + } + rv = MapViewOfFile(fd, FILE_MAP_READ, 0, 0, 0); + CloseHandle(ffm); + CloseHandle(fd); + + return (mmio_file_t *)rv; +} + +void +mmio_file_unmap(mmio_file_t *mf) +{ + if (!UnmapViewOfFile((void *)mf)) { + E_ERROR("Failed to UnmapViewOfFile: %08x\n", GetLastError()); + } +} + +void * +mmio_file_ptr(mmio_file_t *mf) +{ + return (void *)mf; +} + +#else /* !WIN32, !WINCE */ +#if defined(__ADSPBLACKFIN__) || defined(_WIN32_WP) + /* This is true for both uClinux and VisualDSP++, + but actually we need a better way to detect it. 
*/ +struct mmio_file_s { + int dummy; +}; + +mmio_file_t * +mmio_file_read(const char *filename) +{ + E_ERROR("mmio is not implemented on this platform!"); + return NULL; +} + +void +mmio_file_unmap(mmio_file_t *mf) +{ + E_ERROR("mmio is not implemented on this platform!"); +} + +void * +mmio_file_ptr(mmio_file_t *mf) +{ + E_ERROR("mmio is not implemented on this platform!"); + return NULL; +} +#else /* !__ADSPBLACKFIN__ */ +struct mmio_file_s { + void *ptr; + size_t mapsize; +}; + +mmio_file_t * +mmio_file_read(const char *filename) +{ + mmio_file_t *mf; + struct stat buf; + void *ptr; + int fd; + size_t pagesize; + + if ((fd = open(filename, O_RDONLY)) == -1) { + E_ERROR_SYSTEM("Failed to open %s", filename); + return NULL; + } + if (fstat(fd, &buf) == -1) { + E_ERROR_SYSTEM("Failed to stat %s", filename); + close(fd); + return NULL; + } + ptr = mmap(NULL, buf.st_size, PROT_READ, MAP_SHARED, fd, 0); + if (ptr == (void *)-1) { + E_ERROR_SYSTEM("Failed to mmap %lld bytes", (unsigned long long)buf.st_size); + close(fd); + return NULL; + } + close(fd); + mf = ckd_calloc(1, sizeof(*mf)); + mf->ptr = ptr; + /* Align map size to next page. 
*/ + pagesize = sysconf(_SC_PAGESIZE); + mf->mapsize = (buf.st_size + pagesize - 1) / pagesize * pagesize; + + return mf; +} + +void +mmio_file_unmap(mmio_file_t *mf) +{ + if (mf == NULL) + return; + if (munmap(mf->ptr, mf->mapsize) < 0) { + E_ERROR_SYSTEM("Failed to unmap %ld bytes at %p", mf->mapsize, mf->ptr); + } + ckd_free(mf); +} + +void * +mmio_file_ptr(mmio_file_t *mf) +{ + return mf->ptr; +} +#endif /* !__ADSPBLACKFIN__ */ +#endif /* !(WINCE || WIN32) */ diff --git a/media/sphinxbase/src/libsphinxbase/util/pio.c b/media/sphinxbase/src/libsphinxbase/util/pio.c new file mode 100644 index 000000000..4c520bebe --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/util/pio.c @@ -0,0 +1,655 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include +#include +#include + +#ifdef HAVE_UNISTD_H +#include +#endif + +#ifdef HAVE_SYS_TYPES_H +#include +#endif + +#ifdef HAVE_SYS_STAT_H +#include +#endif + +#if defined(_WIN32) && !defined(CYGWIN) +#include +#endif + +#include "sphinxbase/pio.h" +#include "sphinxbase/filename.h" +#include "sphinxbase/err.h" +#include "sphinxbase/strfuncs.h" +#include "sphinxbase/ckd_alloc.h" + +#ifndef EXEEXT +#define EXEEXT "" +#endif + +enum { + COMP_NONE, + COMP_COMPRESS, + COMP_GZIP, + COMP_BZIP2 +}; + +static void +guess_comptype(char const *file, int32 *ispipe, int32 *isgz) +{ + size_t k; + + k = strlen(file); + *ispipe = 0; + *isgz = COMP_NONE; + if ((k > 2) + && ((strcmp(file + k - 2, ".Z") == 0) + || (strcmp(file + k - 2, ".z") == 0))) { + *ispipe = 1; + *isgz = COMP_COMPRESS; + } + else if ((k > 3) && ((strcmp(file + k - 3, ".gz") == 0) + || (strcmp(file + k - 3, ".GZ") == 0))) { + *ispipe = 1; + *isgz = COMP_GZIP; + } + else if ((k > 4) && ((strcmp(file + k - 4, ".bz2") == 0) + || (strcmp(file + k - 4, ".BZ2") == 0))) { + *ispipe = 1; + *isgz = COMP_BZIP2; + } +} + +FILE * +fopen_comp(const char *file, const char *mode, int32 * ispipe) +{ + FILE *fp; + +#ifndef HAVE_POPEN + *ispipe = 0; /* No popen() on WinCE */ +#else /* HAVE_POPEN */ + int32 isgz; + guess_comptype(file, ispipe, 
&isgz); +#endif /* HAVE_POPEN */ + + if (*ispipe) { +#ifndef HAVE_POPEN + /* Shouldn't get here, anyway */ + E_FATAL("No popen() on WinCE\n"); +#else + if (strcmp(mode, "r") == 0) { + char *command; + switch (isgz) { + case COMP_GZIP: + command = string_join("gunzip" EXEEXT, " -c ", file, NULL); + break; + case COMP_COMPRESS: + command = string_join("zcat" EXEEXT, " ", file, NULL); + break; + case COMP_BZIP2: + command = string_join("bunzip2" EXEEXT, " -c ", file, NULL); + break; + default: + command = NULL; /* Make compiler happy. */ + E_FATAL("Unknown compression type %d\n", isgz); + } + if ((fp = popen(command, mode)) == NULL) { + E_ERROR_SYSTEM("Failed to open a pipe for a command '%s' mode '%s'", command, mode); + ckd_free(command); + return NULL; + } + ckd_free(command); + } + else if (strcmp(mode, "w") == 0) { + char *command; + switch (isgz) { + case COMP_GZIP: + command = string_join("gzip" EXEEXT, " > ", file, NULL); + break; + case COMP_COMPRESS: + command = string_join("compress" EXEEXT, " -c > ", file, NULL); + break; + case COMP_BZIP2: + command = string_join("bzip2" EXEEXT, " > ", file, NULL); + break; + default: + command = NULL; /* Make compiler happy. 
*/ + E_FATAL("Unknown compression type %d\n", isgz); + } + if ((fp = popen(command, mode)) == NULL) { + E_ERROR_SYSTEM("Failed to open a pipe for a command '%s' mode '%s'", command, mode); + ckd_free(command); + return NULL; + } + ckd_free(command); + } + else { + E_ERROR("Compressed file operation for mode %s is not supported", mode); + return NULL; + } +#endif /* HAVE_POPEN */ + } + else { + fp = fopen(file, mode); + } + + return (fp); +} + + +void +fclose_comp(FILE * fp, int32 ispipe) +{ + if (ispipe) { +#ifdef HAVE_POPEN +#if defined(_WIN32) && (!defined(__SYMBIAN32__)) + _pclose(fp); +#else + pclose(fp); +#endif +#endif + } + else + fclose(fp); +} + + +FILE * +fopen_compchk(const char *file, int32 * ispipe) +{ +#ifndef HAVE_POPEN + *ispipe = 0; /* No popen() on WinCE */ + /* And therefore the rest of this function is useless. */ + return (fopen_comp(file, "r", ispipe)); +#else /* HAVE_POPEN */ + int32 isgz; + FILE *fh; + + /* First just try to fopen_comp() it */ + if ((fh = fopen_comp(file, "r", ispipe)) != NULL) + return fh; + else { + char *tmpfile; + size_t k; + + /* File doesn't exist; try other compressed/uncompressed form, as appropriate */ + guess_comptype(file, ispipe, &isgz); + k = strlen(file); + tmpfile = ckd_calloc(k+5, 1); + strcpy(tmpfile, file); + switch (isgz) { + case COMP_GZIP: + tmpfile[k - 3] = '\0'; + break; + case COMP_BZIP2: + tmpfile[k - 4] = '\0'; + break; + case COMP_COMPRESS: + tmpfile[k - 2] = '\0'; + break; + case COMP_NONE: + strcpy(tmpfile + k, ".gz"); + if ((fh = fopen_comp(tmpfile, "r", ispipe)) != NULL) { + E_WARN("Using %s instead of %s\n", tmpfile, file); + ckd_free(tmpfile); + return fh; + } + strcpy(tmpfile + k, ".bz2"); + if ((fh = fopen_comp(tmpfile, "r", ispipe)) != NULL) { + E_WARN("Using %s instead of %s\n", tmpfile, file); + ckd_free(tmpfile); + return fh; + } + strcpy(tmpfile + k, ".Z"); + if ((fh = fopen_comp(tmpfile, "r", ispipe)) != NULL) { + E_WARN("Using %s instead of %s\n", tmpfile, file); + ckd_free(tmpfile); 
+ return fh; + } + ckd_free(tmpfile); + return NULL; + } + E_WARN("Using %s instead of %s\n", tmpfile, file); + fh = fopen_comp(tmpfile, "r", ispipe); + ckd_free(tmpfile); + return NULL; + } +#endif /* HAVE_POPEN */ +} + +lineiter_t * +lineiter_start(FILE *fh) +{ + lineiter_t *li; + + li = (lineiter_t *)ckd_calloc(1, sizeof(*li)); + li->buf = (char *)ckd_malloc(128); + li->buf[0] = '\0'; + li->bsiz = 128; + li->len = 0; + li->fh = fh; + + li = lineiter_next(li); + + /* Strip the UTF-8 BOM */ + + if (li && 0 == strncmp(li->buf, "\xef\xbb\xbf", 3)) { + memmove(li->buf, li->buf + 3, strlen(li->buf + 1)); + li->len -= 3; + } + + return li; +} + +lineiter_t * +lineiter_start_clean(FILE *fh) +{ + lineiter_t *li; + + li = lineiter_start(fh); + + if (li == NULL) + return li; + + li->clean = TRUE; + + if (li->buf && li->buf[0] == '#') { + li = lineiter_next(li); + } else { + string_trim(li->buf, STRING_BOTH); + } + + return li; +} + + +static lineiter_t * +lineiter_next_plain(lineiter_t *li) +{ + /* We are reading the next line */ + li->lineno++; + + /* Read a line and check for EOF. */ + if (fgets(li->buf, li->bsiz, li->fh) == NULL) { + lineiter_free(li); + return NULL; + } + /* If we managed to read the whole thing, then we are done + * (this will be by far the most common result). */ + li->len = (int32)strlen(li->buf); + if (li->len < li->bsiz - 1 || li->buf[li->len - 1] == '\n') + return li; + + /* Otherwise we have to reallocate and keep going. */ + while (1) { + li->bsiz *= 2; + li->buf = (char *)ckd_realloc(li->buf, li->bsiz); + /* If we get an EOF, we are obviously done. */ + if (fgets(li->buf + li->len, li->bsiz - li->len, li->fh) == NULL) { + li->len += strlen(li->buf + li->len); + return li; + } + li->len += strlen(li->buf + li->len); + /* If we managed to read the whole thing, then we are done. */ + if (li->len < li->bsiz - 1 || li->buf[li->len - 1] == '\n') + return li; + } + + /* Shouldn't get here. 
*/ + return li; +} + + +lineiter_t * +lineiter_next(lineiter_t *li) +{ + if (!li->clean) + return lineiter_next_plain(li); + + for (li = lineiter_next_plain(li); li; li = lineiter_next_plain(li)) { + if (li->buf && li->buf[0] != '#') { + li->buf = string_trim(li->buf, STRING_BOTH); + break; + } + } + return li; +} + +int lineiter_lineno(lineiter_t *li) +{ + return li->lineno; +} + +void +lineiter_free(lineiter_t *li) +{ + if (li == NULL) + return; + ckd_free(li->buf); + ckd_free(li); +} + +char * +fread_line(FILE *stream, size_t *out_len) +{ + char *output, *outptr; + char buf[128]; + + output = outptr = NULL; + while (fgets(buf, sizeof(buf), stream)) { + size_t len = strlen(buf); + /* Append this data to the buffer. */ + if (output == NULL) { + output = (char *)ckd_malloc(len + 1); + outptr = output; + } + else { + size_t cur = outptr - output; + output = (char *)ckd_realloc(output, cur + len + 1); + outptr = output + cur; + } + memcpy(outptr, buf, len + 1); + outptr += len; + /* Stop on a short read or end of line. 
*/ + if (len < sizeof(buf)-1 || buf[len-1] == '\n') + break; + } + if (out_len) *out_len = outptr - output; + return output; +} + +#define FREAD_RETRY_COUNT 60 + +int32 +fread_retry(void *pointer, int32 size, int32 num_items, FILE * stream) +{ + char *data; + size_t n_items_read; + size_t n_items_rem; + uint32 n_retry_rem; + int32 loc; + + n_retry_rem = FREAD_RETRY_COUNT; + + data = (char *)pointer; + loc = 0; + n_items_rem = num_items; + + do { + n_items_read = fread(&data[loc], size, n_items_rem, stream); + + n_items_rem -= n_items_read; + + if (n_items_rem > 0) { + /* an incomplete read occurred */ + + if (n_retry_rem == 0) + return -1; + + if (n_retry_rem == FREAD_RETRY_COUNT) { + E_ERROR_SYSTEM("fread() failed; retrying...\n"); + } + + --n_retry_rem; + + loc += n_items_read * size; +#if !defined(_WIN32) && defined(HAVE_UNISTD_H) + sleep(1); +#endif + } + } while (n_items_rem > 0); + + return num_items; +} + + +#ifdef _WIN32_WCE /* No stat() on WinCE */ +int32 +stat_retry(const char *file, struct stat * statbuf) +{ + WIN32_FIND_DATAW file_data; + HANDLE *h; + wchar_t *wfile; + size_t len; + + len = mbstowcs(NULL, file, 0) + 1; + wfile = ckd_calloc(len, sizeof(*wfile)); + mbstowcs(wfile, file, len); + if ((h = FindFirstFileW(wfile, &file_data)) == INVALID_HANDLE_VALUE) { + ckd_free(wfile); + return -1; + } + ckd_free(wfile); + memset(statbuf, 0, sizeof(*statbuf)); + statbuf->st_mtime = file_data.ftLastWriteTime.dwLowDateTime; + statbuf->st_size = file_data.nFileSizeLow; + FindClose(h); + + return 0; +} + + +int32 +stat_mtime(const char *file) +{ + struct stat statbuf; + + if (stat_retry(file, &statbuf) != 0) + return -1; + + return ((int32) statbuf.st_mtime); +} +#else +#define STAT_RETRY_COUNT 10 +int32 +stat_retry(const char *file, struct stat * statbuf) +{ + int32 i; + + for (i = 0; i < STAT_RETRY_COUNT; i++) { +#ifndef HAVE_SYS_STAT_H + FILE *fp; + + if ((fp = (FILE *)fopen(file, "r")) != 0) { + fseek(fp, 0, SEEK_END); + statbuf->st_size = ftell(fp); + 
fclose(fp); + return 0; + } +#else /* HAVE_SYS_STAT_H */ + if (stat(file, statbuf) == 0) + return 0; +#endif + if (i == 0) { + E_ERROR_SYSTEM("Failed to stat file '%s'; retrying...", file); + } +#ifdef HAVE_UNISTD_H + sleep(1); +#endif + } + + return -1; +} + +int32 +stat_mtime(const char *file) +{ + struct stat statbuf; + +#ifdef HAVE_SYS_STAT_H + if (stat(file, &statbuf) != 0) + return -1; +#else /* HAVE_SYS_STAT_H */ + if (stat_retry(file, &statbuf) != 0) + return -1; +#endif /* HAVE_SYS_STAT_H */ + + return ((int32) statbuf.st_mtime); +} +#endif /* !_WIN32_WCE */ + +struct bit_encode_s { + FILE *fh; + unsigned char buf, bbits; + int16 refcount; +}; + +bit_encode_t * +bit_encode_attach(FILE *outfh) +{ + bit_encode_t *be; + + be = (bit_encode_t *)ckd_calloc(1, sizeof(*be)); + be->refcount = 1; + be->fh = outfh; + return be; +} + +bit_encode_t * +bit_encode_retain(bit_encode_t *be) +{ + ++be->refcount; + return be; +} + +int +bit_encode_free(bit_encode_t *be) +{ + if (be == NULL) + return 0; + if (--be->refcount > 0) + return be->refcount; + ckd_free(be); + + return 0; +} + +int +bit_encode_write(bit_encode_t *be, unsigned char const *bits, int nbits) +{ + int tbits; + + tbits = nbits + be->bbits; + if (tbits < 8) { + /* Append to buffer. */ + be->buf |= ((bits[0] >> (8 - nbits)) << (8 - tbits)); + } + else { + int i = 0; + while (tbits >= 8) { + /* Shift bits out of the buffer and splice with high-order bits */ + fputc(be->buf | ((bits[i]) >> be->bbits), be->fh); + /* Put low-order bits back into buffer */ + be->buf = (bits[i] << (8 - be->bbits)) & 0xff; + tbits -= 8; + ++i; + } + } + /* tbits contains remaining number of bits. 
*/ + be->bbits = tbits; + + return nbits; +} + +int +bit_encode_write_cw(bit_encode_t *be, uint32 codeword, int nbits) +{ + unsigned char bits[4]; + codeword <<= (32 - nbits); + bits[0] = (codeword >> 24) & 0xff; + bits[1] = (codeword >> 16) & 0xff; + bits[2] = (codeword >> 8) & 0xff; + bits[3] = codeword & 0xff; + return bit_encode_write(be, bits, nbits); +} + +int +bit_encode_flush(bit_encode_t *be) +{ + if (be->bbits) { + fputc(be->buf, be->fh); + be->bbits = 0; + } + return 0; +} + +int +build_directory(const char *path) +{ + int rv; + + /* Utterly failed... */ + if (strlen(path) == 0) + return -1; + +#if defined(_WIN32) && !defined(CYGWIN) + else if ((rv = _mkdir(path)) == 0) + return 0; +#elif defined(HAVE_SYS_STAT_H) /* Unix, Cygwin, doesn't work on MINGW */ + else if ((rv = mkdir(path, 0777)) == 0) + return 0; +#endif + + /* Or, it already exists... */ + else if (errno == EEXIST) + return 0; + else if (errno != ENOENT) { + E_ERROR_SYSTEM("Failed to create %s", path); + return -1; + } + else { + char *dirname = ckd_salloc(path); + path2dirname(path, dirname); + build_directory(dirname); + ckd_free(dirname); + +#if defined(_WIN32) && !defined(CYGWIN) + return _mkdir(path); +#elif defined(HAVE_SYS_STAT_H) /* Unix, Cygwin, doesn't work on MINGW */ + return mkdir(path, 0777); +#endif + } +} diff --git a/media/sphinxbase/src/libsphinxbase/util/profile.c b/media/sphinxbase/src/libsphinxbase/util/profile.c new file mode 100644 index 000000000..c8d84ca98 --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/util/profile.c @@ -0,0 +1,345 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2001 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * profile.c -- For timing and event counting. + * + * ********************************************** + * CMU ARPA Speech Project + * + * Copyright (c) 1999 Carnegie Mellon University. + * ALL RIGHTS RESERVED. + * ********************************************** + * + * HISTORY + * $Log: profile.c,v $ + * Revision 1.7 2005/06/22 03:10:59 arthchan2003 + * 1, Fixed doxygen documentation, 2, Added keyword. + * + * Revision 1.3 2005/03/30 01:22:48 archan + * Fixed mistakes in last updates. 
Add + * + * + * 11-Mar-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Added ptmr_init(). + * + * 19-Jun-97 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Created. + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include +#include + +#if defined(_WIN32) && !defined(__SYMBIAN32__) +# include +# ifndef _WIN32_WCE +# include +# endif +#elif defined(HAVE_UNISTD_H) /* I know this, this is Unix... */ +# include +# include +# include +#endif + +#ifdef _MSC_VER +#pragma warning (disable: 4996) +#endif + +#include "sphinxbase/profile.h" +#include "sphinxbase/err.h" +#include "sphinxbase/ckd_alloc.h" + +#if defined(_WIN32_WCE) || defined(_WIN32_WP) +DWORD unlink(const char *filename) +{ + WCHAR *wfilename; + DWORD rv; + size_t len; + + len = mbstowcs(NULL, filename, 0); + wfilename = ckd_calloc(len+1, sizeof(*wfilename)); + mbstowcs(wfilename, filename, len); + rv = DeleteFileW(wfilename); + ckd_free(wfilename); + + return rv; +} +#endif + +pctr_t * +pctr_new(char *nm) +{ + pctr_t *pc; + + pc = ckd_calloc(1, sizeof(pctr_t)); + pc->name = ckd_salloc(nm); + pc->count = 0; + + return pc; +} + +void +pctr_reset(pctr_t * ctr) +{ + ctr->count = 0; +} + + +void +pctr_increment(pctr_t * ctr, int32 inc) +{ + ctr->count += inc; + /* E_INFO("Name %s, Count %d, inc %d\n",ctr->name, ctr->count, inc); */ +} + +void +pctr_print(FILE * fp, pctr_t * ctr) +{ + fprintf(fp, "CTR:"); + fprintf(fp, "[%d %s]", ctr->count, ctr->name); +} + +void +pctr_free(pctr_t * pc) +{ + if (pc) { + if (pc->name) + ckd_free(pc->name); + } + ckd_free(pc); +} + + +#if defined(_WIN32) && !defined(GNUWINCE) && !defined(__SYMBIAN32__) + +#define TM_LOWSCALE 1e-7 +#define TM_HIGHSCALE (4294967296.0 * TM_LOWSCALE); + +static float64 +make_sec(FILETIME * tm) +{ + float64 dt; + + dt = tm->dwLowDateTime * TM_LOWSCALE; + dt += tm->dwHighDateTime * TM_HIGHSCALE; + + return (dt); +} + +#else /* NOT WINDOWS */ + +static float64 +make_sec(struct timeval *s) +{ + return 
(s->tv_sec + s->tv_usec * 0.000001); +} + +#endif + + +void +ptmr_start(ptmr_t * tm) +{ +#if (! defined(_WIN32)) || defined(GNUWINCE) || defined(__SYMBIAN32__) + struct timeval e_start; /* Elapsed time */ + +#if (! defined(_HPUX_SOURCE)) && (! defined(__SYMBIAN32__)) + struct rusage start; /* CPU time */ + + /* Unix but not HPUX */ + getrusage(RUSAGE_SELF, &start); + tm->start_cpu = make_sec(&start.ru_utime) + make_sec(&start.ru_stime); +#endif + /* Unix + HP */ + gettimeofday(&e_start, 0); + tm->start_elapsed = make_sec(&e_start); +#elif defined(_WIN32_WP) + tm->start_cpu = GetTickCount64() / 1000; + tm->start_elapsed = GetTickCount64() / 1000; +#elif defined(_WIN32_WCE) + /* No GetProcessTimes() on WinCE. (Note CPU time will be bogus) */ + tm->start_cpu = GetTickCount() / 1000; + tm->start_elapsed = GetTickCount() / 1000; +#else + HANDLE pid; + FILETIME t_create, t_exit, kst, ust; + + /* PC */ + pid = GetCurrentProcess(); + GetProcessTimes(pid, &t_create, &t_exit, &kst, &ust); + tm->start_cpu = make_sec(&ust) + make_sec(&kst); + + tm->start_elapsed = (float64) clock() / CLOCKS_PER_SEC; +#endif +} + + +void +ptmr_stop(ptmr_t * tm) +{ + float64 dt_cpu, dt_elapsed; + +#if (! defined(_WIN32)) || defined(GNUWINCE) || defined(__SYMBIAN32__) + struct timeval e_stop; /* Elapsed time */ + +#if (! defined(_HPUX_SOURCE)) && (! defined(__SYMBIAN32__)) + struct rusage stop; /* CPU time */ + + /* Unix but not HPUX */ + getrusage(RUSAGE_SELF, &stop); + dt_cpu = + make_sec(&stop.ru_utime) + make_sec(&stop.ru_stime) - + tm->start_cpu; +#else + dt_cpu = 0.0; +#endif + /* Unix + HP */ + gettimeofday(&e_stop, 0); + dt_elapsed = (make_sec(&e_stop) - tm->start_elapsed); +#elif defined(_WIN32_WP) + dt_cpu = GetTickCount64() / 1000 - tm->start_cpu; + dt_elapsed = GetTickCount64() / 1000 - tm->start_elapsed; +#elif defined(_WIN32_WCE) + /* No GetProcessTimes() on WinCE. 
(Note CPU time will be bogus) */ + dt_cpu = GetTickCount() / 1000 - tm->start_cpu; + dt_elapsed = GetTickCount() / 1000 - tm->start_elapsed; +#else + HANDLE pid; + FILETIME t_create, t_exit, kst, ust; + + /* PC */ + pid = GetCurrentProcess(); + GetProcessTimes(pid, &t_create, &t_exit, &kst, &ust); + dt_cpu = make_sec(&ust) + make_sec(&kst) - tm->start_cpu; + dt_elapsed = ((float64) clock() / CLOCKS_PER_SEC) - tm->start_elapsed; +#endif + + tm->t_cpu += dt_cpu; + tm->t_elapsed += dt_elapsed; + + tm->t_tot_cpu += dt_cpu; + tm->t_tot_elapsed += dt_elapsed; +} + + +void +ptmr_reset(ptmr_t * tm) +{ + tm->t_cpu = 0.0; + tm->t_elapsed = 0.0; +} + + +void +ptmr_init(ptmr_t * tm) +{ + tm->t_cpu = 0.0; + tm->t_elapsed = 0.0; + tm->t_tot_cpu = 0.0; + tm->t_tot_elapsed = 0.0; +} + + +void +ptmr_reset_all(ptmr_t * tm) +{ + for (; tm->name; tm++) + ptmr_reset(tm); +} + + +void +ptmr_print_all(FILE * fp, ptmr_t * tm, float64 norm) +{ + if (norm != 0.0) { + norm = 1.0 / norm; + for (; tm->name; tm++) + fprintf(fp, " %6.2fx %s", tm->t_cpu * norm, tm->name); + } +} + + +int32 +host_endian(void) +{ + FILE *fp; + int32 BYTE_ORDER_MAGIC; + char *file; + char buf[8]; + int32 k, endian; + + file = "/tmp/__EnDiAn_TeSt__"; + + if ((fp = fopen(file, "wb")) == NULL) { + E_ERROR("Failed to open file '%s' for writing", file); + return -1; + } + + BYTE_ORDER_MAGIC = (int32) 0x11223344; + + k = (int32) BYTE_ORDER_MAGIC; + if (fwrite(&k, sizeof(int32), 1, fp) != 1) { + E_ERROR("Failed to write to file '%s'\n", file); + fclose(fp); + unlink(file); + return -1; + } + + fclose(fp); + if ((fp = fopen(file, "rb")) == NULL) { + E_ERROR_SYSTEM("Failed to open file '%s' for reading", file); + unlink(file); + return -1; + } + if (fread(buf, 1, sizeof(int32), fp) != sizeof(int32)) { + E_ERROR("Failed to read from file '%s'\n", file); + fclose(fp); + unlink(file); + return -1; + } + fclose(fp); + unlink(file); + + /* If buf[0] == lsB of BYTE_ORDER_MAGIC, we are little-endian */ + endian = (buf[0] == 
(BYTE_ORDER_MAGIC & 0x000000ff)) ? 1 : 0; + + return (endian); +} diff --git a/media/sphinxbase/src/libsphinxbase/util/sbthread.c b/media/sphinxbase/src/libsphinxbase/util/sbthread.c new file mode 100644 index 000000000..28bf77356 --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/util/sbthread.c @@ -0,0 +1,741 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2008 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +/** + * @file sbthread.c + * @brief Simple portable thread functions + * @author David Huggins-Daines + */ + +#include + +#include "sphinxbase/sbthread.h" +#include "sphinxbase/ckd_alloc.h" +#include "sphinxbase/err.h" + +/* + * Platform-specific parts: threads, mutexes, and signals. + */ +#if (defined(_WIN32) || defined(__CYGWIN__)) && !defined(__SYMBIAN32__) +#ifndef _WIN32_WINNT +#define _WIN32_WINNT 0x0400 +#endif /* not _WIN32_WINNT */ +#include + +struct sbthread_s { + cmd_ln_t *config; + sbmsgq_t *msgq; + sbthread_main func; + void *arg; + HANDLE th; + DWORD tid; +}; + +struct sbmsgq_s { + /* Ringbuffer for passing messages. */ + char *data; + size_t depth; + size_t out; + size_t nbytes; + + /* Current message is stored here. 
*/ + char *msg; + size_t msglen; + CRITICAL_SECTION mtx; + HANDLE evt; +}; + +struct sbevent_s { + HANDLE evt; +}; + +struct sbmtx_s { + CRITICAL_SECTION mtx; +}; + +DWORD WINAPI +sbthread_internal_main(LPVOID arg) +{ + sbthread_t *th = (sbthread_t *)arg; + int rv; + + rv = (*th->func)(th); + return (DWORD)rv; +} + +sbthread_t * +sbthread_start(cmd_ln_t *config, sbthread_main func, void *arg) +{ + sbthread_t *th; + + th = ckd_calloc(1, sizeof(*th)); + th->config = config; + th->func = func; + th->arg = arg; + th->msgq = sbmsgq_init(256); + th->th = CreateThread(NULL, 0, sbthread_internal_main, th, 0, &th->tid); + if (th->th == NULL) { + sbthread_free(th); + return NULL; + } + return th; +} + +int +sbthread_wait(sbthread_t *th) +{ + DWORD rv, exit; + + /* It has already been joined. */ + if (th->th == NULL) + return -1; + + rv = WaitForSingleObject(th->th, INFINITE); + if (rv == WAIT_FAILED) { + E_ERROR("Failed to join thread: WAIT_FAILED\n"); + return -1; + } + GetExitCodeThread(th->th, &exit); + CloseHandle(th->th); + th->th = NULL; + return (int)exit; +} + +static DWORD +cond_timed_wait(HANDLE cond, int sec, int nsec) +{ + DWORD rv; + if (sec == -1) { + rv = WaitForSingleObject(cond, INFINITE); + } + else { + DWORD ms; + + ms = sec * 1000 + nsec / (1000*1000); + rv = WaitForSingleObject(cond, ms); + } + return rv; +} + +/* Updated to use Unicode */ +sbevent_t * +sbevent_init(void) +{ + sbevent_t *evt; + + evt = ckd_calloc(1, sizeof(*evt)); + evt->evt = CreateEventW(NULL, FALSE, FALSE, NULL); + if (evt->evt == NULL) { + ckd_free(evt); + return NULL; + } + return evt; +} + +void +sbevent_free(sbevent_t *evt) +{ + CloseHandle(evt->evt); + ckd_free(evt); +} + +int +sbevent_signal(sbevent_t *evt) +{ + return SetEvent(evt->evt) ? 
0 : -1; +} + +int +sbevent_wait(sbevent_t *evt, int sec, int nsec) +{ + DWORD rv; + + rv = cond_timed_wait(evt->evt, sec, nsec); + return rv; +} + +sbmtx_t * +sbmtx_init(void) +{ + sbmtx_t *mtx; + + mtx = ckd_calloc(1, sizeof(*mtx)); + InitializeCriticalSection(&mtx->mtx); + return mtx; +} + +int +sbmtx_trylock(sbmtx_t *mtx) +{ + return TryEnterCriticalSection(&mtx->mtx) ? 0 : -1; +} + +int +sbmtx_lock(sbmtx_t *mtx) +{ + EnterCriticalSection(&mtx->mtx); + return 0; +} + +int +sbmtx_unlock(sbmtx_t *mtx) +{ + LeaveCriticalSection(&mtx->mtx); + return 0; +} + +void +sbmtx_free(sbmtx_t *mtx) +{ + DeleteCriticalSection(&mtx->mtx); + ckd_free(mtx); +} + +sbmsgq_t * +sbmsgq_init(size_t depth) +{ + sbmsgq_t *msgq; + + msgq = ckd_calloc(1, sizeof(*msgq)); + msgq->depth = depth; + msgq->evt = CreateEventW(NULL, FALSE, FALSE, NULL); + if (msgq->evt == NULL) { + ckd_free(msgq); + return NULL; + } + InitializeCriticalSection(&msgq->mtx); + msgq->data = ckd_calloc(depth, 1); + msgq->msg = ckd_calloc(depth, 1); + return msgq; +} + +void +sbmsgq_free(sbmsgq_t *msgq) +{ + CloseHandle(msgq->evt); + ckd_free(msgq->data); + ckd_free(msgq->msg); + ckd_free(msgq); +} + +int +sbmsgq_send(sbmsgq_t *q, size_t len, void const *data) +{ + char const *cdata = (char const *)data; + size_t in; + + /* Don't allow things bigger than depth to be sent! */ + if (len + sizeof(len) > q->depth) + return -1; + + if (q->nbytes + len + sizeof(len) > q->depth) + WaitForSingleObject(q->evt, INFINITE); + + /* Lock things while we manipulate the buffer (FIXME: this + actually should have been atomic with the wait above ...) */ + EnterCriticalSection(&q->mtx); + in = (q->out + q->nbytes) % q->depth; + /* First write the size of the message. */ + if (in + sizeof(len) > q->depth) { + /* Handle the annoying case where the size field gets wrapped around. 
*/ + size_t len1 = q->depth - in; + memcpy(q->data + in, &len, len1); + memcpy(q->data, ((char *)&len) + len1, sizeof(len) - len1); + q->nbytes += sizeof(len); + in = sizeof(len) - len1; + } + else { + memcpy(q->data + in, &len, sizeof(len)); + q->nbytes += sizeof(len); + in += sizeof(len); + } + + /* Now write the message body. */ + if (in + len > q->depth) { + /* Handle wraparound. */ + size_t len1 = q->depth - in; + memcpy(q->data + in, cdata, len1); + q->nbytes += len1; + cdata += len1; + len -= len1; + in = 0; + } + memcpy(q->data + in, cdata, len); + q->nbytes += len; + + /* Signal the condition variable. */ + SetEvent(q->evt); + /* Unlock. */ + LeaveCriticalSection(&q->mtx); + + return 0; +} + +void * +sbmsgq_wait(sbmsgq_t *q, size_t *out_len, int sec, int nsec) +{ + char *outptr; + size_t len; + + /* Wait for data to be available. */ + if (q->nbytes == 0) { + if (cond_timed_wait(q->evt, sec, nsec) == WAIT_FAILED) + /* Timed out or something... */ + return NULL; + } + /* Lock to manipulate the queue (FIXME) */ + EnterCriticalSection(&q->mtx); + /* Get the message size. */ + if (q->out + sizeof(q->msglen) > q->depth) { + /* Handle annoying wraparound case. */ + size_t len1 = q->depth - q->out; + memcpy(&q->msglen, q->data + q->out, len1); + memcpy(((char *)&q->msglen) + len1, q->data, + sizeof(q->msglen) - len1); + q->out = sizeof(q->msglen) - len1; + } + else { + memcpy(&q->msglen, q->data + q->out, sizeof(q->msglen)); + q->out += sizeof(q->msglen); + } + q->nbytes -= sizeof(q->msglen); + /* Get the message body. */ + outptr = q->msg; + len = q->msglen; + if (q->out + q->msglen > q->depth) { + /* Handle wraparound. */ + size_t len1 = q->depth - q->out; + memcpy(outptr, q->data + q->out, len1); + outptr += len1; + len -= len1; + q->nbytes -= len1; + q->out = 0; + } + memcpy(outptr, q->data + q->out, len); + q->nbytes -= len; + q->out += len; + + /* Signal the condition variable. */ + SetEvent(q->evt); + /* Unlock. 
*/ + LeaveCriticalSection(&q->mtx); + if (out_len) + *out_len = q->msglen; + return q->msg; +} + +#else /* POSIX */ +#include +#include + +struct sbthread_s { + cmd_ln_t *config; + sbmsgq_t *msgq; + sbthread_main func; + void *arg; + pthread_t th; +}; + +struct sbmsgq_s { + /* Ringbuffer for passing messages. */ + char *data; + size_t depth; + size_t out; + size_t nbytes; + + /* Current message is stored here. */ + char *msg; + size_t msglen; + pthread_mutex_t mtx; + pthread_cond_t cond; +}; + +struct sbevent_s { + pthread_mutex_t mtx; + pthread_cond_t cond; + int signalled; +}; + +struct sbmtx_s { + pthread_mutex_t mtx; +}; + +static void * +sbthread_internal_main(void *arg) +{ + sbthread_t *th = (sbthread_t *)arg; + int rv; + + rv = (*th->func)(th); + return (void *)(long)rv; +} + +sbthread_t * +sbthread_start(cmd_ln_t *config, sbthread_main func, void *arg) +{ + sbthread_t *th; + int rv; + + th = ckd_calloc(1, sizeof(*th)); + th->config = config; + th->func = func; + th->arg = arg; + th->msgq = sbmsgq_init(1024); + if ((rv = pthread_create(&th->th, NULL, &sbthread_internal_main, th)) != 0) { + E_ERROR("Failed to create thread: %d\n", rv); + sbthread_free(th); + return NULL; + } + return th; +} + +int +sbthread_wait(sbthread_t *th) +{ + void *exit; + int rv; + + /* It has already been joined. 
*/ + if (th->th == (pthread_t)-1) + return -1; + + rv = pthread_join(th->th, &exit); + if (rv != 0) { + E_ERROR("Failed to join thread: %d\n", rv); + return -1; + } + th->th = (pthread_t)-1; + return (int)(long)exit; +} + +sbmsgq_t * +sbmsgq_init(size_t depth) +{ + sbmsgq_t *msgq; + + msgq = ckd_calloc(1, sizeof(*msgq)); + msgq->depth = depth; + if (pthread_cond_init(&msgq->cond, NULL) != 0) { + ckd_free(msgq); + return NULL; + } + if (pthread_mutex_init(&msgq->mtx, NULL) != 0) { + pthread_cond_destroy(&msgq->cond); + ckd_free(msgq); + return NULL; + } + msgq->data = ckd_calloc(depth, 1); + msgq->msg = ckd_calloc(depth, 1); + return msgq; +} + +void +sbmsgq_free(sbmsgq_t *msgq) +{ + pthread_mutex_destroy(&msgq->mtx); + pthread_cond_destroy(&msgq->cond); + ckd_free(msgq->data); + ckd_free(msgq->msg); + ckd_free(msgq); +} + +int +sbmsgq_send(sbmsgq_t *q, size_t len, void const *data) +{ + size_t in; + + /* Don't allow things bigger than depth to be sent! */ + if (len + sizeof(len) > q->depth) + return -1; + + /* Lock the condition variable while we manipulate the buffer. */ + pthread_mutex_lock(&q->mtx); + if (q->nbytes + len + sizeof(len) > q->depth) { + /* Unlock and wait for space to be available. */ + if (pthread_cond_wait(&q->cond, &q->mtx) != 0) { + /* Timed out, don't send anything. */ + pthread_mutex_unlock(&q->mtx); + return -1; + } + /* Condition is now locked again. */ + } + in = (q->out + q->nbytes) % q->depth; + + /* First write the size of the message. */ + if (in + sizeof(len) > q->depth) { + /* Handle the annoying case where the size field gets wrapped around. */ + size_t len1 = q->depth - in; + memcpy(q->data + in, &len, len1); + memcpy(q->data, ((char *)&len) + len1, sizeof(len) - len1); + q->nbytes += sizeof(len); + in = sizeof(len) - len1; + } + else { + memcpy(q->data + in, &len, sizeof(len)); + q->nbytes += sizeof(len); + in += sizeof(len); + } + + /* Now write the message body. */ + if (in + len > q->depth) { + /* Handle wraparound. 
*/ + size_t len1 = q->depth - in; + memcpy(q->data + in, data, len1); + q->nbytes += len1; + data = (char const *)data + len1; + len -= len1; + in = 0; + } + memcpy(q->data + in, data, len); + q->nbytes += len; + + /* Signal the condition variable. */ + pthread_cond_signal(&q->cond); + /* Unlock it, we have nothing else to do. */ + pthread_mutex_unlock(&q->mtx); + return 0; +} + +static int +cond_timed_wait(pthread_cond_t *cond, pthread_mutex_t *mtx, int sec, int nsec) +{ + int rv; + if (sec == -1) { + rv = pthread_cond_wait(cond, mtx); + } + else { + struct timeval now; + struct timespec end; + + gettimeofday(&now, NULL); + end.tv_sec = now.tv_sec + sec; + end.tv_nsec = now.tv_usec * 1000 + nsec; + if (end.tv_nsec > (1000*1000*1000)) { + sec += end.tv_nsec / (1000*1000*1000); + end.tv_nsec = end.tv_nsec % (1000*1000*1000); + } + rv = pthread_cond_timedwait(cond, mtx, &end); + } + return rv; +} + +void * +sbmsgq_wait(sbmsgq_t *q, size_t *out_len, int sec, int nsec) +{ + char *outptr; + size_t len; + + /* Lock the condition variable while we manipulate nmsg. */ + pthread_mutex_lock(&q->mtx); + if (q->nbytes == 0) { + /* Unlock the condition variable and wait for a signal. */ + if (cond_timed_wait(&q->cond, &q->mtx, sec, nsec) != 0) { + /* Timed out or something... */ + pthread_mutex_unlock(&q->mtx); + return NULL; + } + /* Condition variable is now locked again. */ + } + /* Get the message size. */ + if (q->out + sizeof(q->msglen) > q->depth) { + /* Handle annoying wraparound case. */ + size_t len1 = q->depth - q->out; + memcpy(&q->msglen, q->data + q->out, len1); + memcpy(((char *)&q->msglen) + len1, q->data, + sizeof(q->msglen) - len1); + q->out = sizeof(q->msglen) - len1; + } + else { + memcpy(&q->msglen, q->data + q->out, sizeof(q->msglen)); + q->out += sizeof(q->msglen); + } + q->nbytes -= sizeof(q->msglen); + /* Get the message body. */ + outptr = q->msg; + len = q->msglen; + if (q->out + q->msglen > q->depth) { + /* Handle wraparound. 
*/ + size_t len1 = q->depth - q->out; + memcpy(outptr, q->data + q->out, len1); + outptr += len1; + len -= len1; + q->nbytes -= len1; + q->out = 0; + } + memcpy(outptr, q->data + q->out, len); + q->nbytes -= len; + q->out += len; + + /* Signal the condition variable. */ + pthread_cond_signal(&q->cond); + /* Unlock the condition variable, we are done. */ + pthread_mutex_unlock(&q->mtx); + if (out_len) + *out_len = q->msglen; + return q->msg; +} + +sbevent_t * +sbevent_init(void) +{ + sbevent_t *evt; + int rv; + + evt = ckd_calloc(1, sizeof(*evt)); + if ((rv = pthread_mutex_init(&evt->mtx, NULL)) != 0) { + E_ERROR("Failed to initialize mutex: %d\n", rv); + ckd_free(evt); + return NULL; + } + if ((rv = pthread_cond_init(&evt->cond, NULL)) != 0) { + E_ERROR_SYSTEM("Failed to initialize mutex: %d\n", rv); + pthread_mutex_destroy(&evt->mtx); + ckd_free(evt); + return NULL; + } + return evt; +} + +void +sbevent_free(sbevent_t *evt) +{ + pthread_mutex_destroy(&evt->mtx); + pthread_cond_destroy(&evt->cond); + ckd_free(evt); +} + +int +sbevent_signal(sbevent_t *evt) +{ + int rv; + + pthread_mutex_lock(&evt->mtx); + evt->signalled = TRUE; + rv = pthread_cond_signal(&evt->cond); + pthread_mutex_unlock(&evt->mtx); + return rv; +} + +int +sbevent_wait(sbevent_t *evt, int sec, int nsec) +{ + int rv = 0; + + /* Lock the mutex before we check its signalled state. */ + pthread_mutex_lock(&evt->mtx); + /* If it's not signalled, then wait until it is. */ + if (!evt->signalled) + rv = cond_timed_wait(&evt->cond, &evt->mtx, sec, nsec); + /* Set its state to unsignalled if we were successful. */ + if (rv == 0) + evt->signalled = FALSE; + /* And unlock its mutex. 
*/ + pthread_mutex_unlock(&evt->mtx); + + return rv; +} + +sbmtx_t * +sbmtx_init(void) +{ + sbmtx_t *mtx; + + mtx = ckd_calloc(1, sizeof(*mtx)); + if (pthread_mutex_init(&mtx->mtx, NULL) != 0) { + ckd_free(mtx); + return NULL; + } + return mtx; +} + +int +sbmtx_trylock(sbmtx_t *mtx) +{ + return pthread_mutex_trylock(&mtx->mtx); +} + +int +sbmtx_lock(sbmtx_t *mtx) +{ + return pthread_mutex_lock(&mtx->mtx); +} + +int +sbmtx_unlock(sbmtx_t *mtx) +{ + return pthread_mutex_unlock(&mtx->mtx); +} + +void +sbmtx_free(sbmtx_t *mtx) +{ + pthread_mutex_destroy(&mtx->mtx); + ckd_free(mtx); +} +#endif /* not WIN32 */ + +cmd_ln_t * +sbthread_config(sbthread_t *th) +{ + return th->config; +} + +void * +sbthread_arg(sbthread_t *th) +{ + return th->arg; +} + +sbmsgq_t * +sbthread_msgq(sbthread_t *th) +{ + return th->msgq; +} + +int +sbthread_send(sbthread_t *th, size_t len, void const *data) +{ + return sbmsgq_send(th->msgq, len, data); +} + +void +sbthread_free(sbthread_t *th) +{ + sbthread_wait(th); + sbmsgq_free(th->msgq); + ckd_free(th); +} diff --git a/media/sphinxbase/src/libsphinxbase/util/slamch.c b/media/sphinxbase/src/libsphinxbase/util/slamch.c new file mode 100644 index 000000000..229458470 --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/util/slamch.c @@ -0,0 +1,1029 @@ +/* src/slamch.f -- translated by f2c (version 20050501). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "sphinxbase/f2c.h" + +#ifdef _MSC_VER +#pragma warning (disable: 4244) +#endif + +/* Table of constant values */ + +static integer c__1 = 1; +static real c_b32 = 0.f; + +doublereal +slamch_(char *cmach, ftnlen cmach_len) +{ + /* Initialized data */ + + static logical first = TRUE_; + + /* System generated locals */ + integer i__1; + real ret_val; + + /* Builtin functions */ + double pow_ri(real *, integer *); + + /* Local variables */ + static real t; + static integer it; + static real rnd, eps, base; + static integer beta; + static real emin, prec, emax; + static integer imin, imax; + static logical lrnd; + static real rmin, rmax, rmach; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + static real small, sfmin; + extern /* Subroutine */ int slamc2_(integer *, integer *, logical *, real + *, integer *, real *, integer *, + real *); + + +/* -- LAPACK auxiliary routine (version 3.0) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., */ +/* Courant Institute, Argonne National Lab, and Rice University */ +/* October 31, 1992 */ + +/* .. Scalar Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* SLAMCH determines single precision machine parameters. 
*/ + +/* Arguments */ +/* ========= */ + +/* CMACH (input) CHARACTER*1 */ +/* Specifies the value to be returned by SLAMCH: */ +/* = 'E' or 'e', SLAMCH := eps */ +/* = 'S' or 's , SLAMCH := sfmin */ +/* = 'B' or 'b', SLAMCH := base */ +/* = 'P' or 'p', SLAMCH := eps*base */ +/* = 'N' or 'n', SLAMCH := t */ +/* = 'R' or 'r', SLAMCH := rnd */ +/* = 'M' or 'm', SLAMCH := emin */ +/* = 'U' or 'u', SLAMCH := rmin */ +/* = 'L' or 'l', SLAMCH := emax */ +/* = 'O' or 'o', SLAMCH := rmax */ + +/* where */ + +/* eps = relative machine precision */ +/* sfmin = safe minimum, such that 1/sfmin does not overflow */ +/* base = base of the machine */ +/* prec = eps*base */ +/* t = number of (base) digits in the mantissa */ +/* rnd = 1.0 when rounding occurs in addition, 0.0 otherwise */ +/* emin = minimum exponent before (gradual) underflow */ +/* rmin = underflow threshold - base**(emin-1) */ +/* emax = largest exponent before overflow */ +/* rmax = overflow threshold - (base**emax)*(1-eps) */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Save statement .. */ +/* .. */ +/* .. Data statements .. */ +/* .. */ +/* .. Executable Statements .. */ + + if (first) { + first = FALSE_; + slamc2_(&beta, &it, &lrnd, &eps, &imin, &rmin, &imax, &rmax); + base = (real) beta; + t = (real) it; + if (lrnd) { + rnd = 1.f; + i__1 = 1 - it; + eps = pow_ri(&base, &i__1) / 2; + } + else { + rnd = 0.f; + i__1 = 1 - it; + eps = pow_ri(&base, &i__1); + } + prec = eps * base; + emin = (real) imin; + emax = (real) imax; + sfmin = rmin; + small = 1.f / rmax; + if (small >= sfmin) { + +/* Use SMALL plus a bit, to avoid the possibility of rounding */ +/* causing overflow when computing 1/sfmin. 
*/ + + sfmin = small * (eps + 1.f); + } + } + + if (lsame_(cmach, "E", (ftnlen) 1, (ftnlen) 1)) { + rmach = eps; + } + else if (lsame_(cmach, "S", (ftnlen) 1, (ftnlen) 1)) { + rmach = sfmin; + } + else if (lsame_(cmach, "B", (ftnlen) 1, (ftnlen) 1)) { + rmach = base; + } + else if (lsame_(cmach, "P", (ftnlen) 1, (ftnlen) 1)) { + rmach = prec; + } + else if (lsame_(cmach, "N", (ftnlen) 1, (ftnlen) 1)) { + rmach = t; + } + else if (lsame_(cmach, "R", (ftnlen) 1, (ftnlen) 1)) { + rmach = rnd; + } + else if (lsame_(cmach, "M", (ftnlen) 1, (ftnlen) 1)) { + rmach = emin; + } + else if (lsame_(cmach, "U", (ftnlen) 1, (ftnlen) 1)) { + rmach = rmin; + } + else if (lsame_(cmach, "L", (ftnlen) 1, (ftnlen) 1)) { + rmach = emax; + } + else if (lsame_(cmach, "O", (ftnlen) 1, (ftnlen) 1)) { + rmach = rmax; + } + + ret_val = rmach; + return ret_val; + +/* End of SLAMCH */ + +} /* slamch_ */ + + +/* *********************************************************************** */ + +/* Subroutine */ int +slamc1_(integer * beta, integer * t, logical * rnd, logical * ieee1) +{ + /* Initialized data */ + + static logical first = TRUE_; + + /* System generated locals */ + real r__1, r__2; + + /* Local variables */ + static real a, b, c__, f, t1, t2; + static integer lt; + static real one, qtr; + static logical lrnd; + static integer lbeta; + static real savec; + static logical lieee1; + extern doublereal slamc3_(real *, real *); + + +/* -- LAPACK auxiliary routine (version 3.0) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., */ +/* Courant Institute, Argonne National Lab, and Rice University */ +/* October 31, 1992 */ + +/* .. Scalar Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* SLAMC1 determines the machine parameters given by BETA, T, RND, and */ +/* IEEE1. */ + +/* Arguments */ +/* ========= */ + +/* BETA (output) INTEGER */ +/* The base of the machine. */ + +/* T (output) INTEGER */ +/* The number of ( BETA ) digits in the mantissa. 
*/ + +/* RND (output) LOGICAL */ +/* Specifies whether proper rounding ( RND = .TRUE. ) or */ +/* chopping ( RND = .FALSE. ) occurs in addition. This may not */ +/* be a reliable guide to the way in which the machine performs */ +/* its arithmetic. */ + +/* IEEE1 (output) LOGICAL */ +/* Specifies whether rounding appears to be done in the IEEE */ +/* 'round to nearest' style. */ + +/* Further Details */ +/* =============== */ + +/* The routine is based on the routine ENVRON by Malcolm and */ +/* incorporates suggestions by Gentleman and Marovich. See */ + +/* Malcolm M. A. (1972) Algorithms to reveal properties of */ +/* floating-point arithmetic. Comms. of the ACM, 15, 949-951. */ + +/* Gentleman W. M. and Marovich S. B. (1974) More on algorithms */ +/* that reveal properties of floating point arithmetic units. */ +/* Comms. of the ACM, 17, 276-277. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Save statement .. */ +/* .. */ +/* .. Data statements .. */ +/* .. */ +/* .. Executable Statements .. */ + + if (first) { + first = FALSE_; + one = 1.f; + +/* LBETA, LIEEE1, LT and LRND are the local values of BETA, */ +/* IEEE1, T and RND. */ + +/* Throughout this routine we use the function SLAMC3 to ensure */ +/* that relevant values are stored and not held in registers, or */ +/* are not affected by optimizers. */ + +/* Compute a = 2.0**m with the smallest positive integer m such */ +/* that */ + +/* fl( a + 1.0 ) = a. */ + + a = 1.f; + c__ = 1.f; + +/* + WHILE( C.EQ.ONE )LOOP */ + L10: + if (c__ == one) { + a *= 2; + c__ = slamc3_(&a, &one); + r__1 = -a; + c__ = slamc3_(&c__, &r__1); + goto L10; + } +/* + END WHILE */ + +/* Now compute b = 2.0**m with the smallest positive integer m */ +/* such that */ + +/* fl( a + b ) .gt. a. 
*/ + + b = 1.f; + c__ = slamc3_(&a, &b); + +/* + WHILE( C.EQ.A )LOOP */ + L20: + if (c__ == a) { + b *= 2; + c__ = slamc3_(&a, &b); + goto L20; + } +/* + END WHILE */ + +/* Now compute the base. a and c are neighbouring floating point */ +/* numbers in the interval ( beta**t, beta**( t + 1 ) ) and so */ +/* their difference is beta. Adding 0.25 to c is to ensure that it */ +/* is truncated to beta and not ( beta - 1 ). */ + + qtr = one / 4; + savec = c__; + r__1 = -a; + c__ = slamc3_(&c__, &r__1); + lbeta = c__ + qtr; + +/* Now determine whether rounding or chopping occurs, by adding a */ +/* bit less than beta/2 and a bit more than beta/2 to a. */ + + b = (real) lbeta; + r__1 = b / 2; + r__2 = -b / 100; + f = slamc3_(&r__1, &r__2); + c__ = slamc3_(&f, &a); + if (c__ == a) { + lrnd = TRUE_; + } + else { + lrnd = FALSE_; + } + r__1 = b / 2; + r__2 = b / 100; + f = slamc3_(&r__1, &r__2); + c__ = slamc3_(&f, &a); + if (lrnd && c__ == a) { + lrnd = FALSE_; + } + +/* Try and decide whether rounding is done in the IEEE 'round to */ +/* nearest' style. B/2 is half a unit in the last place of the two */ +/* numbers A and SAVEC. Furthermore, A is even, i.e. has last bit */ +/* zero, and SAVEC is odd. Thus adding B/2 to A should not change */ +/* A, but adding B/2 to SAVEC should change SAVEC. */ + + r__1 = b / 2; + t1 = slamc3_(&r__1, &a); + r__1 = b / 2; + t2 = slamc3_(&r__1, &savec); + lieee1 = t1 == a && t2 > savec && lrnd; + +/* Now find the mantissa, t. It should be the integer part of */ +/* log to the base beta of a, however it is safer to determine t */ +/* by powering. So we find t as the smallest positive integer for */ +/* which */ + +/* fl( beta**t + 1.0 ) = 1.0. 
*/ + + lt = 0; + a = 1.f; + c__ = 1.f; + +/* + WHILE( C.EQ.ONE )LOOP */ + L30: + if (c__ == one) { + ++lt; + a *= lbeta; + c__ = slamc3_(&a, &one); + r__1 = -a; + c__ = slamc3_(&c__, &r__1); + goto L30; + } +/* + END WHILE */ + + } + + *beta = lbeta; + *t = lt; + *rnd = lrnd; + *ieee1 = lieee1; + return 0; + +/* End of SLAMC1 */ + +} /* slamc1_ */ + + +/* *********************************************************************** */ + +/* Subroutine */ int +slamc2_(integer * beta, integer * t, logical * rnd, real * + eps, integer * emin, real * rmin, integer * emax, real * rmax) +{ + /* Initialized data */ + + static logical first = TRUE_; + static logical iwarn = FALSE_; + + /* Format strings */ + static char fmt_9999[] = + "(//\002 WARNING. The value EMIN may be incorre" + "ct:-\002,\002 EMIN = \002,i8,/\002 If, after inspection, the va" + "lue EMIN looks\002,\002 acceptable please comment out \002,/\002" + " the IF block as marked within the code of routine\002,\002 SLAM" + "C2,\002,/\002 otherwise supply EMIN explicitly.\002,/)"; + + /* System generated locals */ + integer i__1; + real r__1, r__2, r__3, r__4, r__5; + + /* Builtin functions */ + double pow_ri(real *, integer *); + integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), + e_wsfe(void); + + /* Local variables */ + static real a, b, c__; + static integer i__, lt; + static real one, two; + static logical ieee; + static real half; + static logical lrnd; + static real leps, zero; + static integer lbeta; + static real rbase; + static integer lemin, lemax, gnmin; + static real small; + static integer gpmin; + static real third, lrmin, lrmax, sixth; + static logical lieee1; + extern /* Subroutine */ int slamc1_(integer *, integer *, logical *, + logical *); + extern doublereal slamc3_(real *, real *); + extern /* Subroutine */ int slamc4_(integer *, real *, integer *), + slamc5_(integer *, integer *, integer *, logical *, integer *, + real *); + static integer ngnmin, ngpmin; + + /* Fortran I/O blocks 
*/ + static cilist io___58 = { 0, 6, 0, fmt_9999, 0 }; + + + +/* -- LAPACK auxiliary routine (version 3.0) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., */ +/* Courant Institute, Argonne National Lab, and Rice University */ +/* October 31, 1992 */ + +/* .. Scalar Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* SLAMC2 determines the machine parameters specified in its argument */ +/* list. */ + +/* Arguments */ +/* ========= */ + +/* BETA (output) INTEGER */ +/* The base of the machine. */ + +/* T (output) INTEGER */ +/* The number of ( BETA ) digits in the mantissa. */ + +/* RND (output) LOGICAL */ +/* Specifies whether proper rounding ( RND = .TRUE. ) or */ +/* chopping ( RND = .FALSE. ) occurs in addition. This may not */ +/* be a reliable guide to the way in which the machine performs */ +/* its arithmetic. */ + +/* EPS (output) REAL */ +/* The smallest positive number such that */ + +/* fl( 1.0 - EPS ) .LT. 1.0, */ + +/* where fl denotes the computed value. */ + +/* EMIN (output) INTEGER */ +/* The minimum exponent before (gradual) underflow occurs. */ + +/* RMIN (output) REAL */ +/* The smallest normalized number for the machine, given by */ +/* BASE**( EMIN - 1 ), where BASE is the floating point value */ +/* of BETA. */ + +/* EMAX (output) INTEGER */ +/* The maximum exponent before overflow occurs. */ + +/* RMAX (output) REAL */ +/* The largest positive number for the machine, given by */ +/* BASE**EMAX * ( 1 - EPS ), where BASE is the floating point */ +/* value of BETA. */ + +/* Further Details */ +/* =============== */ + +/* The computation of EPS is based on a routine PARANOIA by */ +/* W. Kahan of the University of California at Berkeley. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Save statement .. 
*/ +/* .. */ +/* .. Data statements .. */ +/* .. */ +/* .. Executable Statements .. */ + + if (first) { + first = FALSE_; + zero = 0.f; + one = 1.f; + two = 2.f; + +/* LBETA, LT, LRND, LEPS, LEMIN and LRMIN are the local values of */ +/* BETA, T, RND, EPS, EMIN and RMIN. */ + +/* Throughout this routine we use the function SLAMC3 to ensure */ +/* that relevant values are stored and not held in registers, or */ +/* are not affected by optimizers. */ + +/* SLAMC1 returns the parameters LBETA, LT, LRND and LIEEE1. */ + + slamc1_(&lbeta, &lt, &lrnd, &lieee1); + +/* Start to find EPS. */ + + b = (real) lbeta; + i__1 = -lt; + a = pow_ri(&b, &i__1); + leps = a; + +/* Try some tricks to see whether or not this is the correct EPS. */ + + b = two / 3; + half = one / 2; + r__1 = -half; + sixth = slamc3_(&b, &r__1); + third = slamc3_(&sixth, &sixth); + r__1 = -half; + b = slamc3_(&third, &r__1); + b = slamc3_(&b, &sixth); + b = dabs(b); + if (b < leps) { + b = leps; + } + + leps = 1.f; + +/* + WHILE( ( LEPS.GT.B ).AND.( B.GT.ZERO ) )LOOP */ + L10: + if (leps > b && b > zero) { + leps = b; + r__1 = half * leps; +/* Computing 5th power */ + r__3 = two, r__4 = r__3, r__3 *= r__3; +/* Computing 2nd power */ + r__5 = leps; + r__2 = r__4 * (r__3 * r__3) * (r__5 * r__5); + c__ = slamc3_(&r__1, &r__2); + r__1 = -c__; + c__ = slamc3_(&half, &r__1); + b = slamc3_(&half, &c__); + r__1 = -b; + c__ = slamc3_(&half, &r__1); + b = slamc3_(&half, &c__); + goto L10; + } +/* + END WHILE */ + + if (a < leps) { + leps = a; + } + +/* Computation of EPS complete. */ + +/* Now find EMIN. Let A = + or - 1, and + or - (1 + BASE**(-3)). */ +/* Keep dividing A by BETA until (gradual) underflow occurs. This */ +/* is detected when we cannot recover the previous A. 
*/ + + rbase = one / lbeta; + small = one; + for (i__ = 1; i__ <= 3; ++i__) { + r__1 = small * rbase; + small = slamc3_(&r__1, &zero); +/* L20: */ + } + a = slamc3_(&one, &small); + slamc4_(&ngpmin, &one, &lbeta); + r__1 = -one; + slamc4_(&ngnmin, &r__1, &lbeta); + slamc4_(&gpmin, &a, &lbeta); + r__1 = -a; + slamc4_(&gnmin, &r__1, &lbeta); + ieee = FALSE_; + + if (ngpmin == ngnmin && gpmin == gnmin) { + if (ngpmin == gpmin) { + lemin = ngpmin; +/* ( Non twos-complement machines, no gradual underflow; */ +/* e.g., VAX ) */ + } + else if (gpmin - ngpmin == 3) { + lemin = ngpmin - 1 + lt; + ieee = TRUE_; +/* ( Non twos-complement machines, with gradual underflow; */ +/* e.g., IEEE standard followers ) */ + } + else { + lemin = min(ngpmin, gpmin); +/* ( A guess; no known machine ) */ + iwarn = TRUE_; + } + + } + else if (ngpmin == gpmin && ngnmin == gnmin) { + if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1) { + lemin = max(ngpmin, ngnmin); +/* ( Twos-complement machines, no gradual underflow; */ +/* e.g., CYBER 205 ) */ + } + else { + lemin = min(ngpmin, ngnmin); +/* ( A guess; no known machine ) */ + iwarn = TRUE_; + } + + } + else if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1 + && gpmin == gnmin) { + if (gpmin - min(ngpmin, ngnmin) == 3) { + lemin = max(ngpmin, ngnmin) - 1 + lt; +/* ( Twos-complement machines with gradual underflow; */ +/* no known machine ) */ + } + else { + lemin = min(ngpmin, ngnmin); +/* ( A guess; no known machine ) */ + iwarn = TRUE_; + } + + } + else { +/* Computing MIN */ + i__1 = min(ngpmin, ngnmin), i__1 = min(i__1, gpmin); + lemin = min(i__1, gnmin); +/* ( A guess; no known machine ) */ + iwarn = TRUE_; + } +/* ** */ +/* Comment out this if block if EMIN is ok */ + if (iwarn) { + first = TRUE_; + s_wsfe(&io___58); + do_fio(&c__1, (char *) &lemin, (ftnlen) sizeof(integer)); + e_wsfe(); + } +/* ** */ + +/* Assume IEEE arithmetic if we found denormalised numbers above, */ +/* or if arithmetic seems to round in the IEEE style, determined */ +/* 
in routine SLAMC1. A true IEEE machine should have both things */ +/* true; however, faulty machines may have one or the other. */ + + ieee = ieee || lieee1; + +/* Compute RMIN by successive division by BETA. We could compute */ +/* RMIN as BASE**( EMIN - 1 ), but some machines underflow during */ +/* this computation. */ + + lrmin = 1.f; + i__1 = 1 - lemin; + for (i__ = 1; i__ <= i__1; ++i__) { + r__1 = lrmin * rbase; + lrmin = slamc3_(&r__1, &zero); +/* L30: */ + } + +/* Finally, call SLAMC5 to compute EMAX and RMAX. */ + + slamc5_(&lbeta, &lt, &lemin, &ieee, &lemax, &lrmax); + } + + *beta = lbeta; + *t = lt; + *rnd = lrnd; + *eps = leps; + *emin = lemin; + *rmin = lrmin; + *emax = lemax; + *rmax = lrmax; + + return 0; + + +/* End of SLAMC2 */ + +} /* slamc2_ */ + + +/* *********************************************************************** */ + +doublereal +slamc3_(real * a, real * b) +{ + /* System generated locals */ + real ret_val; + + +/* -- LAPACK auxiliary routine (version 3.0) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., */ +/* Courant Institute, Argonne National Lab, and Rice University */ +/* October 31, 1992 */ + +/* .. Scalar Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* SLAMC3 is intended to force A and B to be stored prior to doing */ +/* the addition of A and B , for use in situations where optimizers */ +/* might hold one of these in a register. */ + +/* Arguments */ +/* ========= */ + +/* A, B (input) REAL */ +/* The values A and B. */ + +/* ===================================================================== */ + +/* .. Executable Statements .. 
*/ + + ret_val = *a + *b; + + return ret_val; + +/* End of SLAMC3 */ + +} /* slamc3_ */ + + +/* *********************************************************************** */ + +/* Subroutine */ int +slamc4_(integer * emin, real * start, integer * base) +{ + /* System generated locals */ + integer i__1; + real r__1; + + /* Local variables */ + static real a; + static integer i__; + static real b1, b2, c1, c2, d1, d2, one, zero, rbase; + extern doublereal slamc3_(real *, real *); + + +/* -- LAPACK auxiliary routine (version 3.0) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., */ +/* Courant Institute, Argonne National Lab, and Rice University */ +/* October 31, 1992 */ + +/* .. Scalar Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* SLAMC4 is a service routine for SLAMC2. */ + +/* Arguments */ +/* ========= */ + +/* EMIN (output) INTEGER */ +/* The minimum exponent before (gradual) underflow, computed by */ +/* setting A = START and dividing by BASE until the previous A */ +/* can not be recovered. */ + +/* START (input) REAL */ +/* The starting point for determining EMIN. */ + +/* BASE (input) INTEGER */ +/* The base of the machine. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + a = *start; + one = 1.f; + rbase = one / *base; + zero = 0.f; + *emin = 1; + r__1 = a * rbase; + b1 = slamc3_(&r__1, &zero); + c1 = a; + c2 = a; + d1 = a; + d2 = a; +/* + WHILE( ( C1.EQ.A ).AND.( C2.EQ.A ).AND. 
*/ +/* $ ( D1.EQ.A ).AND.( D2.EQ.A ) )LOOP */ + L10: + if (c1 == a && c2 == a && d1 == a && d2 == a) { + --(*emin); + a = b1; + r__1 = a / *base; + b1 = slamc3_(&r__1, &zero); + r__1 = b1 * *base; + c1 = slamc3_(&r__1, &zero); + d1 = zero; + i__1 = *base; + for (i__ = 1; i__ <= i__1; ++i__) { + d1 += b1; +/* L20: */ + } + r__1 = a * rbase; + b2 = slamc3_(&r__1, &zero); + r__1 = b2 / rbase; + c2 = slamc3_(&r__1, &zero); + d2 = zero; + i__1 = *base; + for (i__ = 1; i__ <= i__1; ++i__) { + d2 += b2; +/* L30: */ + } + goto L10; + } +/* + END WHILE */ + + return 0; + +/* End of SLAMC4 */ + +} /* slamc4_ */ + + +/* *********************************************************************** */ + +/* Subroutine */ int +slamc5_(integer * beta, integer * p, integer * emin, + logical * ieee, integer * emax, real * rmax) +{ + /* System generated locals */ + integer i__1; + real r__1; + + /* Local variables */ + static integer i__; + static real y, z__; + static integer try__, lexp; + static real oldy; + static integer uexp, nbits; + extern doublereal slamc3_(real *, real *); + static real recbas; + static integer exbits, expsum; + + +/* -- LAPACK auxiliary routine (version 3.0) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., */ +/* Courant Institute, Argonne National Lab, and Rice University */ +/* October 31, 1992 */ + +/* .. Scalar Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* SLAMC5 attempts to compute RMAX, the largest machine floating-point */ +/* number, without overflow. It assumes that EMAX + abs(EMIN) sum */ +/* approximately to a power of 2. It will fail on machines where this */ +/* assumption does not hold, for example, the Cyber 205 (EMIN = -28625, */ +/* EMAX = 28718). It will also fail if the value supplied for EMIN is */ +/* too large (i.e. too close to zero), probably with overflow. */ + +/* Arguments */ +/* ========= */ + +/* BETA (input) INTEGER */ +/* The base of floating-point arithmetic. 
*/ + +/* P (input) INTEGER */ +/* The number of base BETA digits in the mantissa of a */ +/* floating-point value. */ + +/* EMIN (input) INTEGER */ +/* The minimum exponent before (gradual) underflow. */ + +/* IEEE (input) LOGICAL */ +/* A logical flag specifying whether or not the arithmetic */ +/* system is thought to comply with the IEEE standard. */ + +/* EMAX (output) INTEGER */ +/* The largest exponent before overflow */ + +/* RMAX (output) REAL */ +/* The largest machine floating-point number. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* First compute LEXP and UEXP, two powers of 2 that bound */ +/* abs(EMIN). We then assume that EMAX + abs(EMIN) will sum */ +/* approximately to the bound that is closest to abs(EMIN). */ +/* (EMAX is the exponent of the required number RMAX). */ + + lexp = 1; + exbits = 1; + L10: + try__ = lexp << 1; + if (try__ <= -(*emin)) { + lexp = try__; + ++exbits; + goto L10; + } + if (lexp == -(*emin)) { + uexp = lexp; + } + else { + uexp = try__; + ++exbits; + } + +/* Now -LEXP is less than or equal to EMIN, and -UEXP is greater */ +/* than or equal to EMIN. EXBITS is the number of bits needed to */ +/* store the exponent. */ + + if (uexp + *emin > -lexp - *emin) { + expsum = lexp << 1; + } + else { + expsum = uexp << 1; + } + +/* EXPSUM is the exponent range, approximately equal to */ +/* EMAX - EMIN + 1 . */ + + *emax = expsum + *emin - 1; + nbits = exbits + 1 + *p; + +/* NBITS is the total number of bits needed to store a */ +/* floating-point number. */ + + if (nbits % 2 == 1 && *beta == 2) { + +/* Either there are an odd number of bits used to store a */ +/* floating-point number, which is unlikely, or some bits are */ +/* not used in the representation of numbers, which is possible, */ +/* (e.g. 
Cray machines) or the mantissa has an implicit bit, */ +/* (e.g. IEEE machines, Dec Vax machines), which is perhaps the */ +/* most likely. We have to assume the last alternative. */ +/* If this is true, then we need to reduce EMAX by one because */ +/* there must be some way of representing zero in an implicit-bit */ +/* system. On machines like Cray, we are reducing EMAX by one */ +/* unnecessarily. */ + + --(*emax); + } + + if (*ieee) { + +/* Assume we are on an IEEE machine which reserves one exponent */ +/* for infinity and NaN. */ + + --(*emax); + } + +/* Now create RMAX, the largest machine number, which should */ +/* be equal to (1.0 - BETA**(-P)) * BETA**EMAX . */ + +/* First compute 1.0 - BETA**(-P), being careful that the */ +/* result is less than 1.0 . */ + + recbas = 1.f / *beta; + z__ = *beta - 1.f; + y = 0.f; + i__1 = *p; + for (i__ = 1; i__ <= i__1; ++i__) { + z__ *= recbas; + if (y < 1.f) { + oldy = y; + } + y = slamc3_(&y, &z__); +/* L20: */ + } + if (y >= 1.f) { + y = oldy; + } + +/* Now multiply by BETA**EMAX to get RMAX. */ + + i__1 = *emax; + for (i__ = 1; i__ <= i__1; ++i__) { + r__1 = y * *beta; + y = slamc3_(&r__1, &c_b32); +/* L30: */ + } + + *rmax = y; + return 0; + +/* End of SLAMC5 */ + +} /* slamc5_ */ diff --git a/media/sphinxbase/src/libsphinxbase/util/slapack_lite.c b/media/sphinxbase/src/libsphinxbase/util/slapack_lite.c new file mode 100644 index 000000000..4d4e1af31 --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/util/slapack_lite.c @@ -0,0 +1,1461 @@ +/* +NOTE: This is generated code. Look in README.python for information on + remaking this file. 
+*/ +#include "sphinxbase/f2c.h" + +#ifdef HAVE_CONFIG +#include "config.h" +#else +extern doublereal slamch_(char *); +#define EPSILON slamch_("Epsilon") +#define SAFEMINIMUM slamch_("Safe minimum") +#define PRECISION slamch_("Precision") +#define BASE slamch_("Base") +#endif + + +extern doublereal slapy2_(real *, real *); + + + +/* Table of constant values */ + +static integer c__0 = 0; +static real c_b163 = 0.f; +static real c_b164 = 1.f; +static integer c__1 = 1; +static real c_b181 = -1.f; +static integer c_n1 = -1; + +integer ieeeck_(integer *ispec, real *zero, real *one) +{ + /* System generated locals */ + integer ret_val; + + /* Local variables */ + static real nan1, nan2, nan3, nan4, nan5, nan6, neginf, posinf, negzro, + newzro; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1998 + + + Purpose + ======= + + IEEECK is called from the ILAENV to verify that Infinity and + possibly NaN arithmetic is safe (i.e. will not trap). + + Arguments + ========= + + ISPEC (input) INTEGER + Specifies whether to test just for infinity arithmetic + or whether to test for infinity and NaN arithmetic. + = 0: Verify infinity arithmetic only. + = 1: Verify infinity and NaN arithmetic. + + ZERO (input) REAL + Must contain the value 0.0 + This is passed to prevent the compiler from optimizing + away this code. + + ONE (input) REAL + Must contain the value 1.0 + This is passed to prevent the compiler from optimizing + away this code. 
+ + RETURN VALUE: INTEGER + = 0: Arithmetic failed to produce the correct answers + = 1: Arithmetic produced the correct answers +*/ + + ret_val = 1; + + posinf = *one / *zero; + if (posinf <= *one) { + ret_val = 0; + return ret_val; + } + + neginf = -(*one) / *zero; + if (neginf >= *zero) { + ret_val = 0; + return ret_val; + } + + negzro = *one / (neginf + *one); + if (negzro != *zero) { + ret_val = 0; + return ret_val; + } + + neginf = *one / negzro; + if (neginf >= *zero) { + ret_val = 0; + return ret_val; + } + + newzro = negzro + *zero; + if (newzro != *zero) { + ret_val = 0; + return ret_val; + } + + posinf = *one / newzro; + if (posinf <= *one) { + ret_val = 0; + return ret_val; + } + + neginf *= posinf; + if (neginf >= *zero) { + ret_val = 0; + return ret_val; + } + + posinf *= posinf; + if (posinf <= *one) { + ret_val = 0; + return ret_val; + } + + +/* Return if we were only asked to check infinity arithmetic */ + + if (*ispec == 0) { + return ret_val; + } + + nan1 = posinf + neginf; + + nan2 = posinf / neginf; + + nan3 = posinf / posinf; + + nan4 = posinf * *zero; + + nan5 = neginf * negzro; + + nan6 = nan5 * 0.f; + + if (nan1 == nan1) { + ret_val = 0; + return ret_val; + } + + if (nan2 == nan2) { + ret_val = 0; + return ret_val; + } + + if (nan3 == nan3) { + ret_val = 0; + return ret_val; + } + + if (nan4 == nan4) { + ret_val = 0; + return ret_val; + } + + if (nan5 == nan5) { + ret_val = 0; + return ret_val; + } + + if (nan6 == nan6) { + ret_val = 0; + return ret_val; + } + + return ret_val; +} /* ieeeck_ */ + +integer ilaenv_(integer *ispec, char *name__, char *opts, integer *n1, + integer *n2, integer *n3, integer *n4, ftnlen name_len, ftnlen + opts_len) +{ + /* System generated locals */ + integer ret_val; + + /* Builtin functions */ + /* Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen); + integer s_cmp(char *, char *, ftnlen, ftnlen); + + /* Local variables */ + static integer i__; + static char c1[1], c2[2], c3[3], c4[2]; + static integer 
ic, nb, iz, nx; + static logical cname, sname; + static integer nbmin; + extern integer ieeeck_(integer *, real *, real *); + static char subnam[6]; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + ILAENV is called from the LAPACK routines to choose problem-dependent + parameters for the local environment. See ISPEC for a description of + the parameters. + + This version provides a set of parameters which should give good, + but not optimal, performance on many of the currently available + computers. Users are encouraged to modify this subroutine to set + the tuning parameters for their particular machine using the option + and problem size information in the arguments. + + This routine will not function correctly if it is converted to all + lower case. Converting it to all upper case is allowed. + + Arguments + ========= + + ISPEC (input) INTEGER + Specifies the parameter to be returned as the value of + ILAENV. + = 1: the optimal blocksize; if this value is 1, an unblocked + algorithm will give the best performance. + = 2: the minimum block size for which the block routine + should be used; if the usable block size is less than + this value, an unblocked routine should be used. + = 3: the crossover point (in a block routine, for N less + than this value, an unblocked routine should be used) + = 4: the number of shifts, used in the nonsymmetric + eigenvalue routines + = 5: the minimum column dimension for blocking to be used; + rectangular blocks must have dimension at least k by m, + where k is given by ILAENV(2,...) and m by ILAENV(5,...) + = 6: the crossover point for the SVD (when reducing an m by n + matrix to bidiagonal form, if max(m,n)/min(m,n) exceeds + this value, a QR factorization is used first to reduce + the matrix to a triangular form.) 
+ = 7: the number of processors + = 8: the crossover point for the multishift QR and QZ methods + for nonsymmetric eigenvalue problems. + = 9: maximum size of the subproblems at the bottom of the + computation tree in the divide-and-conquer algorithm + (used by xGELSD and xGESDD) + =10: ieee NaN arithmetic can be trusted not to trap + =11: infinity arithmetic can be trusted not to trap + + NAME (input) CHARACTER*(*) + The name of the calling subroutine, in either upper case or + lower case. + + OPTS (input) CHARACTER*(*) + The character options to the subroutine NAME, concatenated + into a single character string. For example, UPLO = 'U', + TRANS = 'T', and DIAG = 'N' for a triangular routine would + be specified as OPTS = 'UTN'. + + N1 (input) INTEGER + N2 (input) INTEGER + N3 (input) INTEGER + N4 (input) INTEGER + Problem dimensions for the subroutine NAME; these may not all + be required. + + (ILAENV) (output) INTEGER + >= 0: the value of the parameter specified by ISPEC + < 0: if ILAENV = -k, the k-th argument had an illegal value. + + Further Details + =============== + + The following conventions have been used when calling ILAENV from the + LAPACK routines: + 1) OPTS is a concatenation of all of the character options to + subroutine NAME, in the same order that they appear in the + argument list for NAME, even if they are not used in determining + the value of the parameter specified by ISPEC. + 2) The problem dimensions N1, N2, N3, N4 are specified in the order + that they appear in the argument list for NAME. N1 is used + first, N2 second, and so on, and unused problem dimensions are + passed a value of -1. + 3) The parameter value returned by ILAENV is checked for validity in + the calling subroutine. 
For example, ILAENV is used to retrieve + the optimal blocksize for STRTRI as follows: + + NB = ILAENV( 1, 'STRTRI', UPLO // DIAG, N, -1, -1, -1 ) + IF( NB.LE.1 ) NB = MAX( 1, N ) + + ===================================================================== +*/ + + + switch (*ispec) { + case 1: goto L100; + case 2: goto L100; + case 3: goto L100; + case 4: goto L400; + case 5: goto L500; + case 6: goto L600; + case 7: goto L700; + case 8: goto L800; + case 9: goto L900; + case 10: goto L1000; + case 11: goto L1100; + } + +/* Invalid value for ISPEC */ + + ret_val = -1; + return ret_val; + +L100: + +/* Convert NAME to upper case if the first character is lower case. */ + + ret_val = 1; + s_copy(subnam, name__, (ftnlen)6, name_len); + ic = *(unsigned char *)subnam; + iz = 'Z'; + if (iz == 90 || iz == 122) { + +/* ASCII character set */ + + if (ic >= 97 && ic <= 122) { + *(unsigned char *)subnam = (char) (ic - 32); + for (i__ = 2; i__ <= 6; ++i__) { + ic = *(unsigned char *)&subnam[i__ - 1]; + if (ic >= 97 && ic <= 122) { + *(unsigned char *)&subnam[i__ - 1] = (char) (ic - 32); + } +/* L10: */ + } + } + + } else if (iz == 233 || iz == 169) { + +/* EBCDIC character set */ + + if (ic >= 129 && ic <= 137 || ic >= 145 && ic <= 153 || ic >= 162 && + ic <= 169) { + *(unsigned char *)subnam = (char) (ic + 64); + for (i__ = 2; i__ <= 6; ++i__) { + ic = *(unsigned char *)&subnam[i__ - 1]; + if (ic >= 129 && ic <= 137 || ic >= 145 && ic <= 153 || ic >= + 162 && ic <= 169) { + *(unsigned char *)&subnam[i__ - 1] = (char) (ic + 64); + } +/* L20: */ + } + } + + } else if (iz == 218 || iz == 250) { + +/* Prime machines: ASCII+128 */ + + if (ic >= 225 && ic <= 250) { + *(unsigned char *)subnam = (char) (ic - 32); + for (i__ = 2; i__ <= 6; ++i__) { + ic = *(unsigned char *)&subnam[i__ - 1]; + if (ic >= 225 && ic <= 250) { + *(unsigned char *)&subnam[i__ - 1] = (char) (ic - 32); + } +/* L30: */ + } + } + } + + *(unsigned char *)c1 = *(unsigned char *)subnam; + sname = *(unsigned char *)c1 
== 'S' || *(unsigned char *)c1 == 'D'; + cname = *(unsigned char *)c1 == 'C' || *(unsigned char *)c1 == 'Z'; + if (! (cname || sname)) { + return ret_val; + } + s_copy(c2, subnam + 1, (ftnlen)2, (ftnlen)2); + s_copy(c3, subnam + 3, (ftnlen)3, (ftnlen)3); + s_copy(c4, c3 + 1, (ftnlen)2, (ftnlen)2); + + switch (*ispec) { + case 1: goto L110; + case 2: goto L200; + case 3: goto L300; + } + +L110: + +/* + ISPEC = 1: block size + + In these examples, separate code is provided for setting NB for + real and complex. We assume that NB will take the same value in + single or double precision. +*/ + + nb = 1; + + if (s_cmp(c2, "GE", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nb = 64; + } else { + nb = 64; + } + } else if (s_cmp(c3, "QRF", (ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, + "RQF", (ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, "LQF", (ftnlen) + 3, (ftnlen)3) == 0 || s_cmp(c3, "QLF", (ftnlen)3, (ftnlen)3) + == 0) { + if (sname) { + nb = 32; + } else { + nb = 32; + } + } else if (s_cmp(c3, "HRD", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nb = 32; + } else { + nb = 32; + } + } else if (s_cmp(c3, "BRD", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nb = 32; + } else { + nb = 32; + } + } else if (s_cmp(c3, "TRI", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nb = 64; + } else { + nb = 64; + } + } + } else if (s_cmp(c2, "PO", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nb = 64; + } else { + nb = 64; + } + } + } else if (s_cmp(c2, "SY", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nb = 64; + } else { + nb = 64; + } + } else if (sname && s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) { + nb = 32; + } else if (sname && s_cmp(c3, "GST", (ftnlen)3, (ftnlen)3) == 0) { + nb = 64; + } + } else if (cname && s_cmp(c2, "HE", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { + nb = 64; + } 
else if (s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) { + nb = 32; + } else if (s_cmp(c3, "GST", (ftnlen)3, (ftnlen)3) == 0) { + nb = 64; + } + } else if (sname && s_cmp(c2, "OR", (ftnlen)2, (ftnlen)2) == 0) { + if (*(unsigned char *)c3 == 'G') { + if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nb = 32; + } + } else if (*(unsigned char *)c3 == 'M') { + if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nb = 32; + } + } + } else if (cname && s_cmp(c2, "UN", (ftnlen)2, (ftnlen)2) == 0) { + if (*(unsigned char *)c3 == 'G') { + if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nb = 32; + } + } else if (*(unsigned char *)c3 == 'M') { + if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nb = 32; + } + } + } else if (s_cmp(c2, "GB", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { 
+ if (*n4 <= 64) { + nb = 1; + } else { + nb = 32; + } + } else { + if (*n4 <= 64) { + nb = 1; + } else { + nb = 32; + } + } + } + } else if (s_cmp(c2, "PB", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + if (*n2 <= 64) { + nb = 1; + } else { + nb = 32; + } + } else { + if (*n2 <= 64) { + nb = 1; + } else { + nb = 32; + } + } + } + } else if (s_cmp(c2, "TR", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRI", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nb = 64; + } else { + nb = 64; + } + } + } else if (s_cmp(c2, "LA", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "UUM", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nb = 64; + } else { + nb = 64; + } + } + } else if (sname && s_cmp(c2, "ST", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "EBZ", (ftnlen)3, (ftnlen)3) == 0) { + nb = 1; + } + } + ret_val = nb; + return ret_val; + +L200: + +/* ISPEC = 2: minimum block size */ + + nbmin = 2; + if (s_cmp(c2, "GE", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "QRF", (ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, "RQF", ( + ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, "LQF", (ftnlen)3, ( + ftnlen)3) == 0 || s_cmp(c3, "QLF", (ftnlen)3, (ftnlen)3) == 0) + { + if (sname) { + nbmin = 2; + } else { + nbmin = 2; + } + } else if (s_cmp(c3, "HRD", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nbmin = 2; + } else { + nbmin = 2; + } + } else if (s_cmp(c3, "BRD", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nbmin = 2; + } else { + nbmin = 2; + } + } else if (s_cmp(c3, "TRI", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nbmin = 2; + } else { + nbmin = 2; + } + } + } else if (s_cmp(c2, "SY", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nbmin = 8; + } else { + nbmin = 8; + } + } else if (sname && s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) { + nbmin = 2; + } + } else if (cname && s_cmp(c2, "HE", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) { + 
nbmin = 2; + } + } else if (sname && s_cmp(c2, "OR", (ftnlen)2, (ftnlen)2) == 0) { + if (*(unsigned char *)c3 == 'G') { + if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nbmin = 2; + } + } else if (*(unsigned char *)c3 == 'M') { + if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nbmin = 2; + } + } + } else if (cname && s_cmp(c2, "UN", (ftnlen)2, (ftnlen)2) == 0) { + if (*(unsigned char *)c3 == 'G') { + if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nbmin = 2; + } + } else if (*(unsigned char *)c3 == 'M') { + if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nbmin = 2; + } + } + } + ret_val = nbmin; + return ret_val; + +L300: + +/* ISPEC = 3: crossover point */ + + nx = 0; + if (s_cmp(c2, "GE", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "QRF", (ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, "RQF", ( + ftnlen)3, (ftnlen)3) 
== 0 || s_cmp(c3, "LQF", (ftnlen)3, ( + ftnlen)3) == 0 || s_cmp(c3, "QLF", (ftnlen)3, (ftnlen)3) == 0) + { + if (sname) { + nx = 128; + } else { + nx = 128; + } + } else if (s_cmp(c3, "HRD", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nx = 128; + } else { + nx = 128; + } + } else if (s_cmp(c3, "BRD", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nx = 128; + } else { + nx = 128; + } + } + } else if (s_cmp(c2, "SY", (ftnlen)2, (ftnlen)2) == 0) { + if (sname && s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) { + nx = 32; + } + } else if (cname && s_cmp(c2, "HE", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) { + nx = 32; + } + } else if (sname && s_cmp(c2, "OR", (ftnlen)2, (ftnlen)2) == 0) { + if (*(unsigned char *)c3 == 'G') { + if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nx = 128; + } + } + } else if (cname && s_cmp(c2, "UN", (ftnlen)2, (ftnlen)2) == 0) { + if (*(unsigned char *)c3 == 'G') { + if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nx = 128; + } + } + } + ret_val = nx; + return ret_val; + +L400: + +/* ISPEC = 4: number of shifts (used by xHSEQR) */ + + ret_val = 6; + return ret_val; + +L500: + +/* ISPEC = 5: minimum column dimension (not used) */ + + ret_val = 2; + return ret_val; + +L600: + +/* ISPEC = 6: crossover point for SVD (used by xGELSS and xGESVD) */ + + ret_val = (integer) ((real) min(*n1,*n2) * 1.6f); + return ret_val; + +L700: + 
+/* ISPEC = 7: number of processors (not used) */ + + ret_val = 1; + return ret_val; + +L800: + +/* ISPEC = 8: crossover point for multishift (used by xHSEQR) */ + + ret_val = 50; + return ret_val; + +L900: + +/* + ISPEC = 9: maximum size of the subproblems at the bottom of the + computation tree in the divide-and-conquer algorithm + (used by xGELSD and xGESDD) +*/ + + ret_val = 25; + return ret_val; + +L1000: + +/* + ISPEC = 10: ieee NaN arithmetic can be trusted not to trap + + ILAENV = 0 +*/ + ret_val = 1; + if (ret_val == 1) { + ret_val = ieeeck_(&c__0, &c_b163, &c_b164); + } + return ret_val; + +L1100: + +/* + ISPEC = 11: infinity arithmetic can be trusted not to trap + + ILAENV = 0 +*/ + ret_val = 1; + if (ret_val == 1) { + ret_val = ieeeck_(&c__1, &c_b163, &c_b164); + } + return ret_val; + +/* End of ILAENV */ + +} /* ilaenv_ */ + +/* Subroutine */ int sposv_(char *uplo, integer *n, integer *nrhs, real *a, + integer *lda, real *b, integer *ldb, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *), spotrf_( + char *, integer *, real *, integer *, integer *), spotrs_( + char *, integer *, integer *, real *, integer *, real *, integer * + , integer *); + + +/* + -- LAPACK driver routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + March 31, 1993 + + + Purpose + ======= + + SPOSV computes the solution to a real system of linear equations + A * X = B, + where A is an N-by-N symmetric positive definite matrix and X and B + are N-by-NRHS matrices. + + The Cholesky decomposition is used to factor A as + A = U**T* U, if UPLO = 'U', or + A = L * L**T, if UPLO = 'L', + where U is an upper triangular matrix and L is a lower triangular + matrix. 
The factored form of A is then used to solve the system of + equations A * X = B. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A is stored; + = 'L': Lower triangle of A is stored. + + N (input) INTEGER + The number of linear equations, i.e., the order of the + matrix A. N >= 0. + + NRHS (input) INTEGER + The number of right hand sides, i.e., the number of columns + of the matrix B. NRHS >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the symmetric matrix A. If UPLO = 'U', the leading + N-by-N upper triangular part of A contains the upper + triangular part of the matrix A, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading N-by-N lower triangular part of A contains the lower + triangular part of the matrix A, and the strictly upper + triangular part of A is not referenced. + + On exit, if INFO = 0, the factor U or L from the Cholesky + factorization A = U**T*U or A = L*L**T. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + B (input/output) REAL array, dimension (LDB,NRHS) + On entry, the N-by-NRHS right hand side matrix B. + On exit, if INFO = 0, the N-by-NRHS solution matrix X. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, the leading minor of order i of A is not + positive definite, so the factorization could not be + completed, and the solution has not been computed. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + *info = 0; + if (! lsame_(uplo, "U") && ! 
lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*ldb < max(1,*n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SPOSV ", &i__1); + return 0; + } + +/* Compute the Cholesky factorization A = U'*U or A = L*L'. */ + + spotrf_(uplo, n, &a[a_offset], lda, info); + if (*info == 0) { + +/* Solve the system A*X = B, overwriting B with X. */ + + spotrs_(uplo, n, nrhs, &a[a_offset], lda, &b[b_offset], ldb, info); + + } + return 0; + +/* End of SPOSV */ + +} /* sposv_ */ + +/* Subroutine */ int spotf2_(char *uplo, integer *n, real *a, integer *lda, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + real r__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static integer j; + static real ajj; + extern doublereal sdot_(integer *, real *, integer *, real *, integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), + sgemv_(char *, integer *, integer *, real *, real *, integer *, + real *, integer *, real *, real *, integer *); + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + SPOTF2 computes the Cholesky factorization of a real symmetric + positive definite matrix A. + + The factorization has the form + A = U' * U , if UPLO = 'U', or + A = L * L', if UPLO = 'L', + where U is an upper triangular matrix and L is lower triangular. + + This is the unblocked version of the algorithm, calling Level 2 BLAS. 
+ + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies whether the upper or lower triangular part of the + symmetric matrix A is stored. + = 'U': Upper triangular + = 'L': Lower triangular + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the symmetric matrix A. If UPLO = 'U', the leading + n by n upper triangular part of A contains the upper + triangular part of the matrix A, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading n by n lower triangular part of A contains the lower + triangular part of the matrix A, and the strictly upper + triangular part of A is not referenced. + + On exit, if INFO = 0, the factor U or L from the Cholesky + factorization A = U'*U or A = L*L'. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value + > 0: if INFO = k, the leading minor of order k is not + positive definite, and the factorization could not be + completed. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SPOTF2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (upper) { + +/* Compute the Cholesky factorization A = U'*U. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + +/* Compute U(J,J) and test for non-positive-definiteness. 
*/ + + i__2 = j - 1; + ajj = a[j + j * a_dim1] - sdot_(&i__2, &a[j * a_dim1 + 1], &c__1, + &a[j * a_dim1 + 1], &c__1); + if (ajj <= 0.f) { + a[j + j * a_dim1] = ajj; + goto L30; + } + ajj = sqrt(ajj); + a[j + j * a_dim1] = ajj; + +/* Compute elements J+1:N of row J. */ + + if (j < *n) { + i__2 = j - 1; + i__3 = *n - j; + sgemv_("Transpose", &i__2, &i__3, &c_b181, &a[(j + 1) * + a_dim1 + 1], lda, &a[j * a_dim1 + 1], &c__1, &c_b164, + &a[j + (j + 1) * a_dim1], lda); + i__2 = *n - j; + r__1 = 1.f / ajj; + sscal_(&i__2, &r__1, &a[j + (j + 1) * a_dim1], lda); + } +/* L10: */ + } + } else { + +/* Compute the Cholesky factorization A = L*L'. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + +/* Compute L(J,J) and test for non-positive-definiteness. */ + + i__2 = j - 1; + ajj = a[j + j * a_dim1] - sdot_(&i__2, &a[j + a_dim1], lda, &a[j + + a_dim1], lda); + if (ajj <= 0.f) { + a[j + j * a_dim1] = ajj; + goto L30; + } + ajj = sqrt(ajj); + a[j + j * a_dim1] = ajj; + +/* Compute elements J+1:N of column J. 
*/ + + if (j < *n) { + i__2 = *n - j; + i__3 = j - 1; + sgemv_("No transpose", &i__2, &i__3, &c_b181, &a[j + 1 + + a_dim1], lda, &a[j + a_dim1], lda, &c_b164, &a[j + 1 + + j * a_dim1], &c__1); + i__2 = *n - j; + r__1 = 1.f / ajj; + sscal_(&i__2, &r__1, &a[j + 1 + j * a_dim1], &c__1); + } +/* L20: */ + } + } + goto L40; + +L30: + *info = j; + +L40: + return 0; + +/* End of SPOTF2 */ + +} /* spotf2_ */ + +/* Subroutine */ int spotrf_(char *uplo, integer *n, real *a, integer *lda, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer j, jb, nb; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *); + static logical upper; + extern /* Subroutine */ int strsm_(char *, char *, char *, char *, + integer *, integer *, real *, real *, integer *, real *, integer * + ), ssyrk_(char *, char *, integer + *, integer *, real *, real *, integer *, real *, real *, integer * + ), spotf2_(char *, integer *, real *, integer *, + integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + March 31, 1993 + + + Purpose + ======= + + SPOTRF computes the Cholesky factorization of a real symmetric + positive definite matrix A. + + The factorization has the form + A = U**T * U, if UPLO = 'U', or + A = L * L**T, if UPLO = 'L', + where U is an upper triangular matrix and L is lower triangular. + + This is the block version of the algorithm, calling Level 3 BLAS. 
+ + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A is stored; + = 'L': Lower triangle of A is stored. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the symmetric matrix A. If UPLO = 'U', the leading + N-by-N upper triangular part of A contains the upper + triangular part of the matrix A, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading N-by-N lower triangular part of A contains the lower + triangular part of the matrix A, and the strictly upper + triangular part of A is not referenced. + + On exit, if INFO = 0, the factor U or L from the Cholesky + factorization A = U**T*U or A = L*L**T. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, the leading minor of order i is not + positive definite, and the factorization could not be + completed. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SPOTRF", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Determine the block size for this environment. */ + + nb = ilaenv_(&c__1, "SPOTRF", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + if (nb <= 1 || nb >= *n) { + +/* Use unblocked code. */ + + spotf2_(uplo, n, &a[a_offset], lda, info); + } else { + +/* Use blocked code. */ + + if (upper) { + +/* Compute the Cholesky factorization A = U'*U. 
*/ + + i__1 = *n; + i__2 = nb; + for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { + +/* + Update and factorize the current diagonal block and test + for non-positive-definiteness. + + Computing MIN +*/ + i__3 = nb, i__4 = *n - j + 1; + jb = min(i__3,i__4); + i__3 = j - 1; + ssyrk_("Upper", "Transpose", &jb, &i__3, &c_b181, &a[j * + a_dim1 + 1], lda, &c_b164, &a[j + j * a_dim1], lda); + spotf2_("Upper", &jb, &a[j + j * a_dim1], lda, info); + if (*info != 0) { + goto L30; + } + if (j + jb <= *n) { + +/* Compute the current block row. */ + + i__3 = *n - j - jb + 1; + i__4 = j - 1; + sgemm_("Transpose", "No transpose", &jb, &i__3, &i__4, & + c_b181, &a[j * a_dim1 + 1], lda, &a[(j + jb) * + a_dim1 + 1], lda, &c_b164, &a[j + (j + jb) * + a_dim1], lda); + i__3 = *n - j - jb + 1; + strsm_("Left", "Upper", "Transpose", "Non-unit", &jb, & + i__3, &c_b164, &a[j + j * a_dim1], lda, &a[j + (j + + jb) * a_dim1], lda); + } +/* L10: */ + } + + } else { + +/* Compute the Cholesky factorization A = L*L'. */ + + i__2 = *n; + i__1 = nb; + for (j = 1; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { + +/* + Update and factorize the current diagonal block and test + for non-positive-definiteness. + + Computing MIN +*/ + i__3 = nb, i__4 = *n - j + 1; + jb = min(i__3,i__4); + i__3 = j - 1; + ssyrk_("Lower", "No transpose", &jb, &i__3, &c_b181, &a[j + + a_dim1], lda, &c_b164, &a[j + j * a_dim1], lda); + spotf2_("Lower", &jb, &a[j + j * a_dim1], lda, info); + if (*info != 0) { + goto L30; + } + if (j + jb <= *n) { + +/* Compute the current block column. 
*/ + + i__3 = *n - j - jb + 1; + i__4 = j - 1; + sgemm_("No transpose", "Transpose", &i__3, &jb, &i__4, & + c_b181, &a[j + jb + a_dim1], lda, &a[j + a_dim1], + lda, &c_b164, &a[j + jb + j * a_dim1], lda); + i__3 = *n - j - jb + 1; + strsm_("Right", "Lower", "Transpose", "Non-unit", &i__3, & + jb, &c_b164, &a[j + j * a_dim1], lda, &a[j + jb + + j * a_dim1], lda); + } +/* L20: */ + } + } + } + goto L40; + +L30: + *info = *info + j - 1; + +L40: + return 0; + +/* End of SPOTRF */ + +} /* spotrf_ */ + +/* Subroutine */ int spotrs_(char *uplo, integer *n, integer *nrhs, real *a, + integer *lda, real *b, integer *ldb, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern logical lsame_(char *, char *); + static logical upper; + extern /* Subroutine */ int strsm_(char *, char *, char *, char *, + integer *, integer *, real *, real *, integer *, real *, integer * + ), xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + March 31, 1993 + + + Purpose + ======= + + SPOTRS solves a system of linear equations A*X = B with a symmetric + positive definite matrix A using the Cholesky factorization + A = U**T*U or A = L*L**T computed by SPOTRF. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A is stored; + = 'L': Lower triangle of A is stored. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + NRHS (input) INTEGER + The number of right hand sides, i.e., the number of columns + of the matrix B. NRHS >= 0. + + A (input) REAL array, dimension (LDA,N) + The triangular factor U or L from the Cholesky factorization + A = U**T*U or A = L*L**T, as computed by SPOTRF. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). 
+ + B (input/output) REAL array, dimension (LDB,NRHS) + On entry, the right hand side matrix B. + On exit, the solution matrix X. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*ldb < max(1,*n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SPOTRS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + return 0; + } + + if (upper) { + +/* + Solve A*X = B where A = U'*U. + + Solve U'*X = B, overwriting B with X. +*/ + + strsm_("Left", "Upper", "Transpose", "Non-unit", n, nrhs, &c_b164, &a[ + a_offset], lda, &b[b_offset], ldb); + +/* Solve U*X = B, overwriting B with X. */ + + strsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b164, + &a[a_offset], lda, &b[b_offset], ldb); + } else { + +/* + Solve A*X = B where A = L*L'. + + Solve L*X = B, overwriting B with X. +*/ + + strsm_("Left", "Lower", "No transpose", "Non-unit", n, nrhs, &c_b164, + &a[a_offset], lda, &b[b_offset], ldb); + +/* Solve L'*X = B, overwriting B with X. 
*/ + + strsm_("Left", "Lower", "Transpose", "Non-unit", n, nrhs, &c_b164, &a[ + a_offset], lda, &b[b_offset], ldb); + } + + return 0; + +/* End of SPOTRS */ + +} /* spotrs_ */ + diff --git a/media/sphinxbase/src/libsphinxbase/util/strfuncs.c b/media/sphinxbase/src/libsphinxbase/util/strfuncs.c new file mode 100644 index 000000000..4d2d72ffc --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/util/strfuncs.c @@ -0,0 +1,194 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2006 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * strfuncs.c -- String functions + */ + + +#include +#include +#include +#include +#include + +#include "sphinxbase/ckd_alloc.h" +#include "sphinxbase/strfuncs.h" + +/* Defined in dtoa.c */ +double sb_strtod(const char *s00, char **se); + +double +atof_c(char const *str) +{ + return sb_strtod(str, NULL); +} + +/* Locale-independent isspace to avoid different incompatibilities */ +static int +isspace_c(char ch) +{ + if (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r') + return 1; + return 0; +} + +char * +string_join(const char *base, ...) 
+{ + va_list args; + size_t len; + const char *c; + char *out; + + va_start(args, base); + len = strlen(base); + while ((c = va_arg(args, const char *)) != NULL) { + len += strlen(c); + } + len++; + va_end(args); + + out = ckd_calloc(len, 1); + va_start(args, base); + strcpy(out, base); + while ((c = va_arg(args, const char *)) != NULL) { + strcat(out, c); + } + va_end(args); + + return out; +} + +char * +string_trim(char *string, enum string_edge_e which) +{ + size_t len; + + len = strlen(string); + if (which == STRING_START || which == STRING_BOTH) { + size_t sub = strspn(string, " \t\n\r\f"); + if (sub > 0) { + memmove(string, string + sub, len + 1 - sub); + len -= sub; + } + } + if (which == STRING_END || which == STRING_BOTH) { + long sub = len; + while (--sub >= 0) + if (strchr(" \t\n\r\f", string[sub]) == NULL) + break; + if (sub == -1) + string[0] = '\0'; + else + string[sub+1] = '\0'; + } + return string; +} + +int32 +str2words(char *line, char **ptr, int32 max_ptr) +{ + int32 i, n; + + n = 0; /* #words found so far */ + i = 0; /* For scanning through the input string */ + while (1) { + /* Skip whitespace before next word */ + while (line[i] && isspace_c(line[i])) + ++i; + if (!line[i]) + break; + + if (ptr != NULL && n >= max_ptr) { + /* + * Pointer array size insufficient. Restore NULL chars inserted so far + * to space chars. Not a perfect restoration, but better than nothing. 
+ */ + for (; i >= 0; --i) + if (line[i] == '\0') + line[i] = ' '; + + return -1; + } + + /* Scan to end of word */ + if (ptr != NULL) + ptr[n] = line + i; + ++n; + while (line[i] && !isspace_c(line[i])) + ++i; + if (!line[i]) + break; + if (ptr != NULL) + line[i] = '\0'; + ++i; + } + + return n; +} + + +int32 +nextword(char *line, const char *delim, char **word, char *delimfound) +{ + const char *d; + char *w; + + /* Skip past any preceding delimiters */ + for (w = line; *w; w++) { + for (d = delim; *d && (*d != *w); d++); + if (!*d) + break; + } + if (!*w) + return -1; + + *word = w; /* Beginning of word */ + + /* Skip until first delimiter char */ + for (w++; *w; w++) { + for (d = delim; *d && (*d != *w); d++); + if (*d) + break; + } + + /* Replace delimiter with NULL char, but return the original first */ + *delimfound = *w; + *w = '\0'; + + return (w - *word); +} diff --git a/media/sphinxbase/src/libsphinxbase/util/utf8.c b/media/sphinxbase/src/libsphinxbase/util/utf8.c new file mode 100644 index 000000000..bd763da3f --- /dev/null +++ b/media/sphinxbase/src/libsphinxbase/util/utf8.c @@ -0,0 +1,75 @@ +/* Copyright (c) 2008-2009 Bjoern Hoehrmann */ +/* See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details. */ + +/* Slightly modified to use Sphinx types and remove explicit inline. 
*/ + +#include "sphinxbase/prim_type.h" + +#define UTF8_ACCEPT 0 +#define UTF8_REJECT 1 + +static const uint8 utf8d[] = { + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 00..1f */ + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 20..3f */ + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 40..5f */ + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 60..7f */ + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, /* 80..9f */ + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, /* a0..bf */ + 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, /* c0..df */ + 0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, /* e0..ef */ + 0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, /* f0..ff */ + 0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1, /* s0..s0 */ + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, /* s1..s2 */ + 1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, /* s3..s4 */ + 1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, /* s5..s6 */ + 1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* s7..s8 */ +}; + +uint32 +utf8_decode(uint32 *state, uint32 *codep, uint32 byte) { + uint32 type = utf8d[byte]; + + *codep = (*state != UTF8_ACCEPT) ? + (byte & 0x3fu) | (*codep << 6) : + (0xff >> type) & (byte); + + *state = utf8d[256 + *state*16 + type]; + return *state; +} + +/* CMU code starts here. */ +/* ==================================================================== + * Copyright (c) 2009 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
#!/bin/sh
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

#
# Usage: ./update.sh <sphinxbase_source_dir>
#
# Copies the needed files from a directory containing the original
# sphinxbase source, and applies any local patches we're carrying.
# All output paths are relative, so run this from the directory that holds
# config.h, sphinxbase/ and src/.

# Stop at the first failing command instead of continuing with a
# half-updated tree.
set -e

if [ $# -ne 1 ] || [ ! -d "$1" ]; then
    echo "Usage: $0 <sphinxbase_source_dir>" >&2
    exit 1
fi
src=$1

# Create config.h
echo "#if ( defined(_WIN32) || defined(__CYGWIN__) )" > config.h
cat "$src/include/win32/config.h" >> config.h
echo "#else" >> config.h
cat "$src/include/android/config.h" >> config.h
echo "#endif" >> config.h

# Create sphinx_config.h
echo "#if ( defined(_WIN32) || defined(__CYGWIN__) )" > sphinx_config.h
cat "$src/include/win32/sphinx_config.h" >> sphinx_config.h
echo "#else" >> sphinx_config.h
cat "$src/include/android/sphinx_config.h" >> sphinx_config.h
echo "#endif" >> sphinx_config.h
# Comment out HAVE_LONG_LONG.  Written via a temporary file because the
# in-place flag is not portable: "sed -i ''" is the BSD spelling and fails
# with GNU sed.
sed -e 's|#define HAVE_LONG_LONG 1|/*#define HAVE_LONG_LONG 1*/|g' \
    sphinx_config.h > sphinx_config.h.tmp
mv sphinx_config.h.tmp sphinx_config.h

# Copy created file
cp sphinx_config.h sphinxbase/sphinx_config.h

# Copy source files
cp "$src"/include/sphinxbase/*.h sphinxbase/
cp "$src"/src/libsphinxbase/fe/*.c src/libsphinxbase/fe/
cp "$src"/src/libsphinxbase/fe/*.h src/libsphinxbase/fe/
cp "$src"/src/libsphinxbase/feat/*.c src/libsphinxbase/feat/
cp "$src"/src/libsphinxbase/lm/*.c src/libsphinxbase/lm/
cp "$src"/src/libsphinxbase/lm/*.h src/libsphinxbase/lm/
cp "$src"/src/libsphinxbase/util/*.c src/libsphinxbase/util/

# Apply any patches against upstream here.
patch -l -p1 < sbthread.patch