summaryrefslogtreecommitdiffstats
path: root/media/sphinxbase/src/libsphinxbase/util/huff_code.c
diff options
context:
space:
mode:
authorMoonchild <moonchild@palemoon.org>2020-05-20 10:19:04 +0000
committerMoonchild <moonchild@palemoon.org>2020-05-20 14:04:17 +0000
commit99c2e698d2a3c56649e42d8d2133706cd8c9501e (patch)
tree85be449d772eb57860f0f386efb4bc1e790fd498 /media/sphinxbase/src/libsphinxbase/util/huff_code.c
parent15ac4021b06d549e47c9e2efc9364a9eb96bfe82 (diff)
downloadUXP-99c2e698d2a3c56649e42d8d2133706cd8c9501e.tar
UXP-99c2e698d2a3c56649e42d8d2133706cd8c9501e.tar.gz
UXP-99c2e698d2a3c56649e42d8d2133706cd8c9501e.tar.lz
UXP-99c2e698d2a3c56649e42d8d2133706cd8c9501e.tar.xz
UXP-99c2e698d2a3c56649e42d8d2133706cd8c9501e.zip
Issue #1538 - remove speech recognition engine
This removes speech recognition, pocketsphinx, training models and the speech automated test interface. This also re-establishes proper use of MOZ_WEBSPEECH to work for the speech API (synthesis part only) that was a broken mess before, with some synth parts being always built, some parts being built only with it enabled and recognition parts being dependent on it. I'm pretty sure it'd be totally busted if you'd ever have tried building without MOZ_WEBPEECH before. Tested that synthesis still works as-intended. This resolves #1538
Diffstat (limited to 'media/sphinxbase/src/libsphinxbase/util/huff_code.c')
-rw-r--r--media/sphinxbase/src/libsphinxbase/util/huff_code.c651
1 files changed, 0 insertions, 651 deletions
diff --git a/media/sphinxbase/src/libsphinxbase/util/huff_code.c b/media/sphinxbase/src/libsphinxbase/util/huff_code.c
deleted file mode 100644
index dd3fb582d..000000000
--- a/media/sphinxbase/src/libsphinxbase/util/huff_code.c
+++ /dev/null
@@ -1,651 +0,0 @@
-/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
-/* ====================================================================
- * Copyright (c) 2009 Carnegie Mellon University. All rights
- * reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- *
- * This work was supported in part by funding from the Defense Advanced
- * Research Projects Agency and the National Science Foundation of the
- * United States of America, and the CMU Sphinx Speech Consortium.
- *
- * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
- * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
- * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
- * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * ====================================================================
- *
- */
-
-#include <string.h>
-
-#include "sphinxbase/huff_code.h"
-#include "sphinxbase/ckd_alloc.h"
-#include "sphinxbase/hash_table.h"
-#include "sphinxbase/byteorder.h"
-#include "sphinxbase/heap.h"
-#include "sphinxbase/pio.h"
-#include "sphinxbase/err.h"
-
-typedef struct huff_node_s {
- int nbits;
- struct huff_node_s *l;
- union {
- int32 ival;
- char *sval;
- struct huff_node_s *r;
- } r;
-} huff_node_t;
-
-typedef struct huff_codeword_s {
- union {
- int32 ival;
- char *sval;
- } r;
- uint32 nbits, codeword;
-} huff_codeword_t;
-
-enum {
- HUFF_CODE_INT,
- HUFF_CODE_STR
-};
-
-struct huff_code_s {
- int16 refcount;
- uint8 maxbits;
- uint8 type;
- uint32 *firstcode;
- uint32 *numl;
- huff_codeword_t **syms;
- hash_table_t *codewords;
- FILE *fh;
- bit_encode_t *be;
- int boff;
-};
-
-static huff_node_t *
-huff_node_new_int(int32 val)
-{
- huff_node_t *hn = ckd_calloc(1, sizeof(*hn));
- hn->r.ival = val;
- return hn;
-}
-
-static huff_node_t *
-huff_node_new_str(char const *val)
-{
- huff_node_t *hn = ckd_calloc(1, sizeof(*hn));
- hn->r.sval = ckd_salloc(val);
- return hn;
-}
-
-static huff_node_t *
-huff_node_new_parent(huff_node_t *l, huff_node_t *r)
-{
- huff_node_t *hn = ckd_calloc(1, sizeof(*hn));
- hn->l = l;
- hn->r.r = r;
- /* Propagate maximum bit length. */
- if (r->nbits > l->nbits)
- hn->nbits = r->nbits + 1;
- else
- hn->nbits = l->nbits + 1;
- return hn;
-}
-
-static void
-huff_node_free_int(huff_node_t *root)
-{
- if (root->l) {
- huff_node_free_int(root->l);
- huff_node_free_int(root->r.r);
- }
- ckd_free(root);
-}
-
-static void
-huff_node_free_str(huff_node_t *root, int freestr)
-{
- if (root->l) {
- huff_node_free_str(root->l, freestr);
- huff_node_free_str(root->r.r, freestr);
- }
- else {
- if (freestr)
- ckd_free(root->r.sval);
- }
- ckd_free(root);
-}
-
-static huff_node_t *
-huff_code_build_tree(heap_t *q)
-{
- huff_node_t *root = NULL;
- int32 rf;
-
- while (heap_size(q) > 1) {
- huff_node_t *l, *r, *p;
- int32 lf, rf;
-
- heap_pop(q, (void *)&l, &lf);
- heap_pop(q, (void *)&r, &rf);
- p = huff_node_new_parent(l, r);
- heap_insert(q, p, lf + rf);
- }
- heap_pop(q, (void **)&root, &rf);
- return root;
-}
-
-static void
-huff_code_canonicalize(huff_code_t *hc, huff_node_t *root)
-{
- glist_t agenda;
- uint32 *nextcode;
- int i, ncw;
-
- hc->firstcode = ckd_calloc(hc->maxbits+1, sizeof(*hc->firstcode));
- hc->syms = ckd_calloc(hc->maxbits+1, sizeof(*hc->syms));
- hc->numl = ckd_calloc(hc->maxbits+1, sizeof(*nextcode));
- nextcode = ckd_calloc(hc->maxbits+1, sizeof(*nextcode));
-
- /* Traverse the tree, annotating it with the actual bit
- * lengths, and histogramming them in numl. */
- root->nbits = 0;
- ncw = 0;
- agenda = glist_add_ptr(NULL, root);
- while (agenda) {
- huff_node_t *node = gnode_ptr(agenda);
- agenda = gnode_free(agenda, NULL);
- if (node->l) {
- node->l->nbits = node->nbits + 1;
- agenda = glist_add_ptr(agenda, node->l);
- node->r.r->nbits = node->nbits + 1;
- agenda = glist_add_ptr(agenda, node->r.r);
- }
- else {
- hc->numl[node->nbits]++;
- ncw++;
- }
- }
- /* Create starting codes and symbol tables for each bit length. */
- hc->syms[hc->maxbits] = ckd_calloc(hc->numl[hc->maxbits], sizeof(**hc->syms));
- for (i = hc->maxbits - 1; i > 0; --i) {
- hc->firstcode[i] = (hc->firstcode[i+1] + hc->numl[i+1]) / 2;
- hc->syms[i] = ckd_calloc(hc->numl[i], sizeof(**hc->syms));
- }
- memcpy(nextcode, hc->firstcode, (hc->maxbits + 1) * sizeof(*nextcode));
- /* Traverse the tree again to produce the codebook itself. */
- hc->codewords = hash_table_new(ncw, HASH_CASE_YES);
- agenda = glist_add_ptr(NULL, root);
- while (agenda) {
- huff_node_t *node = gnode_ptr(agenda);
- agenda = gnode_free(agenda, NULL);
- if (node->l) {
- agenda = glist_add_ptr(agenda, node->l);
- agenda = glist_add_ptr(agenda, node->r.r);
- }
- else {
- /* Initialize codebook entry, which also retains symbol pointer. */
- huff_codeword_t *cw;
- uint32 codeword = nextcode[node->nbits] & ((1 << node->nbits) - 1);
- cw = hc->syms[node->nbits] + (codeword - hc->firstcode[node->nbits]);
- cw->nbits = node->nbits;
- cw->r.sval = node->r.sval; /* Will copy ints too... */
- cw->codeword = codeword;
- if (hc->type == HUFF_CODE_INT) {
- hash_table_enter_bkey(hc->codewords,
- (char const *)&cw->r.ival,
- sizeof(cw->r.ival),
- (void *)cw);
- }
- else {
- hash_table_enter(hc->codewords, cw->r.sval, (void *)cw);
- }
- ++nextcode[node->nbits];
- }
- }
- ckd_free(nextcode);
-}
-
-huff_code_t *
-huff_code_build_int(int32 const *values, int32 const *frequencies, int nvals)
-{
- huff_code_t *hc;
- huff_node_t *root;
- heap_t *q;
- int i;
-
- hc = ckd_calloc(1, sizeof(*hc));
- hc->refcount = 1;
- hc->type = HUFF_CODE_INT;
-
- /* Initialize the heap with nodes for each symbol. */
- q = heap_new();
- for (i = 0; i < nvals; ++i) {
- heap_insert(q,
- huff_node_new_int(values[i]),
- frequencies[i]);
- }
-
- /* Now build the tree, which gives us codeword lengths. */
- root = huff_code_build_tree(q);
- heap_destroy(q);
- if (root == NULL || root->nbits > 32) {
- E_ERROR("Huffman trees currently limited to 32 bits\n");
- huff_node_free_int(root);
- huff_code_free(hc);
- return NULL;
- }
-
- /* Build a canonical codebook. */
- hc->maxbits = root->nbits;
- huff_code_canonicalize(hc, root);
-
- /* Tree no longer needed. */
- huff_node_free_int(root);
-
- return hc;
-}
-
-huff_code_t *
-huff_code_build_str(char * const *values, int32 const *frequencies, int nvals)
-{
- huff_code_t *hc;
- huff_node_t *root;
- heap_t *q;
- int i;
-
- hc = ckd_calloc(1, sizeof(*hc));
- hc->refcount = 1;
- hc->type = HUFF_CODE_STR;
-
- /* Initialize the heap with nodes for each symbol. */
- q = heap_new();
- for (i = 0; i < nvals; ++i) {
- heap_insert(q,
- huff_node_new_str(values[i]),
- frequencies[i]);
- }
-
- /* Now build the tree, which gives us codeword lengths. */
- root = huff_code_build_tree(q);
- heap_destroy(q);
- if (root == NULL || root->nbits > 32) {
- E_ERROR("Huffman trees currently limited to 32 bits\n");
- huff_node_free_str(root, TRUE);
- huff_code_free(hc);
- return NULL;
- }
-
- /* Build a canonical codebook. */
- hc->maxbits = root->nbits;
- huff_code_canonicalize(hc, root);
-
- /* Tree no longer needed (note we retain pointers to its strings). */
- huff_node_free_str(root, FALSE);
-
- return hc;
-}
-
-huff_code_t *
-huff_code_read(FILE *infh)
-{
- huff_code_t *hc;
- int i, j;
-
- hc = ckd_calloc(1, sizeof(*hc));
- hc->refcount = 1;
-
- hc->maxbits = fgetc(infh);
- hc->type = fgetc(infh);
-
- /* Two bytes of padding. */
- fgetc(infh);
- fgetc(infh);
-
- /* Allocate stuff. */
- hc->firstcode = ckd_calloc(hc->maxbits + 1, sizeof(*hc->firstcode));
- hc->numl = ckd_calloc(hc->maxbits + 1, sizeof(*hc->numl));
- hc->syms = ckd_calloc(hc->maxbits + 1, sizeof(*hc->syms));
-
- /* Read the symbol tables. */
- hc->codewords = hash_table_new(hc->maxbits, HASH_CASE_YES);
- for (i = 1; i <= hc->maxbits; ++i) {
- if (fread(&hc->firstcode[i], 4, 1, infh) != 1)
- goto error_out;
- SWAP_BE_32(&hc->firstcode[i]);
- if (fread(&hc->numl[i], 4, 1, infh) != 1)
- goto error_out;
- SWAP_BE_32(&hc->numl[i]);
- hc->syms[i] = ckd_calloc(hc->numl[i], sizeof(**hc->syms));
- for (j = 0; j < hc->numl[i]; ++j) {
- huff_codeword_t *cw = &hc->syms[i][j];
- cw->nbits = i;
- cw->codeword = hc->firstcode[i] + j;
- if (hc->type == HUFF_CODE_INT) {
- if (fread(&cw->r.ival, 4, 1, infh) != 1)
- goto error_out;
- SWAP_BE_32(&cw->r.ival);
- hash_table_enter_bkey(hc->codewords,
- (char const *)&cw->r.ival,
- sizeof(cw->r.ival),
- (void *)cw);
- }
- else {
- size_t len;
- cw->r.sval = fread_line(infh, &len);
- cw->r.sval[len-1] = '\0';
- hash_table_enter(hc->codewords, cw->r.sval, (void *)cw);
- }
- }
- }
-
- return hc;
-error_out:
- huff_code_free(hc);
- return NULL;
-}
-
-int
-huff_code_write(huff_code_t *hc, FILE *outfh)
-{
- int i, j;
-
- /* Maximum codeword length */
- fputc(hc->maxbits, outfh);
- /* Symbol type */
- fputc(hc->type, outfh);
- /* Two extra bytes (for future use and alignment) */
- fputc(0, outfh);
- fputc(0, outfh);
- /* For each codeword length: */
- for (i = 1; i <= hc->maxbits; ++i) {
- uint32 val;
-
- /* Starting code, number of codes. */
- val = hc->firstcode[i];
- /* Canonically big-endian (like the data itself) */
- SWAP_BE_32(&val);
- fwrite(&val, 4, 1, outfh);
- val = hc->numl[i];
- SWAP_BE_32(&val);
- fwrite(&val, 4, 1, outfh);
-
- /* Symbols for each code (FIXME: Should compress these too) */
- for (j = 0; j < hc->numl[i]; ++j) {
- if (hc->type == HUFF_CODE_INT) {
- int32 val = hc->syms[i][j].r.ival;
- SWAP_BE_32(&val);
- fwrite(&val, 4, 1, outfh);
- }
- else {
- /* Write them all separated by newlines, so that
- * fgets() will read them for us. */
- fprintf(outfh, "%s\n", hc->syms[i][j].r.sval);
- }
- }
- }
- return 0;
-}
-
-int
-huff_code_dump_codebits(FILE *dumpfh, uint32 nbits, uint32 codeword)
-{
- uint32 i;
-
- for (i = 0; i < nbits; ++i)
- fputc((codeword & (1<<(nbits-i-1))) ? '1' : '0', dumpfh);
- return 0;
-}
-
-int
-huff_code_dump(huff_code_t *hc, FILE *dumpfh)
-{
- int i, j;
-
- /* Print out all codewords. */
- fprintf(dumpfh, "Maximum codeword length: %d\n", hc->maxbits);
- fprintf(dumpfh, "Symbols are %s\n", (hc->type == HUFF_CODE_STR) ? "strings" : "ints");
- fprintf(dumpfh, "Codewords:\n");
- for (i = 1; i <= hc->maxbits; ++i) {
- for (j = 0; j < hc->numl[i]; ++j) {
- if (hc->type == HUFF_CODE_STR)
- fprintf(dumpfh, "%-30s", hc->syms[i][j].r.sval);
- else
- fprintf(dumpfh, "%-30d", hc->syms[i][j].r.ival);
- huff_code_dump_codebits(dumpfh, hc->syms[i][j].nbits,
- hc->syms[i][j].codeword);
- fprintf(dumpfh, "\n");
- }
- }
- return 0;
-}
-
-huff_code_t *
-huff_code_retain(huff_code_t *hc)
-{
- ++hc->refcount;
- return hc;
-}
-
-int
-huff_code_free(huff_code_t *hc)
-{
- int i;
-
- if (hc == NULL)
- return 0;
- if (--hc->refcount > 0)
- return hc->refcount;
- for (i = 0; i <= hc->maxbits; ++i) {
- int j;
- for (j = 0; j < hc->numl[i]; ++j) {
- if (hc->type == HUFF_CODE_STR)
- ckd_free(hc->syms[i][j].r.sval);
- }
- ckd_free(hc->syms[i]);
- }
- ckd_free(hc->firstcode);
- ckd_free(hc->numl);
- ckd_free(hc->syms);
- hash_table_free(hc->codewords);
- ckd_free(hc);
- return 0;
-}
-
-FILE *
-huff_code_attach(huff_code_t *hc, FILE *fh, char const *mode)
-{
- FILE *oldfh = huff_code_detach(hc);
-
- hc->fh = fh;
- if (mode[0] == 'w')
- hc->be = bit_encode_attach(hc->fh);
- return oldfh;
-}
-
-FILE *
-huff_code_detach(huff_code_t *hc)
-{
- FILE *oldfh = hc->fh;
-
- if (hc->be) {
- bit_encode_flush(hc->be);
- bit_encode_free(hc->be);
- hc->be = NULL;
- }
- hc->fh = NULL;
- return oldfh;
-}
-
-int
-huff_code_encode_int(huff_code_t *hc, int32 sym, uint32 *outcw)
-{
- huff_codeword_t *cw;
-
- if (hash_table_lookup_bkey(hc->codewords,
- (char const *)&sym,
- sizeof(sym),
- (void **)&cw) < 0)
- return 0;
- if (hc->be)
- bit_encode_write_cw(hc->be, cw->codeword, cw->nbits);
- if (outcw) *outcw = cw->codeword;
- return cw->nbits;
-}
-
-int
-huff_code_encode_str(huff_code_t *hc, char const *sym, uint32 *outcw)
-{
- huff_codeword_t *cw;
-
- if (hash_table_lookup(hc->codewords,
- sym,
- (void **)&cw) < 0)
- return 0;
- if (hc->be)
- bit_encode_write_cw(hc->be, cw->codeword, cw->nbits);
- if (outcw) *outcw = cw->codeword;
- return cw->nbits;
-}
-
-static huff_codeword_t *
-huff_code_decode_data(huff_code_t *hc, char const **inout_data,
- size_t *inout_data_len, int *inout_offset)
-{
- char const *data = *inout_data;
- char const *end = data + *inout_data_len;
- int offset = *inout_offset;
- uint32 cw;
- int cwlen;
- int byte;
-
- if (data == end)
- return NULL;
- byte = *data++;
- cw = !!(byte & (1 << (7-offset++)));
- cwlen = 1;
- /* printf("%.*x ", cwlen, cw); */
- while (cwlen <= hc->maxbits && cw < hc->firstcode[cwlen]) {
- ++cwlen;
- cw <<= 1;
- if (offset > 7) {
- if (data == end)
- return NULL;
- byte = *data++;
- offset = 0;
- }
- cw |= !!(byte & (1 << (7-offset++)));
- /* printf("%.*x ", cwlen, cw); */
- }
- if (cwlen > hc->maxbits) /* FAIL: invalid data */
- return NULL;
-
- /* Put the last byte back if there are bits left over. */
- if (offset < 8)
- --data;
- else
- offset = 0;
-
- /* printf("%.*x\n", cwlen, cw); */
- *inout_data_len = end - data;
- *inout_data = data;
- *inout_offset = offset;
- return hc->syms[cwlen] + (cw - hc->firstcode[cwlen]);
-}
-
-static huff_codeword_t *
-huff_code_decode_fh(huff_code_t *hc)
-{
- uint32 cw;
- int cwlen;
- int byte;
-
- if ((byte = fgetc(hc->fh)) == EOF)
- return NULL;
- cw = !!(byte & (1 << (7-hc->boff++)));
- cwlen = 1;
- /* printf("%.*x ", cwlen, cw); */
- while (cwlen <= hc->maxbits && cw < hc->firstcode[cwlen]) {
- ++cwlen;
- cw <<= 1;
- if (hc->boff > 7) {
- if ((byte = fgetc(hc->fh)) == EOF)
- return NULL;
- hc->boff = 0;
- }
- cw |= !!(byte & (1 << (7-hc->boff++)));
- /* printf("%.*x ", cwlen, cw); */
- }
- if (cwlen > hc->maxbits) /* FAIL: invalid data */
- return NULL;
-
- /* Put the last byte back if there are bits left over. */
- if (hc->boff < 8)
- ungetc(byte, hc->fh);
- else
- hc->boff = 0;
-
- /* printf("%.*x\n", cwlen, cw); */
- return hc->syms[cwlen] + (cw - hc->firstcode[cwlen]);
-}
-
-int
-huff_code_decode_int(huff_code_t *hc, int *outval,
- char const **inout_data,
- size_t *inout_data_len, int *inout_offset)
-{
- huff_codeword_t *cw;
-
- if (inout_data)
- cw = huff_code_decode_data(hc, inout_data, inout_data_len, inout_offset);
- else if (hc->fh)
- cw = huff_code_decode_fh(hc);
- else
- return -1;
-
- if (cw == NULL)
- return -1;
- if (outval)
- *outval = cw->r.ival;
-
- return 0;
-}
-
-char const *
-huff_code_decode_str(huff_code_t *hc,
- char const **inout_data,
- size_t *inout_data_len, int *inout_offset)
-{
- huff_codeword_t *cw;
-
- if (inout_data)
- cw = huff_code_decode_data(hc, inout_data, inout_data_len, inout_offset);
- else if (hc->fh)
- cw = huff_code_decode_fh(hc);
- else
- return NULL;
-
- if (cw == NULL)
- return NULL;
-
- return cw->r.sval;
-}