summaryrefslogtreecommitdiffstats
path: root/media/sphinxbase/src/libsphinxbase/util/huff_code.c
diff options
context:
space:
mode:
Diffstat (limited to 'media/sphinxbase/src/libsphinxbase/util/huff_code.c')
-rw-r--r--media/sphinxbase/src/libsphinxbase/util/huff_code.c651
1 files changed, 0 insertions, 651 deletions
diff --git a/media/sphinxbase/src/libsphinxbase/util/huff_code.c b/media/sphinxbase/src/libsphinxbase/util/huff_code.c
deleted file mode 100644
index dd3fb582d..000000000
--- a/media/sphinxbase/src/libsphinxbase/util/huff_code.c
+++ /dev/null
@@ -1,651 +0,0 @@
-/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
-/* ====================================================================
- * Copyright (c) 2009 Carnegie Mellon University. All rights
- * reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- *
- * This work was supported in part by funding from the Defense Advanced
- * Research Projects Agency and the National Science Foundation of the
- * United States of America, and the CMU Sphinx Speech Consortium.
- *
- * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
- * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
- * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
- * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * ====================================================================
- *
- */
-
-#include <string.h>
-
-#include "sphinxbase/huff_code.h"
-#include "sphinxbase/ckd_alloc.h"
-#include "sphinxbase/hash_table.h"
-#include "sphinxbase/byteorder.h"
-#include "sphinxbase/heap.h"
-#include "sphinxbase/pio.h"
-#include "sphinxbase/err.h"
-
-typedef struct huff_node_s {
- int nbits;
- struct huff_node_s *l;
- union {
- int32 ival;
- char *sval;
- struct huff_node_s *r;
- } r;
-} huff_node_t;
-
-typedef struct huff_codeword_s {
- union {
- int32 ival;
- char *sval;
- } r;
- uint32 nbits, codeword;
-} huff_codeword_t;
-
-enum {
- HUFF_CODE_INT,
- HUFF_CODE_STR
-};
-
-struct huff_code_s {
- int16 refcount;
- uint8 maxbits;
- uint8 type;
- uint32 *firstcode;
- uint32 *numl;
- huff_codeword_t **syms;
- hash_table_t *codewords;
- FILE *fh;
- bit_encode_t *be;
- int boff;
-};
-
-static huff_node_t *
-huff_node_new_int(int32 val)
-{
- huff_node_t *hn = ckd_calloc(1, sizeof(*hn));
- hn->r.ival = val;
- return hn;
-}
-
-static huff_node_t *
-huff_node_new_str(char const *val)
-{
- huff_node_t *hn = ckd_calloc(1, sizeof(*hn));
- hn->r.sval = ckd_salloc(val);
- return hn;
-}
-
-static huff_node_t *
-huff_node_new_parent(huff_node_t *l, huff_node_t *r)
-{
- huff_node_t *hn = ckd_calloc(1, sizeof(*hn));
- hn->l = l;
- hn->r.r = r;
- /* Propagate maximum bit length. */
- if (r->nbits > l->nbits)
- hn->nbits = r->nbits + 1;
- else
- hn->nbits = l->nbits + 1;
- return hn;
-}
-
-static void
-huff_node_free_int(huff_node_t *root)
-{
- if (root->l) {
- huff_node_free_int(root->l);
- huff_node_free_int(root->r.r);
- }
- ckd_free(root);
-}
-
-static void
-huff_node_free_str(huff_node_t *root, int freestr)
-{
- if (root->l) {
- huff_node_free_str(root->l, freestr);
- huff_node_free_str(root->r.r, freestr);
- }
- else {
- if (freestr)
- ckd_free(root->r.sval);
- }
- ckd_free(root);
-}
-
-static huff_node_t *
-huff_code_build_tree(heap_t *q)
-{
- huff_node_t *root = NULL;
- int32 rf;
-
- while (heap_size(q) > 1) {
- huff_node_t *l, *r, *p;
- int32 lf, rf;
-
- heap_pop(q, (void *)&l, &lf);
- heap_pop(q, (void *)&r, &rf);
- p = huff_node_new_parent(l, r);
- heap_insert(q, p, lf + rf);
- }
- heap_pop(q, (void **)&root, &rf);
- return root;
-}
-
-static void
-huff_code_canonicalize(huff_code_t *hc, huff_node_t *root)
-{
- glist_t agenda;
- uint32 *nextcode;
- int i, ncw;
-
- hc->firstcode = ckd_calloc(hc->maxbits+1, sizeof(*hc->firstcode));
- hc->syms = ckd_calloc(hc->maxbits+1, sizeof(*hc->syms));
- hc->numl = ckd_calloc(hc->maxbits+1, sizeof(*nextcode));
- nextcode = ckd_calloc(hc->maxbits+1, sizeof(*nextcode));
-
- /* Traverse the tree, annotating it with the actual bit
- * lengths, and histogramming them in numl. */
- root->nbits = 0;
- ncw = 0;
- agenda = glist_add_ptr(NULL, root);
- while (agenda) {
- huff_node_t *node = gnode_ptr(agenda);
- agenda = gnode_free(agenda, NULL);
- if (node->l) {
- node->l->nbits = node->nbits + 1;
- agenda = glist_add_ptr(agenda, node->l);
- node->r.r->nbits = node->nbits + 1;
- agenda = glist_add_ptr(agenda, node->r.r);
- }
- else {
- hc->numl[node->nbits]++;
- ncw++;
- }
- }
- /* Create starting codes and symbol tables for each bit length. */
- hc->syms[hc->maxbits] = ckd_calloc(hc->numl[hc->maxbits], sizeof(**hc->syms));
- for (i = hc->maxbits - 1; i > 0; --i) {
- hc->firstcode[i] = (hc->firstcode[i+1] + hc->numl[i+1]) / 2;
- hc->syms[i] = ckd_calloc(hc->numl[i], sizeof(**hc->syms));
- }
- memcpy(nextcode, hc->firstcode, (hc->maxbits + 1) * sizeof(*nextcode));
- /* Traverse the tree again to produce the codebook itself. */
- hc->codewords = hash_table_new(ncw, HASH_CASE_YES);
- agenda = glist_add_ptr(NULL, root);
- while (agenda) {
- huff_node_t *node = gnode_ptr(agenda);
- agenda = gnode_free(agenda, NULL);
- if (node->l) {
- agenda = glist_add_ptr(agenda, node->l);
- agenda = glist_add_ptr(agenda, node->r.r);
- }
- else {
- /* Initialize codebook entry, which also retains symbol pointer. */
- huff_codeword_t *cw;
- uint32 codeword = nextcode[node->nbits] & ((1 << node->nbits) - 1);
- cw = hc->syms[node->nbits] + (codeword - hc->firstcode[node->nbits]);
- cw->nbits = node->nbits;
- cw->r.sval = node->r.sval; /* Will copy ints too... */
- cw->codeword = codeword;
- if (hc->type == HUFF_CODE_INT) {
- hash_table_enter_bkey(hc->codewords,
- (char const *)&cw->r.ival,
- sizeof(cw->r.ival),
- (void *)cw);
- }
- else {
- hash_table_enter(hc->codewords, cw->r.sval, (void *)cw);
- }
- ++nextcode[node->nbits];
- }
- }
- ckd_free(nextcode);
-}
-
-huff_code_t *
-huff_code_build_int(int32 const *values, int32 const *frequencies, int nvals)
-{
- huff_code_t *hc;
- huff_node_t *root;
- heap_t *q;
- int i;
-
- hc = ckd_calloc(1, sizeof(*hc));
- hc->refcount = 1;
- hc->type = HUFF_CODE_INT;
-
- /* Initialize the heap with nodes for each symbol. */
- q = heap_new();
- for (i = 0; i < nvals; ++i) {
- heap_insert(q,
- huff_node_new_int(values[i]),
- frequencies[i]);
- }
-
- /* Now build the tree, which gives us codeword lengths. */
- root = huff_code_build_tree(q);
- heap_destroy(q);
- if (root == NULL || root->nbits > 32) {
- E_ERROR("Huffman trees currently limited to 32 bits\n");
- huff_node_free_int(root);
- huff_code_free(hc);
- return NULL;
- }
-
- /* Build a canonical codebook. */
- hc->maxbits = root->nbits;
- huff_code_canonicalize(hc, root);
-
- /* Tree no longer needed. */
- huff_node_free_int(root);
-
- return hc;
-}
-
-huff_code_t *
-huff_code_build_str(char * const *values, int32 const *frequencies, int nvals)
-{
- huff_code_t *hc;
- huff_node_t *root;
- heap_t *q;
- int i;
-
- hc = ckd_calloc(1, sizeof(*hc));
- hc->refcount = 1;
- hc->type = HUFF_CODE_STR;
-
- /* Initialize the heap with nodes for each symbol. */
- q = heap_new();
- for (i = 0; i < nvals; ++i) {
- heap_insert(q,
- huff_node_new_str(values[i]),
- frequencies[i]);
- }
-
- /* Now build the tree, which gives us codeword lengths. */
- root = huff_code_build_tree(q);
- heap_destroy(q);
- if (root == NULL || root->nbits > 32) {
- E_ERROR("Huffman trees currently limited to 32 bits\n");
- huff_node_free_str(root, TRUE);
- huff_code_free(hc);
- return NULL;
- }
-
- /* Build a canonical codebook. */
- hc->maxbits = root->nbits;
- huff_code_canonicalize(hc, root);
-
- /* Tree no longer needed (note we retain pointers to its strings). */
- huff_node_free_str(root, FALSE);
-
- return hc;
-}
-
-huff_code_t *
-huff_code_read(FILE *infh)
-{
- huff_code_t *hc;
- int i, j;
-
- hc = ckd_calloc(1, sizeof(*hc));
- hc->refcount = 1;
-
- hc->maxbits = fgetc(infh);
- hc->type = fgetc(infh);
-
- /* Two bytes of padding. */
- fgetc(infh);
- fgetc(infh);
-
- /* Allocate stuff. */
- hc->firstcode = ckd_calloc(hc->maxbits + 1, sizeof(*hc->firstcode));
- hc->numl = ckd_calloc(hc->maxbits + 1, sizeof(*hc->numl));
- hc->syms = ckd_calloc(hc->maxbits + 1, sizeof(*hc->syms));
-
- /* Read the symbol tables. */
- hc->codewords = hash_table_new(hc->maxbits, HASH_CASE_YES);
- for (i = 1; i <= hc->maxbits; ++i) {
- if (fread(&hc->firstcode[i], 4, 1, infh) != 1)
- goto error_out;
- SWAP_BE_32(&hc->firstcode[i]);
- if (fread(&hc->numl[i], 4, 1, infh) != 1)
- goto error_out;
- SWAP_BE_32(&hc->numl[i]);
- hc->syms[i] = ckd_calloc(hc->numl[i], sizeof(**hc->syms));
- for (j = 0; j < hc->numl[i]; ++j) {
- huff_codeword_t *cw = &hc->syms[i][j];
- cw->nbits = i;
- cw->codeword = hc->firstcode[i] + j;
- if (hc->type == HUFF_CODE_INT) {
- if (fread(&cw->r.ival, 4, 1, infh) != 1)
- goto error_out;
- SWAP_BE_32(&cw->r.ival);
- hash_table_enter_bkey(hc->codewords,
- (char const *)&cw->r.ival,
- sizeof(cw->r.ival),
- (void *)cw);
- }
- else {
- size_t len;
- cw->r.sval = fread_line(infh, &len);
- cw->r.sval[len-1] = '\0';
- hash_table_enter(hc->codewords, cw->r.sval, (void *)cw);
- }
- }
- }
-
- return hc;
-error_out:
- huff_code_free(hc);
- return NULL;
-}
-
-int
-huff_code_write(huff_code_t *hc, FILE *outfh)
-{
- int i, j;
-
- /* Maximum codeword length */
- fputc(hc->maxbits, outfh);
- /* Symbol type */
- fputc(hc->type, outfh);
- /* Two extra bytes (for future use and alignment) */
- fputc(0, outfh);
- fputc(0, outfh);
- /* For each codeword length: */
- for (i = 1; i <= hc->maxbits; ++i) {
- uint32 val;
-
- /* Starting code, number of codes. */
- val = hc->firstcode[i];
- /* Canonically big-endian (like the data itself) */
- SWAP_BE_32(&val);
- fwrite(&val, 4, 1, outfh);
- val = hc->numl[i];
- SWAP_BE_32(&val);
- fwrite(&val, 4, 1, outfh);
-
- /* Symbols for each code (FIXME: Should compress these too) */
- for (j = 0; j < hc->numl[i]; ++j) {
- if (hc->type == HUFF_CODE_INT) {
- int32 val = hc->syms[i][j].r.ival;
- SWAP_BE_32(&val);
- fwrite(&val, 4, 1, outfh);
- }
- else {
- /* Write them all separated by newlines, so that
- * fgets() will read them for us. */
- fprintf(outfh, "%s\n", hc->syms[i][j].r.sval);
- }
- }
- }
- return 0;
-}
-
-int
-huff_code_dump_codebits(FILE *dumpfh, uint32 nbits, uint32 codeword)
-{
- uint32 i;
-
- for (i = 0; i < nbits; ++i)
- fputc((codeword & (1<<(nbits-i-1))) ? '1' : '0', dumpfh);
- return 0;
-}
-
-int
-huff_code_dump(huff_code_t *hc, FILE *dumpfh)
-{
- int i, j;
-
- /* Print out all codewords. */
- fprintf(dumpfh, "Maximum codeword length: %d\n", hc->maxbits);
- fprintf(dumpfh, "Symbols are %s\n", (hc->type == HUFF_CODE_STR) ? "strings" : "ints");
- fprintf(dumpfh, "Codewords:\n");
- for (i = 1; i <= hc->maxbits; ++i) {
- for (j = 0; j < hc->numl[i]; ++j) {
- if (hc->type == HUFF_CODE_STR)
- fprintf(dumpfh, "%-30s", hc->syms[i][j].r.sval);
- else
- fprintf(dumpfh, "%-30d", hc->syms[i][j].r.ival);
- huff_code_dump_codebits(dumpfh, hc->syms[i][j].nbits,
- hc->syms[i][j].codeword);
- fprintf(dumpfh, "\n");
- }
- }
- return 0;
-}
-
-huff_code_t *
-huff_code_retain(huff_code_t *hc)
-{
- ++hc->refcount;
- return hc;
-}
-
-int
-huff_code_free(huff_code_t *hc)
-{
- int i;
-
- if (hc == NULL)
- return 0;
- if (--hc->refcount > 0)
- return hc->refcount;
- for (i = 0; i <= hc->maxbits; ++i) {
- int j;
- for (j = 0; j < hc->numl[i]; ++j) {
- if (hc->type == HUFF_CODE_STR)
- ckd_free(hc->syms[i][j].r.sval);
- }
- ckd_free(hc->syms[i]);
- }
- ckd_free(hc->firstcode);
- ckd_free(hc->numl);
- ckd_free(hc->syms);
- hash_table_free(hc->codewords);
- ckd_free(hc);
- return 0;
-}
-
-FILE *
-huff_code_attach(huff_code_t *hc, FILE *fh, char const *mode)
-{
- FILE *oldfh = huff_code_detach(hc);
-
- hc->fh = fh;
- if (mode[0] == 'w')
- hc->be = bit_encode_attach(hc->fh);
- return oldfh;
-}
-
-FILE *
-huff_code_detach(huff_code_t *hc)
-{
- FILE *oldfh = hc->fh;
-
- if (hc->be) {
- bit_encode_flush(hc->be);
- bit_encode_free(hc->be);
- hc->be = NULL;
- }
- hc->fh = NULL;
- return oldfh;
-}
-
-int
-huff_code_encode_int(huff_code_t *hc, int32 sym, uint32 *outcw)
-{
- huff_codeword_t *cw;
-
- if (hash_table_lookup_bkey(hc->codewords,
- (char const *)&sym,
- sizeof(sym),
- (void **)&cw) < 0)
- return 0;
- if (hc->be)
- bit_encode_write_cw(hc->be, cw->codeword, cw->nbits);
- if (outcw) *outcw = cw->codeword;
- return cw->nbits;
-}
-
-int
-huff_code_encode_str(huff_code_t *hc, char const *sym, uint32 *outcw)
-{
- huff_codeword_t *cw;
-
- if (hash_table_lookup(hc->codewords,
- sym,
- (void **)&cw) < 0)
- return 0;
- if (hc->be)
- bit_encode_write_cw(hc->be, cw->codeword, cw->nbits);
- if (outcw) *outcw = cw->codeword;
- return cw->nbits;
-}
-
-static huff_codeword_t *
-huff_code_decode_data(huff_code_t *hc, char const **inout_data,
- size_t *inout_data_len, int *inout_offset)
-{
- char const *data = *inout_data;
- char const *end = data + *inout_data_len;
- int offset = *inout_offset;
- uint32 cw;
- int cwlen;
- int byte;
-
- if (data == end)
- return NULL;
- byte = *data++;
- cw = !!(byte & (1 << (7-offset++)));
- cwlen = 1;
- /* printf("%.*x ", cwlen, cw); */
- while (cwlen <= hc->maxbits && cw < hc->firstcode[cwlen]) {
- ++cwlen;
- cw <<= 1;
- if (offset > 7) {
- if (data == end)
- return NULL;
- byte = *data++;
- offset = 0;
- }
- cw |= !!(byte & (1 << (7-offset++)));
- /* printf("%.*x ", cwlen, cw); */
- }
- if (cwlen > hc->maxbits) /* FAIL: invalid data */
- return NULL;
-
- /* Put the last byte back if there are bits left over. */
- if (offset < 8)
- --data;
- else
- offset = 0;
-
- /* printf("%.*x\n", cwlen, cw); */
- *inout_data_len = end - data;
- *inout_data = data;
- *inout_offset = offset;
- return hc->syms[cwlen] + (cw - hc->firstcode[cwlen]);
-}
-
-static huff_codeword_t *
-huff_code_decode_fh(huff_code_t *hc)
-{
- uint32 cw;
- int cwlen;
- int byte;
-
- if ((byte = fgetc(hc->fh)) == EOF)
- return NULL;
- cw = !!(byte & (1 << (7-hc->boff++)));
- cwlen = 1;
- /* printf("%.*x ", cwlen, cw); */
- while (cwlen <= hc->maxbits && cw < hc->firstcode[cwlen]) {
- ++cwlen;
- cw <<= 1;
- if (hc->boff > 7) {
- if ((byte = fgetc(hc->fh)) == EOF)
- return NULL;
- hc->boff = 0;
- }
- cw |= !!(byte & (1 << (7-hc->boff++)));
- /* printf("%.*x ", cwlen, cw); */
- }
- if (cwlen > hc->maxbits) /* FAIL: invalid data */
- return NULL;
-
- /* Put the last byte back if there are bits left over. */
- if (hc->boff < 8)
- ungetc(byte, hc->fh);
- else
- hc->boff = 0;
-
- /* printf("%.*x\n", cwlen, cw); */
- return hc->syms[cwlen] + (cw - hc->firstcode[cwlen]);
-}
-
-int
-huff_code_decode_int(huff_code_t *hc, int *outval,
- char const **inout_data,
- size_t *inout_data_len, int *inout_offset)
-{
- huff_codeword_t *cw;
-
- if (inout_data)
- cw = huff_code_decode_data(hc, inout_data, inout_data_len, inout_offset);
- else if (hc->fh)
- cw = huff_code_decode_fh(hc);
- else
- return -1;
-
- if (cw == NULL)
- return -1;
- if (outval)
- *outval = cw->r.ival;
-
- return 0;
-}
-
-char const *
-huff_code_decode_str(huff_code_t *hc,
- char const **inout_data,
- size_t *inout_data_len, int *inout_offset)
-{
- huff_codeword_t *cw;
-
- if (inout_data)
- cw = huff_code_decode_data(hc, inout_data, inout_data_len, inout_offset);
- else if (hc->fh)
- cw = huff_code_decode_fh(hc);
- else
- return NULL;
-
- if (cw == NULL)
- return NULL;
-
- return cw->r.sval;
-}