summaryrefslogtreecommitdiffstats
path: root/media/sphinxbase/src/libsphinxbase/lm/lm3g_templates.c
diff options
context:
space:
mode:
Diffstat (limited to 'media/sphinxbase/src/libsphinxbase/lm/lm3g_templates.c')
-rw-r--r--media/sphinxbase/src/libsphinxbase/lm/lm3g_templates.c560
1 files changed, 0 insertions, 560 deletions
diff --git a/media/sphinxbase/src/libsphinxbase/lm/lm3g_templates.c b/media/sphinxbase/src/libsphinxbase/lm/lm3g_templates.c
deleted file mode 100644
index 080cfa8e6..000000000
--- a/media/sphinxbase/src/libsphinxbase/lm/lm3g_templates.c
+++ /dev/null
@@ -1,560 +0,0 @@
-/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
-/* ====================================================================
- * Copyright (c) 1999-2007 Carnegie Mellon University. All rights
- * reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- *
- * This work was supported in part by funding from the Defense Advanced
- * Research Projects Agency and the National Science Foundation of the
- * United States of America, and the CMU Sphinx Speech Consortium.
- *
- * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
- * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
- * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
- * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * ====================================================================
- *
- */
-/*
- * \file lm3g_templates.c Core Sphinx 3-gram code used in
- * DMP/DMP32/ARPA (for now) model code.
- */
-
-#include <assert.h>
-
-/* Locate a specific bigram within a bigram list */
-#define BINARY_SEARCH_THRESH 16
-static int32
-find_bg(bigram_t * bg, int32 n, int32 w)
-{
- int32 i, b, e;
-
- /* Binary search until segment size < threshold */
- b = 0;
- e = n;
- while (e - b > BINARY_SEARCH_THRESH) {
- i = (b + e) >> 1;
- if (bg[i].wid < w)
- b = i + 1;
- else if (bg[i].wid > w)
- e = i;
- else
- return i;
- }
-
- /* Linear search within narrowed segment */
- for (i = b; (i < e) && (bg[i].wid != w); i++);
- return ((i < e) ? i : -1);
-}
-
-static int32
-lm3g_bg_score(NGRAM_MODEL_TYPE *model,
- int32 lw1, int32 lw2, int32 *n_used)
-{
- int32 i, n, b, score;
- bigram_t *bg;
-
- if (lw1 < 0 || model->base.n < 2) {
- *n_used = 1;
- return model->lm3g.unigrams[lw2].prob1.l;
- }
-
- b = FIRST_BG(model, lw1);
- n = FIRST_BG(model, lw1 + 1) - b;
- bg = model->lm3g.bigrams + b;
-
- if ((i = find_bg(bg, n, lw2)) >= 0) {
- /* Access mode = bigram */
- *n_used = 2;
- score = model->lm3g.prob2[bg[i].prob2].l;
- }
- else {
- /* Access mode = unigram */
- *n_used = 1;
- score = model->lm3g.unigrams[lw1].bo_wt1.l + model->lm3g.unigrams[lw2].prob1.l;
- }
-
- return (score);
-}
-
-static void
-load_tginfo(NGRAM_MODEL_TYPE *model, int32 lw1, int32 lw2)
-{
- int32 i, n, b, t;
- bigram_t *bg;
- tginfo_t *tginfo;
-
- /* First allocate space for tg information for bg lw1,lw2 */
- tginfo = (tginfo_t *) listelem_malloc(model->lm3g.le);
- tginfo->w1 = lw1;
- tginfo->tg = NULL;
- tginfo->next = model->lm3g.tginfo[lw2];
- model->lm3g.tginfo[lw2] = tginfo;
-
- /* Locate bigram lw1,lw2 */
- b = model->lm3g.unigrams[lw1].bigrams;
- n = model->lm3g.unigrams[lw1 + 1].bigrams - b;
- bg = model->lm3g.bigrams + b;
-
- if ((n > 0) && ((i = find_bg(bg, n, lw2)) >= 0)) {
- tginfo->bowt = model->lm3g.bo_wt2[bg[i].bo_wt2].l;
-
- /* Find t = Absolute first trigram index for bigram lw1,lw2 */
- b += i; /* b = Absolute index of bigram lw1,lw2 on disk */
- t = FIRST_TG(model, b);
-
- tginfo->tg = model->lm3g.trigrams + t;
-
- /* Find #tg for bigram w1,w2 */
- tginfo->n_tg = FIRST_TG(model, b + 1) - t;
- }
- else { /* No bigram w1,w2 */
- tginfo->bowt = 0;
- tginfo->n_tg = 0;
- }
-}
-
-/* Similar to find_bg */
-static int32
-find_tg(trigram_t * tg, int32 n, uint32 w)
-{
- int32 i, b, e;
-
- b = 0;
- e = n;
- while (e - b > BINARY_SEARCH_THRESH) {
- i = (b + e) >> 1;
- if (tg[i].wid < w)
- b = i + 1;
- else if (tg[i].wid > w)
- e = i;
- else
- return i;
- }
-
- for (i = b; (i < e) && (tg[i].wid != w); i++);
- return ((i < e) ? i : -1);
-}
-
-static int32
-lm3g_tg_score(NGRAM_MODEL_TYPE *model, int32 lw1,
- int32 lw2, int32 lw3, int32 *n_used)
-{
- ngram_model_t *base = &model->base;
- int32 i, n, score;
- trigram_t *tg;
- tginfo_t *tginfo, *prev_tginfo;
-
- if ((base->n < 3) || (lw1 < 0) || (lw2 < 0))
- return (lm3g_bg_score(model, lw2, lw3, n_used));
-
- prev_tginfo = NULL;
- for (tginfo = model->lm3g.tginfo[lw2]; tginfo; tginfo = tginfo->next) {
- if (tginfo->w1 == lw1)
- break;
- prev_tginfo = tginfo;
- }
-
- if (!tginfo) {
- load_tginfo(model, lw1, lw2);
- tginfo = model->lm3g.tginfo[lw2];
- }
- else if (prev_tginfo) {
- prev_tginfo->next = tginfo->next;
- tginfo->next = model->lm3g.tginfo[lw2];
- model->lm3g.tginfo[lw2] = tginfo;
- }
-
- tginfo->used = 1;
-
- /* Trigrams for w1,w2 now pointed to by tginfo */
- n = tginfo->n_tg;
- tg = tginfo->tg;
- if ((i = find_tg(tg, n, lw3)) >= 0) {
- /* Access mode = trigram */
- *n_used = 3;
- score = model->lm3g.prob3[tg[i].prob3].l;
- }
- else {
- score = tginfo->bowt + lm3g_bg_score(model, lw2, lw3, n_used);
- }
-
- return (score);
-}
-
-static int32
-lm3g_template_score(ngram_model_t *base, int32 wid,
- int32 *history, int32 n_hist,
- int32 *n_used)
-{
- NGRAM_MODEL_TYPE *model = (NGRAM_MODEL_TYPE *)base;
- switch (n_hist) {
- case 0:
- /* Access mode: unigram */
- *n_used = 1;
- return model->lm3g.unigrams[wid].prob1.l;
- case 1:
- return lm3g_bg_score(model, history[0], wid, n_used);
- case 2:
- default:
- /* Anything greater than 2 is the same as a trigram for now. */
- return lm3g_tg_score(model, history[1], history[0], wid, n_used);
- }
-}
-
-static int32
-lm3g_template_raw_score(ngram_model_t *base, int32 wid,
- int32 *history, int32 n_hist,
- int32 *n_used)
-{
- NGRAM_MODEL_TYPE *model = (NGRAM_MODEL_TYPE *)base;
- int32 score;
-
- switch (n_hist) {
- case 0:
- /* Access mode: unigram */
- *n_used = 1;
- /* Undo insertion penalty. */
- score = model->lm3g.unigrams[wid].prob1.l - base->log_wip;
- /* Undo language weight. */
- score = (int32)(score / base->lw);
- /* Undo unigram interpolation */
- if (strcmp(base->word_str[wid], "<s>") != 0) { /* FIXME: configurable start_sym */
- /* This operation is numerically unstable, so try to avoid it
- * as possible */
- if (base->log_uniform + base->log_uniform_weight > logmath_get_zero(base->lmath)) {
- score = logmath_log(base->lmath,
- logmath_exp(base->lmath, score)
- - logmath_exp(base->lmath,
- base->log_uniform + base->log_uniform_weight));
- }
- }
- return score;
- case 1:
- score = lm3g_bg_score(model, history[0], wid, n_used);
- break;
- case 2:
- default:
- /* Anything greater than 2 is the same as a trigram for now. */
- score = lm3g_tg_score(model, history[1], history[0], wid, n_used);
- break;
- }
- /* FIXME (maybe): This doesn't undo unigram weighting in backoff cases. */
- return (int32)((score - base->log_wip) / base->lw);
-}
-
-static int32
-lm3g_template_add_ug(ngram_model_t *base,
- int32 wid, int32 lweight)
-{
- NGRAM_MODEL_TYPE *model = (NGRAM_MODEL_TYPE *)base;
- return lm3g_add_ug(base, &model->lm3g, wid, lweight);
-}
-
-static void
-lm3g_template_flush(ngram_model_t *base)
-{
- NGRAM_MODEL_TYPE *model = (NGRAM_MODEL_TYPE *)base;
- lm3g_tginfo_reset(base, &model->lm3g);
-}
-
-typedef struct lm3g_iter_s {
- ngram_iter_t base;
- unigram_t *ug;
- bigram_t *bg;
- trigram_t *tg;
-} lm3g_iter_t;
-
-static ngram_iter_t *
-lm3g_template_iter(ngram_model_t *base, int32 wid,
- int32 *history, int32 n_hist)
-{
- NGRAM_MODEL_TYPE *model = (NGRAM_MODEL_TYPE *)base;
- lm3g_iter_t *itor = (lm3g_iter_t *)ckd_calloc(1, sizeof(*itor));
-
- ngram_iter_init((ngram_iter_t *)itor, base, n_hist, FALSE);
-
- if (n_hist == 0) {
- /* Unigram is the easiest. */
- itor->ug = model->lm3g.unigrams + wid;
- return (ngram_iter_t *)itor;
- }
- else if (n_hist == 1) {
- int32 i, n, b;
- /* Find the bigram, as in bg_score above (duplicate code...) */
- itor->ug = model->lm3g.unigrams + history[0];
- b = FIRST_BG(model, history[0]);
- n = FIRST_BG(model, history[0] + 1) - b;
- itor->bg = model->lm3g.bigrams + b;
- /* If no such bigram exists then fail. */
- if ((i = find_bg(itor->bg, n, wid)) < 0) {
- ngram_iter_free((ngram_iter_t *)itor);
- return NULL;
- }
- itor->bg += i;
- return (ngram_iter_t *)itor;
- }
- else if (n_hist == 2) {
- int32 i, n;
- tginfo_t *tginfo, *prev_tginfo;
- /* Find the trigram, as in tg_score above (duplicate code...) */
- itor->ug = model->lm3g.unigrams + history[1];
- prev_tginfo = NULL;
- for (tginfo = model->lm3g.tginfo[history[0]];
- tginfo; tginfo = tginfo->next) {
- if (tginfo->w1 == history[1])
- break;
- prev_tginfo = tginfo;
- }
-
- if (!tginfo) {
- load_tginfo(model, history[1], history[0]);
- tginfo = model->lm3g.tginfo[history[0]];
- }
- else if (prev_tginfo) {
- prev_tginfo->next = tginfo->next;
- tginfo->next = model->lm3g.tginfo[history[0]];
- model->lm3g.tginfo[history[0]] = tginfo;
- }
-
- tginfo->used = 1;
-
- /* Trigrams for w1,w2 now pointed to by tginfo */
- n = tginfo->n_tg;
- itor->tg = tginfo->tg;
- if ((i = find_tg(itor->tg, n, wid)) >= 0) {
- itor->tg += i;
- /* Now advance the bigram pointer accordingly. FIXME:
- * Note that we actually already found the relevant bigram
- * in load_tginfo. */
- itor->bg = model->lm3g.bigrams;
- while (FIRST_TG(model, (itor->bg - model->lm3g.bigrams + 1))
- <= (itor->tg - model->lm3g.trigrams))
- ++itor->bg;
- return (ngram_iter_t *)itor;
- }
- else {
- ngram_iter_free((ngram_iter_t *)itor);
- return (ngram_iter_t *)NULL;
- }
- }
- else {
- /* Should not happen. */
- assert(n_hist == 0); /* Guaranteed to fail. */
- ngram_iter_free((ngram_iter_t *)itor);
- return NULL;
- }
-}
-
-static ngram_iter_t *
-lm3g_template_mgrams(ngram_model_t *base, int m)
-{
- NGRAM_MODEL_TYPE *model = (NGRAM_MODEL_TYPE *)base;
- lm3g_iter_t *itor = (lm3g_iter_t *)ckd_calloc(1, sizeof(*itor));
- ngram_iter_init((ngram_iter_t *)itor, base, m, FALSE);
-
- itor->ug = model->lm3g.unigrams;
- itor->bg = model->lm3g.bigrams;
- itor->tg = model->lm3g.trigrams;
-
- /* Advance bigram pointer to match first trigram. */
- if (m > 1 && base->n_counts[1] > 1) {
- while (FIRST_TG(model, (itor->bg - model->lm3g.bigrams + 1))
- <= (itor->tg - model->lm3g.trigrams))
- ++itor->bg;
- }
-
- /* Advance unigram pointer to match first bigram. */
- if (m > 0 && base->n_counts[0] > 1) {
- while (itor->ug[1].bigrams <= (itor->bg - model->lm3g.bigrams))
- ++itor->ug;
- }
-
- return (ngram_iter_t *)itor;
-}
-
-static ngram_iter_t *
-lm3g_template_successors(ngram_iter_t *bitor)
-{
- NGRAM_MODEL_TYPE *model = (NGRAM_MODEL_TYPE *)bitor->model;
- lm3g_iter_t *from = (lm3g_iter_t *)bitor;
- lm3g_iter_t *itor = (lm3g_iter_t *)ckd_calloc(1, sizeof(*itor));
-
- itor->ug = from->ug;
- switch (bitor->m) {
- case 0:
- /* Next itor bigrams is the same as this itor bigram or
- itor bigrams is more than total count. This means no successors */
- if (((itor->ug + 1) - model->lm3g.unigrams < bitor->model->n_counts[0] &&
- itor->ug->bigrams == (itor->ug + 1)->bigrams) ||
- itor->ug->bigrams == bitor->model->n_counts[1])
- goto done;
-
- /* Start iterating from first bigram successor of from->ug. */
- itor->bg = model->lm3g.bigrams + itor->ug->bigrams;
- break;
- case 1:
- itor->bg = from->bg;
-
- /* This indicates no successors */
- if (((itor->bg + 1) - model->lm3g.bigrams < bitor->model->n_counts[1] &&
- FIRST_TG (model, itor->bg - model->lm3g.bigrams) ==
- FIRST_TG (model, (itor->bg + 1) - model->lm3g.bigrams)) ||
- FIRST_TG (model, itor->bg - model->lm3g.bigrams) == bitor->model->n_counts[2])
- goto done;
-
- /* Start iterating from first trigram successor of from->bg. */
- itor->tg = (model->lm3g.trigrams
- + FIRST_TG(model, (itor->bg - model->lm3g.bigrams)));
-#if 0
- printf("%s %s => %d (%s)\n",
- model->base.word_str[itor->ug - model->lm3g.unigrams],
- model->base.word_str[itor->bg->wid],
- FIRST_TG(model, (itor->bg - model->lm3g.bigrams)),
- model->base.word_str[itor->tg->wid]);
-#endif
- break;
- case 2:
- default:
- /* All invalid! */
- goto done;
- }
-
- ngram_iter_init((ngram_iter_t *)itor, bitor->model, bitor->m + 1, TRUE);
- return (ngram_iter_t *)itor;
- done:
- ckd_free(itor);
- return NULL;
-}
-
-static int32 const *
-lm3g_template_iter_get(ngram_iter_t *base,
- int32 *out_score, int32 *out_bowt)
-{
- NGRAM_MODEL_TYPE *model = (NGRAM_MODEL_TYPE *)base->model;
- lm3g_iter_t *itor = (lm3g_iter_t *)base;
-
- base->wids[0] = itor->ug - model->lm3g.unigrams;
- if (itor->bg) base->wids[1] = itor->bg->wid;
- if (itor->tg) base->wids[2] = itor->tg->wid;
-#if 0
- printf("itor_get: %d %d %d\n", base->wids[0], base->wids[1], base->wids[2]);
-#endif
-
- switch (base->m) {
- case 0:
- *out_score = itor->ug->prob1.l;
- *out_bowt = itor->ug->bo_wt1.l;
- break;
- case 1:
- *out_score = model->lm3g.prob2[itor->bg->prob2].l;
- if (model->lm3g.bo_wt2)
- *out_bowt = model->lm3g.bo_wt2[itor->bg->bo_wt2].l;
- else
- *out_bowt = 0;
- break;
- case 2:
- *out_score = model->lm3g.prob3[itor->tg->prob3].l;
- *out_bowt = 0;
- break;
- default: /* Should not happen. */
- return NULL;
- }
- return base->wids;
-}
-
-static ngram_iter_t *
-lm3g_template_iter_next(ngram_iter_t *base)
-{
- NGRAM_MODEL_TYPE *model = (NGRAM_MODEL_TYPE *)base->model;
- lm3g_iter_t *itor = (lm3g_iter_t *)base;
-
- switch (base->m) {
- case 0:
- ++itor->ug;
- /* Check for end condition. */
- if (itor->ug - model->lm3g.unigrams >= base->model->n_counts[0])
- goto done;
- break;
- case 1:
- ++itor->bg;
- /* Check for end condition. */
- if (itor->bg - model->lm3g.bigrams >= base->model->n_counts[1])
- goto done;
- /* Advance unigram pointer if necessary in order to get one
- * that points to this bigram. */
- while (itor->bg - model->lm3g.bigrams >= itor->ug[1].bigrams) {
- /* Stop if this is a successor iterator, since we don't
- * want a new unigram. */
- if (base->successor)
- goto done;
- ++itor->ug;
- if (itor->ug == model->lm3g.unigrams + base->model->n_counts[0]) {
- E_ERROR("Bigram %d has no valid unigram parent\n",
- itor->bg - model->lm3g.bigrams);
- goto done;
- }
- }
- break;
- case 2:
- ++itor->tg;
- /* Check for end condition. */
- if (itor->tg - model->lm3g.trigrams >= base->model->n_counts[2])
- goto done;
- /* Advance bigram pointer if necessary. */
- while (itor->tg - model->lm3g.trigrams >=
- FIRST_TG(model, (itor->bg - model->lm3g.bigrams + 1))) {
- if (base->successor)
- goto done;
- ++itor->bg;
- if (itor->bg == model->lm3g.bigrams + base->model->n_counts[1]) {
- E_ERROR("Trigram %d has no valid bigram parent\n",
- itor->tg - model->lm3g.trigrams);
-
- goto done;
- }
- }
- /* Advance unigram pointer if necessary. */
- while (itor->bg - model->lm3g.bigrams >= itor->ug[1].bigrams) {
- ++itor->ug;
- if (itor->ug == model->lm3g.unigrams + base->model->n_counts[0]) {
- E_ERROR("Trigram %d has no valid unigram parent\n",
- itor->tg - model->lm3g.trigrams);
- goto done;
- }
- }
- break;
- default: /* Should not happen. */
- goto done;
- }
-
- return (ngram_iter_t *)itor;
-done:
- ngram_iter_free(base);
- return NULL;
-}
-
-static void
-lm3g_template_iter_free(ngram_iter_t *base)
-{
- ckd_free(base);
-}