summaryrefslogtreecommitdiffstats
path: root/media/pocketsphinx/src/ps_lattice_internal.h
blob: 8b5f60a3b4265ec1fdab6cdf9087ebc6cadaa5b2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
/* ====================================================================
 * Copyright (c) 2008 Carnegie Mellon University.  All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer. 
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * This work was supported in part by funding from the Defense Advanced 
 * Research Projects Agency and the National Science Foundation of the 
 * United States of America, and the CMU Sphinx Speech Consortium.
 *
 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * ====================================================================
 *
 */

/**
 * @file ps_lattice_internal.h Word graph search implementation
 */

#ifndef __PS_LATTICE_INTERNAL_H__
#define __PS_LATTICE_INTERNAL_H__

/**
 * Linked list of DAG link pointers.
 *
 * Because the same link structure is used for forward and reverse
 * links, as well as for the agenda used in bestpath search, we can't
 * store the list pointer inside latlink_t.  We could use glist_t
 * here, but it wastes 4 bytes per entry on 32-bit machines.
 */
typedef struct latlink_list_s {
    ps_latlink_t *link;
    struct latlink_list_s *next;
} latlink_list_t;

/**
 * Word graph structure used in bestpath/nbest search.
 */
struct ps_lattice_s {
    int refcount;      /**< Reference count. */

    logmath_t *lmath;    /**< Log-math object. */
    ps_search_t *search; /**< Search (if generated by search). */
    dict_t *dict;	 /**< Dictionary for this DAG. */
    int32 silence;       /**< Silence word ID. */
    int32 frate;         /**< Frame rate. */

    ps_latnode_t *nodes;  /**< List of all nodes. */
    ps_latnode_t *start;  /**< Starting node. */
    ps_latnode_t *end;    /**< Ending node. */

    frame_idx_t n_frames;    /**< Number of frames for this utterance. */
    int16 n_nodes;     /**< Number of nodes in this lattice. */
    int32 final_node_ascr; /**< Acoustic score of implicit link exiting final node. */
    int32 norm;        /**< Normalizer for posterior probabilities. */
    char *hyp_str;     /**< Current hypothesis string. */

    listelem_alloc_t *latnode_alloc;     /**< Node allocator for this DAG. */
    listelem_alloc_t *latlink_alloc;     /**< Link allocator for this DAG. */
    listelem_alloc_t *latlink_list_alloc; /**< List element allocator for this DAG. */

    /* This will probably be replaced with a heap. */
    latlink_list_t *q_head; /**< Queue of links for traversal. */
    latlink_list_t *q_tail; /**< Queue of links for traversal. */
};

/**
 * Links between DAG nodes.
 *
 * A link corresponds to a single hypothesized instance of a word with
 * a given start and end point.

 */
struct ps_latlink_s {
    struct ps_latnode_s *from;	/**< From node */
    struct ps_latnode_s *to;	/**< To node */
    struct ps_latlink_s *best_prev;
    int32 ascr;			/**< Score for from->wid (from->sf to this->ef) */
    int32 path_scr;		/**< Best path score from root of DAG */
    frame_idx_t ef;			/**< Ending frame of this word  */
    int32 alpha;                /**< Forward probability of this link P(w,o_1^{ef}) */
    int32 beta;                 /**< Backward probability of this link P(w|o_{ef+1}^T) */
};

/**
 * DAG nodes.
 *
 * A node corresponds to a number of hypothesized instances of a word
 * which all share the same starting point.
 */
struct ps_latnode_s {
    int32 id;			/**< Unique id for this node */
    int32 wid;			/**< Dictionary word id */
    int32 basewid;		/**< Dictionary base word id */
    /* FIXME: These are (ab)used to store backpointer indices, therefore they MUST be 32 bits. */
    int32 fef;			/**< First end frame */
    int32 lef;			/**< Last end frame */
    frame_idx_t sf;			/**< Start frame */
    int16 reachable;		/**< From \verbatim </s> \endverbatim or \verbatim <s> \endverbatim */
    int32 node_id;		/**< Node from fsg model, used to map lattice back to model */
    union {
        glist_t velist;         /**< List of history entries with different lmstate (tst only) */
	int32 fanin;		/**< Number nodes with links to this node */
	int32 rem_score;	/**< Estimated best score from node.sf to end */
	int32 best_exit;	/**< Best exit score (used for final nodes only) */
    } info;
    latlink_list_t *exits;      /**< Links out of this node */
    latlink_list_t *entries;    /**< Links into this node */

    struct ps_latnode_s *alt;   /**< Node with alternate pronunciation for this word */
    struct ps_latnode_s *next;	/**< Next node in DAG (no ordering implied) */
};

/**
 * Segmentation "iterator" for backpointer table results.
 */
typedef struct dag_seg_s {
    ps_seg_t base;       /**< Base structure. */
    ps_latlink_t **links;   /**< Array of lattice links. */
    int32 norm;     /**< Normalizer for posterior probabilities. */
    int16 n_links;  /**< Number of lattice links. */
    int16 cur;      /**< Current position in bpidx. */
} dag_seg_t;

/**
 * Partial path structure used in N-best (A*) search.
 *
 * Each partial path (latpath_t) is constructed by extending another
 * partial path--parent--by one node.
 */
typedef struct ps_latpath_s {
    ps_latnode_t *node;            /**< Node ending this path. */
    struct ps_latpath_s *parent;   /**< Previous element in this path. */
    struct ps_latpath_s *next;     /**< Pointer to next path in list of paths. */
    int32 score;                  /**< Exact score from start node up to node->sf. */
} ps_latpath_t;

/**
 * A* search structure.
 */
typedef struct ps_astar_s {
    ps_lattice_t *dag;
    ngram_model_t *lmset;
    float32 lwf;

    frame_idx_t sf;
    frame_idx_t ef;
    int32 w1;
    int32 w2;

    int32 n_hyp_tried;
    int32 n_hyp_insert;
    int32 n_hyp_reject;
    int32 insert_depth;
    int32 n_path;

    ps_latpath_t *path_list;
    ps_latpath_t *path_tail;
    ps_latpath_t *top;

    glist_t hyps;	             /**< List of hypothesis strings. */
    listelem_alloc_t *latpath_alloc; /**< Path allocator for N-best search. */
} ps_astar_t;

/**
 * Segmentation "iterator" for A* search results.
 */
typedef struct astar_seg_s {
    ps_seg_t base;
    ps_latnode_t **nodes;
    int n_nodes;
    int cur;
} astar_seg_t;

/**
 * Construct an empty word graph with reference to a search structure.
 */
ps_lattice_t *ps_lattice_init_search(ps_search_t *search, int n_frame);

/**
 * Insert penalty for fillers
 */
void ps_lattice_penalize_fillers(ps_lattice_t *dag, int32 silpen, int32 fillpen);

/**
 * Remove nodes marked as unreachable.
 */
void ps_lattice_delete_unreachable(ps_lattice_t *dag);

/**
 * Add an edge to the traversal queue.
 */
void ps_lattice_pushq(ps_lattice_t *dag, ps_latlink_t *link);

/**
 * Remove an edge from the traversal queue.
 */
ps_latlink_t *ps_lattice_popq(ps_lattice_t *dag);

/**
 * Clear and reset the traversal queue.
 */
void ps_lattice_delq(ps_lattice_t *dag);

/**
 * Create a new lattice link element.
 */
latlink_list_t *latlink_list_new(ps_lattice_t *dag, ps_latlink_t *link,
                                 latlink_list_t *next);

/**
 * Get hypothesis string after bestpath search.
 */
char const *ps_lattice_hyp(ps_lattice_t *dag, ps_latlink_t *link);

/**
 * Get hypothesis segmentation iterator after bestpath search.
 */
ps_seg_t *ps_lattice_seg_iter(ps_lattice_t *dag, ps_latlink_t *link,
                              float32 lwf);

/**
 * Begin N-Gram based A* search on a word graph.
 *
 * @param sf Starting frame for N-best search.
 * @param ef Ending frame for N-best search, or -1 for last frame.
 * @param w1 First context word, or -1 for none.
 * @param w2 Second context word, or -1 for none.
 * @return 0 for success, <0 on error.
 */
ps_astar_t *ps_astar_start(ps_lattice_t *dag,
                           ngram_model_t *lmset,
                           float32 lwf,
                           int sf, int ef,
                           int w1, int w2);

/**
 * Find next best hypothesis of A* on a word graph.
 *
 * @return a complete path, or NULL if no more hypotheses exist.
 */
ps_latpath_t *ps_astar_next(ps_astar_t *nbest);

/**
 * Finish N-best search, releasing resources associated with it.
 */
void ps_astar_finish(ps_astar_t *nbest);

/**
 * Get hypothesis string from A* search.
 */
char const *ps_astar_hyp(ps_astar_t *nbest, ps_latpath_t *path);

/**
 * Get hypothesis segmentation from A* search.
 */
ps_seg_t *ps_astar_seg_iter(ps_astar_t *astar, ps_latpath_t *path, float32 lwf);


#endif /* __PS_LATTICE_INTERNAL_H__ */