1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
|
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: huffdec.c 17577 2010-10-29 04:00:07Z tterribe $
********************************************************************/
#include <stdlib.h>
#include <string.h>
#include <ogg/ogg.h>
#include "huffdec.h"
#include "decint.h"
/*Instead of storing every branching in the tree, subtrees can be collapsed
into one node, with a table of size 1<<nbits pointing directly to its
descedents nbits levels down.
This allows more than one bit to be read at a time, and avoids following all
the intermediate branches with next to no increased code complexity once
the collapsed tree has been built.
We do _not_ require that a subtree be complete to be collapsed, but instead
store duplicate pointers in the table, and record the actual depth of the
node below its parent.
This tells us the number of bits to advance the stream after reaching it.
This turns out to be equivalent to the method described in \cite{Hash95},
without the requirement that codewords be sorted by length.
If the codewords were sorted by length (so-called ``canonical-codes''), they
could be decoded much faster via either Lindell and Moffat's approach or
Hashemian's Condensed Huffman Code approach, the latter of which has an
extremely small memory footprint.
We can't use Choueka et al.'s finite state machine approach, which is
extremely fast, because we can't allow multiple symbols to be output at a
time; the codebook can and does change between symbols.
It also has very large memory requirements, which impairs cache coherency.
We store the tree packed in an array of 16-bit integers (words).
Each node consists of a single word, followed consecutively by two or more
indices of its children.
Let n be the value of this first word.
This is the number of bits that need to be read to traverse the node, and
must be positive.
1<<n entries follow in the array, each an index to a child node.
If the child is positive, then it is the index of another internal node in
the table.
If the child is negative or zero, then it is a leaf node.
These are stored directly in the child pointer to save space, since they only
require a single word.
If a leaf node would have been encountered before reading n bits, then it is
duplicated the necessary number of times in this table.
Leaf nodes pack both a token value and their actual depth in the tree.
The token in the leaf node is (-leaf&255).
The number of bits that need to be consumed to reach the leaf, starting from
the current node, is (-leaf>>8).
@ARTICLE{Hash95,
author="Reza Hashemian",
title="Memory Efficient and High-Speed Search {Huffman} Coding",
journal="{IEEE} Transactions on Communications",
volume=43,
number=10,
pages="2576--2581",
month=Oct,
year=1995
}*/
/*The map from external spec-defined tokens to internal tokens.
This is constructed so that any extra bits read with the original token value
can be masked off the least significant bits of its internal token index.
In addition, all of the tokens which require additional extra bits are placed
at the start of the list, and grouped by type.
OC_DCT_REPEAT_RUN3_TOKEN is placed first, as it is an extra-special case, so
giving it index 0 may simplify comparisons on some architectures.
These requirements require some substantial reordering.*/
static const unsigned char OC_DCT_TOKEN_MAP[TH_NDCT_TOKENS]={
/*OC_DCT_EOB1_TOKEN (0 extra bits)*/
15,
/*OC_DCT_EOB2_TOKEN (0 extra bits)*/
16,
/*OC_DCT_EOB3_TOKEN (0 extra bits)*/
17,
/*OC_DCT_REPEAT_RUN0_TOKEN (2 extra bits)*/
88,
/*OC_DCT_REPEAT_RUN1_TOKEN (3 extra bits)*/
80,
/*OC_DCT_REPEAT_RUN2_TOKEN (4 extra bits)*/
1,
/*OC_DCT_REPEAT_RUN3_TOKEN (12 extra bits)*/
0,
/*OC_DCT_SHORT_ZRL_TOKEN (3 extra bits)*/
48,
/*OC_DCT_ZRL_TOKEN (6 extra bits)*/
14,
/*OC_ONE_TOKEN (0 extra bits)*/
56,
/*OC_MINUS_ONE_TOKEN (0 extra bits)*/
57,
/*OC_TWO_TOKEN (0 extra bits)*/
58,
/*OC_MINUS_TWO_TOKEN (0 extra bits)*/
59,
/*OC_DCT_VAL_CAT2 (1 extra bit)*/
60,
62,
64,
66,
/*OC_DCT_VAL_CAT3 (2 extra bits)*/
68,
/*OC_DCT_VAL_CAT4 (3 extra bits)*/
72,
/*OC_DCT_VAL_CAT5 (4 extra bits)*/
2,
/*OC_DCT_VAL_CAT6 (5 extra bits)*/
4,
/*OC_DCT_VAL_CAT7 (6 extra bits)*/
6,
/*OC_DCT_VAL_CAT8 (10 extra bits)*/
8,
/*OC_DCT_RUN_CAT1A (1 extra bit)*/
18,
20,
22,
24,
26,
/*OC_DCT_RUN_CAT1B (3 extra bits)*/
32,
/*OC_DCT_RUN_CAT1C (4 extra bits)*/
12,
/*OC_DCT_RUN_CAT2A (2 extra bits)*/
28,
/*OC_DCT_RUN_CAT2B (3 extra bits)*/
40
};
/*The log base 2 of number of internal tokens associated with each of the spec
tokens (i.e., how many of the extra bits are folded into the token value).
Increasing the maximum value beyond 3 will enlarge the amount of stack
required for tree construction.*/
static const unsigned char OC_DCT_TOKEN_MAP_LOG_NENTRIES[TH_NDCT_TOKENS]={
0,0,0,2,3,0,0,3,0,0,0,0,0,1,1,1,1,2,3,1,1,1,2,1,1,1,1,1,3,1,2,3
};
/*The size a lookup table is allowed to grow to relative to the number of
unique nodes it contains.
E.g., if OC_HUFF_SLUSH is 4, then at most 75% of the space in the tree is
wasted (1/4 of the space must be used).
Larger numbers can decode tokens with fewer read operations, while smaller
numbers may save more space.
With a sample file:
32233473 read calls are required when no tree collapsing is done (100.0%).
19269269 read calls are required when OC_HUFF_SLUSH is 1 (59.8%).
11144969 read calls are required when OC_HUFF_SLUSH is 2 (34.6%).
10538563 read calls are required when OC_HUFF_SLUSH is 4 (32.7%).
10192578 read calls are required when OC_HUFF_SLUSH is 8 (31.6%).
Since a value of 2 gets us the vast majority of the speed-up with only a
small amount of wasted memory, this is what we use.
This value must be less than 128, or you could create a tree with more than
32767 entries, which would overflow the 16-bit words used to index it.*/
#define OC_HUFF_SLUSH (2)
/*The root of the tree is on the fast path, and a larger value here is more
beneficial than elsewhere in the tree.
7 appears to give the best performance, trading off between increased use of
the single-read fast path and cache footprint for the tables, though
obviously this will depend on your cache size.
Using 7 here, the VP3 tables are about twice as large compared to using 2.*/
#define OC_ROOT_HUFF_SLUSH (7)
/*Unpacks a Huffman codebook.
_opb: The buffer to unpack from.
_tokens: Stores a list of internal tokens, in the order they were found in
the codebook, and the lengths of their corresponding codewords.
This is enough to completely define the codebook, while minimizing
stack usage and avoiding temporary allocations (for platforms
where free() is a no-op).
Return: The number of internal tokens in the codebook, or a negative value
on error.*/
int oc_huff_tree_unpack(oc_pack_buf *_opb,unsigned char _tokens[256][2]){
ogg_uint32_t code;
int len;
int ntokens;
int nleaves;
code=0;
len=ntokens=nleaves=0;
for(;;){
long bits;
bits=oc_pack_read1(_opb);
/*Only process nodes so long as there's more bits in the buffer.*/
if(oc_pack_bytes_left(_opb)<0)return TH_EBADHEADER;
/*Read an internal node:*/
if(!bits){
len++;
/*Don't allow codewords longer than 32 bits.*/
if(len>32)return TH_EBADHEADER;
}
/*Read a leaf node:*/
else{
ogg_uint32_t code_bit;
int neb;
int nentries;
int token;
/*Don't allow more than 32 spec-tokens per codebook.*/
if(++nleaves>32)return TH_EBADHEADER;
bits=oc_pack_read(_opb,OC_NDCT_TOKEN_BITS);
neb=OC_DCT_TOKEN_MAP_LOG_NENTRIES[bits];
token=OC_DCT_TOKEN_MAP[bits];
nentries=1<<neb;
while(nentries-->0){
_tokens[ntokens][0]=(unsigned char)token++;
_tokens[ntokens][1]=(unsigned char)(len+neb);
ntokens++;
}
code_bit=0x80000000U>>len-1;
while(len>0&&(code&code_bit)){
code^=code_bit;
code_bit<<=1;
len--;
}
if(len<=0)break;
code|=code_bit;
}
}
return ntokens;
}
/*Count how many tokens would be required to fill a subtree at depth _depth.
_tokens: A list of internal tokens, in the order they are found in the
codebook, and the lengths of their corresponding codewords.
_depth: The depth of the desired node in the corresponding tree structure.
Return: The number of tokens that belong to that subtree.*/
static int oc_huff_subtree_tokens(unsigned char _tokens[][2],int _depth){
ogg_uint32_t code;
int ti;
code=0;
ti=0;
do{
if(_tokens[ti][1]-_depth<32)code+=0x80000000U>>_tokens[ti++][1]-_depth;
else{
/*Because of the expanded internal tokens, we can have codewords as long
as 35 bits.
A single recursion here is enough to advance past them.*/
code++;
ti+=oc_huff_subtree_tokens(_tokens+ti,_depth+31);
}
}
while(code<0x80000000U);
return ti;
}
/*Compute the number of bits to use for a collapsed tree node at the given
depth.
_tokens: A list of internal tokens, in the order they are found in the
codebook, and the lengths of their corresponding codewords.
_ntokens: The number of tokens corresponding to this tree node.
_depth: The depth of this tree node.
Return: The number of bits to use for a collapsed tree node rooted here.
This is always at least one, even if this was a leaf node.*/
static int oc_huff_tree_collapse_depth(unsigned char _tokens[][2],
int _ntokens,int _depth){
int got_leaves;
int loccupancy;
int occupancy;
int slush;
int nbits;
int best_nbits;
slush=_depth>0?OC_HUFF_SLUSH:OC_ROOT_HUFF_SLUSH;
/*It's legal to have a tree with just a single node, which requires no bits
to decode and always returns the same token.
However, no encoder actually does this (yet).
To avoid a special case in oc_huff_token_decode(), we force the number of
lookahead bits to be at least one.
This will produce a tree that looks ahead one bit and then advances the
stream zero bits.*/
nbits=1;
occupancy=2;
got_leaves=1;
do{
int ti;
if(got_leaves)best_nbits=nbits;
nbits++;
got_leaves=0;
loccupancy=occupancy;
for(occupancy=ti=0;ti<_ntokens;occupancy++){
if(_tokens[ti][1]<_depth+nbits)ti++;
else if(_tokens[ti][1]==_depth+nbits){
got_leaves=1;
ti++;
}
else ti+=oc_huff_subtree_tokens(_tokens+ti,_depth+nbits);
}
}
while(occupancy>loccupancy&&occupancy*slush>=1<<nbits);
return best_nbits;
}
/*Determines the size in words of a Huffman tree node that represents a
subtree of depth _nbits.
_nbits: The depth of the subtree.
This must be greater than zero.
Return: The number of words required to store the node.*/
static size_t oc_huff_node_size(int _nbits){
return 1+(1<<_nbits);
}
/*Produces a collapsed-tree representation of the given token list.
_tree: The storage for the collapsed Huffman tree.
This may be NULL to compute the required storage size instead of
constructing the tree.
_tokens: A list of internal tokens, in the order they are found in the
codebook, and the lengths of their corresponding codewords.
_ntokens: The number of tokens corresponding to this tree node.
Return: The number of words required to store the tree.*/
#if defined(_MSC_VER) && _MSC_VER >= 1700
#pragma optimize( "", off )
#endif
static size_t oc_huff_tree_collapse(ogg_int16_t *_tree,
unsigned char _tokens[][2],int _ntokens){
ogg_int16_t node[34];
unsigned char depth[34];
unsigned char last[34];
size_t ntree;
int ti;
int l;
depth[0]=0;
last[0]=(unsigned char)(_ntokens-1);
ntree=0;
ti=0;
l=0;
do{
int nbits;
nbits=oc_huff_tree_collapse_depth(_tokens+ti,last[l]+1-ti,depth[l]);
node[l]=(ogg_int16_t)ntree;
ntree+=oc_huff_node_size(nbits);
if(_tree!=NULL)_tree[node[l]++]=(ogg_int16_t)nbits;
do{
while(ti<=last[l]&&_tokens[ti][1]<=depth[l]+nbits){
if(_tree!=NULL){
ogg_int16_t leaf;
int nentries;
nentries=1<<depth[l]+nbits-_tokens[ti][1];
leaf=(ogg_int16_t)-(_tokens[ti][1]-depth[l]<<8|_tokens[ti][0]);
while(nentries-->0)_tree[node[l]++]=leaf;
}
ti++;
}
if(ti<=last[l]){
/*We need to recurse*/
depth[l+1]=(unsigned char)(depth[l]+nbits);
if(_tree!=NULL)_tree[node[l]++]=(ogg_int16_t)ntree;
l++;
last[l]=
(unsigned char)(ti+oc_huff_subtree_tokens(_tokens+ti,depth[l])-1);
break;
}
/*Pop back up a level of recursion.*/
else if(l-->0)nbits=depth[l+1]-depth[l];
}
while(l>=0);
}
while(l>=0);
return ntree;
}
#if defined(_MSC_VER) && _MSC_VER >= 1700
#pragma optimize( "", on )
#endif
/*Unpacks a set of Huffman trees, and reduces them to a collapsed
representation.
_opb: The buffer to unpack the trees from.
_nodes: The table to fill with the Huffman trees.
Return: 0 on success, or a negative value on error.
The caller is responsible for cleaning up any partially initialized
_nodes on failure.*/
int oc_huff_trees_unpack(oc_pack_buf *_opb,
ogg_int16_t *_nodes[TH_NHUFFMAN_TABLES]){
int i;
for(i=0;i<TH_NHUFFMAN_TABLES;i++){
unsigned char tokens[256][2];
int ntokens;
ogg_int16_t *tree;
size_t size;
/*Unpack the full tree into a temporary buffer.*/
ntokens=oc_huff_tree_unpack(_opb,tokens);
if(ntokens<0)return ntokens;
/*Figure out how big the collapsed tree will be and allocate space for it.*/
size=oc_huff_tree_collapse(NULL,tokens,ntokens);
/*This should never happen; if it does it means you set OC_HUFF_SLUSH or
OC_ROOT_HUFF_SLUSH too large.*/
if(size>32767)return TH_EIMPL;
tree=(ogg_int16_t *)_ogg_malloc(size*sizeof(*tree));
if(tree==NULL)return TH_EFAULT;
/*Construct the collapsed the tree.*/
oc_huff_tree_collapse(tree,tokens,ntokens);
_nodes[i]=tree;
}
return 0;
}
/*Determines the size in words of a Huffman subtree.
_tree: The complete Huffman tree.
_node: The index of the root of the desired subtree.
Return: The number of words required to store the tree.*/
static size_t oc_huff_tree_size(const ogg_int16_t *_tree,int _node){
size_t size;
int nchildren;
int n;
int i;
n=_tree[_node];
size=oc_huff_node_size(n);
nchildren=1<<n;
i=0;
do{
int child;
child=_tree[_node+i+1];
if(child<=0)i+=1<<n-(-child>>8);
else{
size+=oc_huff_tree_size(_tree,child);
i++;
}
}
while(i<nchildren);
return size;
}
/*Makes a copy of the given set of Huffman trees.
_dst: The array to store the copy in.
_src: The array of trees to copy.*/
int oc_huff_trees_copy(ogg_int16_t *_dst[TH_NHUFFMAN_TABLES],
const ogg_int16_t *const _src[TH_NHUFFMAN_TABLES]){
int total;
int i;
total=0;
for(i=0;i<TH_NHUFFMAN_TABLES;i++){
size_t size;
size=oc_huff_tree_size(_src[i],0);
total+=size;
_dst[i]=(ogg_int16_t *)_ogg_malloc(size*sizeof(*_dst[i]));
if(_dst[i]==NULL){
while(i-->0)_ogg_free(_dst[i]);
return TH_EFAULT;
}
memcpy(_dst[i],_src[i],size*sizeof(*_dst[i]));
}
return 0;
}
/*Frees the memory used by a set of Huffman trees.
_nodes: The array of trees to free.*/
void oc_huff_trees_clear(ogg_int16_t *_nodes[TH_NHUFFMAN_TABLES]){
int i;
for(i=0;i<TH_NHUFFMAN_TABLES;i++)_ogg_free(_nodes[i]);
}
/*Unpacks a single token using the given Huffman tree.
_opb: The buffer to unpack the token from.
_node: The tree to unpack the token with.
Return: The token value.*/
int oc_huff_token_decode_c(oc_pack_buf *_opb,const ogg_int16_t *_tree){
const unsigned char *ptr;
const unsigned char *stop;
oc_pb_window window;
int available;
long bits;
int node;
int n;
ptr=_opb->ptr;
window=_opb->window;
stop=_opb->stop;
available=_opb->bits;
node=0;
for(;;){
n=_tree[node];
if(n>available){
unsigned shift;
shift=OC_PB_WINDOW_SIZE-available;
do{
/*We don't bother setting eof because we won't check for it after we've
started decoding DCT tokens.*/
if(ptr>=stop){
shift=(unsigned)-OC_LOTS_OF_BITS;
break;
}
shift-=8;
window|=(oc_pb_window)*ptr++<<shift;
}
while(shift>=8);
/*Note: We never request more than 24 bits, so there's no need to fill in
the last partial byte here.*/
available=OC_PB_WINDOW_SIZE-shift;
}
bits=window>>OC_PB_WINDOW_SIZE-n;
node=_tree[node+1+bits];
if(node<=0)break;
window<<=n;
available-=n;
}
node=-node;
n=node>>8;
window<<=n;
available-=n;
_opb->ptr=ptr;
_opb->window=window;
_opb->bits=available;
return node&255;
}
|