/********************************************************************1* *2* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *3* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *4* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *5* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *6* *7* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009,2025 *8* by the Xiph.Org Foundation and contributors *9* https://www.xiph.org/ *10* *11********************************************************************1213function:1415********************************************************************/1617#include <stdlib.h>18#include <string.h>19#include <ogg/ogg.h>20#include "huffdec.h"21#include "decint.h"22232425/*Instead of storing every branching in the tree, subtrees can be collapsed26into one node, with a table of size 1<<nbits pointing directly to its27descedents nbits levels down.28This allows more than one bit to be read at a time, and avoids following all29the intermediate branches with next to no increased code complexity once30the collapsed tree has been built.31We do _not_ require that a subtree be complete to be collapsed, but instead32store duplicate pointers in the table, and record the actual depth of the33node below its parent.34This tells us the number of bits to advance the stream after reaching it.3536This turns out to be equivalent to the method described in \cite{Hash95},37without the requirement that codewords be sorted by length.38If the codewords were sorted by length (so-called ``canonical-codes''), they39could be decoded much faster via either Lindell and Moffat's approach or40Hashemian's Condensed Huffman Code approach, the latter of which has an41extremely small memory footprint.42We can't use Choueka et al.'s finite state machine approach, which is43extremely fast, because we can't allow multiple symbols to be output at a44time; the codebook can and does change between symbols.45It also has very large memory requirements, which impairs cache coherency.4647We store the tree packed in an array of 16-bit integers (words).48Each node consists of a single word, followed consecutively by two or more49indices of its children.50Let n be the value of this first word.51This is the number of bits that need to be read to traverse the node, and52must be positive.531<<n entries follow in the array, each an index to a child node.54If the child is positive, then it is the index of another internal node in55the table.56If the child is negative or zero, then it is a leaf node.57These are stored directly in the child pointer to save space, since they only58require a single word.59If a leaf node would have been encountered before reading n bits, then it is60duplicated the necessary number of times in this table.61Leaf nodes pack both a token value and their actual depth in the tree.62The token in the leaf node is (-leaf&255).63The number of bits that need to be consumed to reach the leaf, starting from64the current node, is (-leaf>>8).6566@ARTICLE{Hash95,67author="Reza Hashemian",68title="Memory Efficient and High-Speed Search {Huffman} Coding",69journal="{IEEE} Transactions on Communications",70volume=43,71number=10,72pages="2576--2581",73month=Oct,74year=199575}*/76777879/*The map from external spec-defined tokens to internal tokens.80This is constructed so that any extra bits read with the original token value81can be masked off the least significant bits of its internal token index.82In addition, all of the tokens which require additional extra bits are placed83at the start of the list, and grouped by type.84OC_DCT_REPEAT_RUN3_TOKEN is placed first, as it is an extra-special case, so85giving it index 0 may simplify comparisons on some architectures.86These requirements require some substantial reordering.*/87static const unsigned char OC_DCT_TOKEN_MAP[TH_NDCT_TOKENS]={88/*OC_DCT_EOB1_TOKEN (0 extra bits)*/8915,90/*OC_DCT_EOB2_TOKEN (0 extra bits)*/9116,92/*OC_DCT_EOB3_TOKEN (0 extra bits)*/9317,94/*OC_DCT_REPEAT_RUN0_TOKEN (2 extra bits)*/9588,96/*OC_DCT_REPEAT_RUN1_TOKEN (3 extra bits)*/9780,98/*OC_DCT_REPEAT_RUN2_TOKEN (4 extra bits)*/991,100/*OC_DCT_REPEAT_RUN3_TOKEN (12 extra bits)*/1010,102/*OC_DCT_SHORT_ZRL_TOKEN (3 extra bits)*/10348,104/*OC_DCT_ZRL_TOKEN (6 extra bits)*/10514,106/*OC_ONE_TOKEN (0 extra bits)*/10756,108/*OC_MINUS_ONE_TOKEN (0 extra bits)*/10957,110/*OC_TWO_TOKEN (0 extra bits)*/11158,112/*OC_MINUS_TWO_TOKEN (0 extra bits)*/11359,114/*OC_DCT_VAL_CAT2 (1 extra bit)*/11560,11662,11764,11866,119/*OC_DCT_VAL_CAT3 (2 extra bits)*/12068,121/*OC_DCT_VAL_CAT4 (3 extra bits)*/12272,123/*OC_DCT_VAL_CAT5 (4 extra bits)*/1242,125/*OC_DCT_VAL_CAT6 (5 extra bits)*/1264,127/*OC_DCT_VAL_CAT7 (6 extra bits)*/1286,129/*OC_DCT_VAL_CAT8 (10 extra bits)*/1308,131/*OC_DCT_RUN_CAT1A (1 extra bit)*/13218,13320,13422,13524,13626,137/*OC_DCT_RUN_CAT1B (3 extra bits)*/13832,139/*OC_DCT_RUN_CAT1C (4 extra bits)*/14012,141/*OC_DCT_RUN_CAT2A (2 extra bits)*/14228,143/*OC_DCT_RUN_CAT2B (3 extra bits)*/14440145};146147/*The log base 2 of number of internal tokens associated with each of the spec148tokens (i.e., how many of the extra bits are folded into the token value).149Increasing the maximum value beyond 3 will enlarge the amount of stack150required for tree construction.*/151static const unsigned char OC_DCT_TOKEN_MAP_LOG_NENTRIES[TH_NDCT_TOKENS]={1520,0,0,2,3,0,0,3,0,0,0,0,0,1,1,1,1,2,3,1,1,1,2,1,1,1,1,1,3,1,2,3153};154155156/*The size a lookup table is allowed to grow to relative to the number of157unique nodes it contains.158E.g., if OC_HUFF_SLUSH is 4, then at most 75% of the space in the tree is159wasted (1/4 of the space must be used).160Larger numbers can decode tokens with fewer read operations, while smaller161numbers may save more space.162With a sample file:16332233473 read calls are required when no tree collapsing is done (100.0%).16419269269 read calls are required when OC_HUFF_SLUSH is 1 (59.8%).16511144969 read calls are required when OC_HUFF_SLUSH is 2 (34.6%).16610538563 read calls are required when OC_HUFF_SLUSH is 4 (32.7%).16710192578 read calls are required when OC_HUFF_SLUSH is 8 (31.6%).168Since a value of 2 gets us the vast majority of the speed-up with only a169small amount of wasted memory, this is what we use.170This value must be less than 128, or you could create a tree with more than17132767 entries, which would overflow the 16-bit words used to index it.*/172#define OC_HUFF_SLUSH (2)173/*The root of the tree is on the fast path, and a larger value here is more174beneficial than elsewhere in the tree.1757 appears to give the best performance, trading off between increased use of176the single-read fast path and cache footprint for the tables, though177obviously this will depend on your cache size.178Using 7 here, the VP3 tables are about twice as large compared to using 2.*/179#define OC_ROOT_HUFF_SLUSH (7)180181182183/*Unpacks a Huffman codebook.184_opb: The buffer to unpack from.185_tokens: Stores a list of internal tokens, in the order they were found in186the codebook, and the lengths of their corresponding codewords.187This is enough to completely define the codebook, while minimizing188stack usage and avoiding temporary allocations (for platforms189where free() is a no-op).190Return: The number of internal tokens in the codebook, or a negative value191on error.*/192int oc_huff_tree_unpack(oc_pack_buf *_opb,unsigned char _tokens[256][2]){193ogg_uint32_t code;194int len;195int ntokens;196int nleaves;197code=0;198len=ntokens=nleaves=0;199for(;;){200long bits;201bits=oc_pack_read1(_opb);202/*Only process nodes so long as there's more bits in the buffer.*/203if(oc_pack_bytes_left(_opb)<0)return TH_EBADHEADER;204/*Read an internal node:*/205if(!bits){206len++;207/*Don't allow codewords longer than 32 bits.*/208if(len>32)return TH_EBADHEADER;209}210/*Read a leaf node:*/211else{212ogg_uint32_t code_bit;213int neb;214int nentries;215int token;216/*Don't allow more than 32 spec-tokens per codebook.*/217if(++nleaves>32)return TH_EBADHEADER;218bits=oc_pack_read(_opb,OC_NDCT_TOKEN_BITS);219neb=OC_DCT_TOKEN_MAP_LOG_NENTRIES[bits];220token=OC_DCT_TOKEN_MAP[bits];221nentries=1<<neb;222while(nentries-->0){223_tokens[ntokens][0]=(unsigned char)token++;224_tokens[ntokens][1]=(unsigned char)(len+neb);225ntokens++;226}227if(len<=0)break;228code_bit=0x80000000U>>len-1;229while(len>0&&(code&code_bit)){230code^=code_bit;231code_bit<<=1;232len--;233}234if(len<=0)break;235code|=code_bit;236}237}238return ntokens;239}240241/*Count how many tokens would be required to fill a subtree at depth _depth.242_tokens: A list of internal tokens, in the order they are found in the243codebook, and the lengths of their corresponding codewords.244_depth: The depth of the desired node in the corresponding tree structure.245Return: The number of tokens that belong to that subtree.*/246static int oc_huff_subtree_tokens(unsigned char _tokens[][2],int _depth){247ogg_uint32_t code;248int ti;249code=0;250ti=0;251do{252if(_tokens[ti][1]-_depth<32)code+=0x80000000U>>_tokens[ti++][1]-_depth;253else{254/*Because of the expanded internal tokens, we can have codewords as long255as 35 bits.256A single recursion here is enough to advance past them.*/257code++;258ti+=oc_huff_subtree_tokens(_tokens+ti,_depth+31);259}260}261while(code<0x80000000U);262return ti;263}264265/*Compute the number of bits to use for a collapsed tree node at the given266depth.267_tokens: A list of internal tokens, in the order they are found in the268codebook, and the lengths of their corresponding codewords.269_ntokens: The number of tokens corresponding to this tree node.270_depth: The depth of this tree node.271Return: The number of bits to use for a collapsed tree node rooted here.272This is always at least one, even if this was a leaf node.*/273static int oc_huff_tree_collapse_depth(unsigned char _tokens[][2],274int _ntokens,int _depth){275int got_leaves;276int loccupancy;277int occupancy;278int slush;279int nbits;280int best_nbits;281slush=_depth>0?OC_HUFF_SLUSH:OC_ROOT_HUFF_SLUSH;282/*It's legal to have a tree with just a single node, which requires no bits283to decode and always returns the same token.284However, no encoder actually does this (yet).285To avoid a special case in oc_huff_token_decode(), we force the number of286lookahead bits to be at least one.287This will produce a tree that looks ahead one bit and then advances the288stream zero bits.*/289nbits=1;290occupancy=2;291got_leaves=1;292do{293int ti;294if(got_leaves)best_nbits=nbits;295nbits++;296got_leaves=0;297loccupancy=occupancy;298for(occupancy=ti=0;ti<_ntokens;occupancy++){299if(_tokens[ti][1]<_depth+nbits)ti++;300else if(_tokens[ti][1]==_depth+nbits){301got_leaves=1;302ti++;303}304else ti+=oc_huff_subtree_tokens(_tokens+ti,_depth+nbits);305}306}307while(occupancy>loccupancy&&occupancy*slush>=1<<nbits);308return best_nbits;309}310311/*Determines the size in words of a Huffman tree node that represents a312subtree of depth _nbits.313_nbits: The depth of the subtree.314This must be greater than zero.315Return: The number of words required to store the node.*/316static size_t oc_huff_node_size(int _nbits){317return 1+(1<<_nbits);318}319320/*Produces a collapsed-tree representation of the given token list.321_tree: The storage for the collapsed Huffman tree.322This may be NULL to compute the required storage size instead of323constructing the tree.324_tokens: A list of internal tokens, in the order they are found in the325codebook, and the lengths of their corresponding codewords.326_ntokens: The number of tokens corresponding to this tree node.327Return: The number of words required to store the tree.*/328static size_t oc_huff_tree_collapse(ogg_int16_t *_tree,329unsigned char _tokens[][2],int _ntokens){330ogg_int16_t node[34];331unsigned char depth[34];332unsigned char last[34];333size_t ntree;334int ti;335int l;336depth[0]=0;337last[0]=(unsigned char)(_ntokens-1);338ntree=0;339ti=0;340l=0;341do{342int nbits;343nbits=oc_huff_tree_collapse_depth(_tokens+ti,last[l]+1-ti,depth[l]);344node[l]=(ogg_int16_t)ntree;345ntree+=oc_huff_node_size(nbits);346if(_tree!=NULL)_tree[node[l]++]=(ogg_int16_t)nbits;347do{348while(ti<=last[l]&&_tokens[ti][1]<=depth[l]+nbits){349if(_tree!=NULL){350ogg_int16_t leaf;351int nentries;352nentries=1<<depth[l]+nbits-_tokens[ti][1];353leaf=(ogg_int16_t)-(_tokens[ti][1]-depth[l]<<8|_tokens[ti][0]);354while(nentries-->0)_tree[node[l]++]=leaf;355}356ti++;357}358if(ti<=last[l]){359/*We need to recurse*/360depth[l+1]=(unsigned char)(depth[l]+nbits);361if(_tree!=NULL)_tree[node[l]++]=(ogg_int16_t)ntree;362l++;363last[l]=364(unsigned char)(ti+oc_huff_subtree_tokens(_tokens+ti,depth[l])-1);365break;366}367/*Pop back up a level of recursion.*/368else if(l-->0)nbits=depth[l+1]-depth[l];369}370while(l>=0);371}372while(l>=0);373return ntree;374}375376/*Unpacks a set of Huffman trees, and reduces them to a collapsed377representation.378_opb: The buffer to unpack the trees from.379_nodes: The table to fill with the Huffman trees.380Return: 0 on success, or a negative value on error.381The caller is responsible for cleaning up any partially initialized382_nodes on failure.*/383int oc_huff_trees_unpack(oc_pack_buf *_opb,384ogg_int16_t *_nodes[TH_NHUFFMAN_TABLES]){385int i;386for(i=0;i<TH_NHUFFMAN_TABLES;i++){387unsigned char tokens[256][2];388int ntokens;389ogg_int16_t *tree;390size_t size;391/*Unpack the full tree into a temporary buffer.*/392ntokens=oc_huff_tree_unpack(_opb,tokens);393if(ntokens<0)return ntokens;394/*Figure out how big the collapsed tree will be and allocate space for it.*/395size=oc_huff_tree_collapse(NULL,tokens,ntokens);396/*This should never happen; if it does it means you set OC_HUFF_SLUSH or397OC_ROOT_HUFF_SLUSH too large.*/398if(size>32767)return TH_EIMPL;399tree=(ogg_int16_t *)_ogg_malloc(size*sizeof(*tree));400if(tree==NULL)return TH_EFAULT;401/*Construct the collapsed the tree.*/402oc_huff_tree_collapse(tree,tokens,ntokens);403_nodes[i]=tree;404}405return 0;406}407408/*Determines the size in words of a Huffman subtree.409_tree: The complete Huffman tree.410_node: The index of the root of the desired subtree.411Return: The number of words required to store the tree.*/412static size_t oc_huff_tree_size(const ogg_int16_t *_tree,int _node){413size_t size;414int nchildren;415int n;416int i;417n=_tree[_node];418size=oc_huff_node_size(n);419nchildren=1<<n;420i=0;421do{422int child;423child=_tree[_node+i+1];424if(child<=0)i+=1<<n-(-child>>8);425else{426size+=oc_huff_tree_size(_tree,child);427i++;428}429}430while(i<nchildren);431return size;432}433434/*Makes a copy of the given set of Huffman trees.435_dst: The array to store the copy in.436_src: The array of trees to copy.*/437int oc_huff_trees_copy(ogg_int16_t *_dst[TH_NHUFFMAN_TABLES],438const ogg_int16_t *const _src[TH_NHUFFMAN_TABLES]){439int i;440for(i=0;i<TH_NHUFFMAN_TABLES;i++){441size_t size;442size=oc_huff_tree_size(_src[i],0);443_dst[i]=(ogg_int16_t *)_ogg_malloc(size*sizeof(*_dst[i]));444if(_dst[i]==NULL){445while(i-->0)_ogg_free(_dst[i]);446return TH_EFAULT;447}448memcpy(_dst[i],_src[i],size*sizeof(*_dst[i]));449}450return 0;451}452453/*Frees the memory used by a set of Huffman trees.454_nodes: The array of trees to free.*/455void oc_huff_trees_clear(ogg_int16_t *_nodes[TH_NHUFFMAN_TABLES]){456int i;457for(i=0;i<TH_NHUFFMAN_TABLES;i++)_ogg_free(_nodes[i]);458}459460461/*Unpacks a single token using the given Huffman tree.462_opb: The buffer to unpack the token from.463_node: The tree to unpack the token with.464Return: The token value.*/465int oc_huff_token_decode_c(oc_pack_buf *_opb,const ogg_int16_t *_tree){466const unsigned char *ptr;467const unsigned char *stop;468oc_pb_window window;469int available;470long bits;471int node;472int n;473ptr=_opb->ptr;474window=_opb->window;475stop=_opb->stop;476available=_opb->bits;477node=0;478for(;;){479n=_tree[node];480if(n>available){481unsigned shift;482shift=OC_PB_WINDOW_SIZE-available;483do{484/*We don't bother setting eof because we won't check for it after we've485started decoding DCT tokens.*/486if(ptr>=stop){487shift=(unsigned)-OC_LOTS_OF_BITS;488break;489}490shift-=8;491window|=(oc_pb_window)*ptr++<<shift;492}493while(shift>=8);494/*Note: We never request more than 24 bits, so there's no need to fill in495the last partial byte here.*/496available=OC_PB_WINDOW_SIZE-shift;497}498bits=window>>OC_PB_WINDOW_SIZE-n;499node=_tree[node+1+bits];500if(node<=0)break;501window<<=n;502available-=n;503}504node=-node;505n=node>>8;506window<<=n;507available-=n;508_opb->ptr=ptr;509_opb->window=window;510_opb->bits=available;511return node&255;512}513514515