Path: blob/master/3rdparty/libjpeg-turbo/src/jchuff.c
16337 views
/*1* jchuff.c2*3* This file was part of the Independent JPEG Group's software:4* Copyright (C) 1991-1997, Thomas G. Lane.5* libjpeg-turbo Modifications:6* Copyright (C) 2009-2011, 2014-2016, D. R. Commander.7* Copyright (C) 2015, Matthieu Darbois.8* For conditions of distribution and use, see the accompanying README.ijg9* file.10*11* This file contains Huffman entropy encoding routines.12*13* Much of the complexity here has to do with supporting output suspension.14* If the data destination module demands suspension, we want to be able to15* back up to the start of the current MCU. To do this, we copy state16* variables into local working storage, and update them back to the17* permanent JPEG objects only upon successful completion of an MCU.18*/1920#define JPEG_INTERNALS21#include "jinclude.h"22#include "jpeglib.h"23#include "jsimd.h"24#include "jconfigint.h"25#include <limits.h>2627/*28* NOTE: If USE_CLZ_INTRINSIC is defined, then clz/bsr instructions will be29* used for bit counting rather than the lookup table. This will reduce the30* memory footprint by 64k, which is important for some mobile applications31* that create many isolated instances of libjpeg-turbo (web browsers, for32* instance.) This may improve performance on some mobile platforms as well.33* This feature is enabled by default only on ARM processors, because some x8634* chips have a slow implementation of bsr, and the use of clz/bsr cannot be35* shown to have a significant performance impact even on the x86 chips that36* have a fast implementation of it. When building for ARMv6, you can37* explicitly disable the use of clz/bsr by adding -mthumb to the compiler38* flags (this defines __thumb__).39*/4041/* NOTE: Both GCC and Clang define __GNUC__ */42#if defined __GNUC__ && (defined __arm__ || defined __aarch64__)43#if !defined __thumb__ || defined __thumb2__44#define USE_CLZ_INTRINSIC45#endif46#endif4748#ifdef USE_CLZ_INTRINSIC49#define JPEG_NBITS_NONZERO(x) (32 - __builtin_clz(x))50#define JPEG_NBITS(x) (x ? JPEG_NBITS_NONZERO(x) : 0)51#else52#include "jpeg_nbits_table.h"53#define JPEG_NBITS(x) (jpeg_nbits_table[x])54#define JPEG_NBITS_NONZERO(x) JPEG_NBITS(x)55#endif5657#ifndef min58#define min(a,b) ((a)<(b)?(a):(b))59#endif606162/* Expanded entropy encoder object for Huffman encoding.63*64* The savable_state subrecord contains fields that change within an MCU,65* but must not be updated permanently until we complete the MCU.66*/6768typedef struct {69size_t put_buffer; /* current bit-accumulation buffer */70int put_bits; /* # of bits now in it */71int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */72} savable_state;7374/* This macro is to work around compilers with missing or broken75* structure assignment. You'll need to fix this code if you have76* such a compiler and you change MAX_COMPS_IN_SCAN.77*/7879#ifndef NO_STRUCT_ASSIGN80#define ASSIGN_STATE(dest,src) ((dest) = (src))81#else82#if MAX_COMPS_IN_SCAN == 483#define ASSIGN_STATE(dest,src) \84((dest).put_buffer = (src).put_buffer, \85(dest).put_bits = (src).put_bits, \86(dest).last_dc_val[0] = (src).last_dc_val[0], \87(dest).last_dc_val[1] = (src).last_dc_val[1], \88(dest).last_dc_val[2] = (src).last_dc_val[2], \89(dest).last_dc_val[3] = (src).last_dc_val[3])90#endif91#endif929394typedef struct {95struct jpeg_entropy_encoder pub; /* public fields */9697savable_state saved; /* Bit buffer & DC state at start of MCU */9899/* These fields are NOT loaded into local working state. */100unsigned int restarts_to_go; /* MCUs left in this restart interval */101int next_restart_num; /* next restart number to write (0-7) */102103/* Pointers to derived tables (these workspaces have image lifespan) */104c_derived_tbl *dc_derived_tbls[NUM_HUFF_TBLS];105c_derived_tbl *ac_derived_tbls[NUM_HUFF_TBLS];106107#ifdef ENTROPY_OPT_SUPPORTED /* Statistics tables for optimization */108long *dc_count_ptrs[NUM_HUFF_TBLS];109long *ac_count_ptrs[NUM_HUFF_TBLS];110#endif111112int simd;113} huff_entropy_encoder;114115typedef huff_entropy_encoder *huff_entropy_ptr;116117/* Working state while writing an MCU.118* This struct contains all the fields that are needed by subroutines.119*/120121typedef struct {122JOCTET *next_output_byte; /* => next byte to write in buffer */123size_t free_in_buffer; /* # of byte spaces remaining in buffer */124savable_state cur; /* Current bit buffer & DC state */125j_compress_ptr cinfo; /* dump_buffer needs access to this */126} working_state;127128129/* Forward declarations */130METHODDEF(boolean) encode_mcu_huff (j_compress_ptr cinfo, JBLOCKROW *MCU_data);131METHODDEF(void) finish_pass_huff (j_compress_ptr cinfo);132#ifdef ENTROPY_OPT_SUPPORTED133METHODDEF(boolean) encode_mcu_gather (j_compress_ptr cinfo,134JBLOCKROW *MCU_data);135METHODDEF(void) finish_pass_gather (j_compress_ptr cinfo);136#endif137138139/*140* Initialize for a Huffman-compressed scan.141* If gather_statistics is TRUE, we do not output anything during the scan,142* just count the Huffman symbols used and generate Huffman code tables.143*/144145METHODDEF(void)146start_pass_huff (j_compress_ptr cinfo, boolean gather_statistics)147{148huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;149int ci, dctbl, actbl;150jpeg_component_info *compptr;151152if (gather_statistics) {153#ifdef ENTROPY_OPT_SUPPORTED154entropy->pub.encode_mcu = encode_mcu_gather;155entropy->pub.finish_pass = finish_pass_gather;156#else157ERREXIT(cinfo, JERR_NOT_COMPILED);158#endif159} else {160entropy->pub.encode_mcu = encode_mcu_huff;161entropy->pub.finish_pass = finish_pass_huff;162}163164entropy->simd = jsimd_can_huff_encode_one_block();165166for (ci = 0; ci < cinfo->comps_in_scan; ci++) {167compptr = cinfo->cur_comp_info[ci];168dctbl = compptr->dc_tbl_no;169actbl = compptr->ac_tbl_no;170if (gather_statistics) {171#ifdef ENTROPY_OPT_SUPPORTED172/* Check for invalid table indexes */173/* (make_c_derived_tbl does this in the other path) */174if (dctbl < 0 || dctbl >= NUM_HUFF_TBLS)175ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, dctbl);176if (actbl < 0 || actbl >= NUM_HUFF_TBLS)177ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, actbl);178/* Allocate and zero the statistics tables */179/* Note that jpeg_gen_optimal_table expects 257 entries in each table! */180if (entropy->dc_count_ptrs[dctbl] == NULL)181entropy->dc_count_ptrs[dctbl] = (long *)182(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,183257 * sizeof(long));184MEMZERO(entropy->dc_count_ptrs[dctbl], 257 * sizeof(long));185if (entropy->ac_count_ptrs[actbl] == NULL)186entropy->ac_count_ptrs[actbl] = (long *)187(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,188257 * sizeof(long));189MEMZERO(entropy->ac_count_ptrs[actbl], 257 * sizeof(long));190#endif191} else {192/* Compute derived values for Huffman tables */193/* We may do this more than once for a table, but it's not expensive */194jpeg_make_c_derived_tbl(cinfo, TRUE, dctbl,195& entropy->dc_derived_tbls[dctbl]);196jpeg_make_c_derived_tbl(cinfo, FALSE, actbl,197& entropy->ac_derived_tbls[actbl]);198}199/* Initialize DC predictions to 0 */200entropy->saved.last_dc_val[ci] = 0;201}202203/* Initialize bit buffer to empty */204entropy->saved.put_buffer = 0;205entropy->saved.put_bits = 0;206207/* Initialize restart stuff */208entropy->restarts_to_go = cinfo->restart_interval;209entropy->next_restart_num = 0;210}211212213/*214* Compute the derived values for a Huffman table.215* This routine also performs some validation checks on the table.216*217* Note this is also used by jcphuff.c.218*/219220GLOBAL(void)221jpeg_make_c_derived_tbl (j_compress_ptr cinfo, boolean isDC, int tblno,222c_derived_tbl **pdtbl)223{224JHUFF_TBL *htbl;225c_derived_tbl *dtbl;226int p, i, l, lastp, si, maxsymbol;227char huffsize[257];228unsigned int huffcode[257];229unsigned int code;230231/* Note that huffsize[] and huffcode[] are filled in code-length order,232* paralleling the order of the symbols themselves in htbl->huffval[].233*/234235/* Find the input Huffman table */236if (tblno < 0 || tblno >= NUM_HUFF_TBLS)237ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno);238htbl =239isDC ? cinfo->dc_huff_tbl_ptrs[tblno] : cinfo->ac_huff_tbl_ptrs[tblno];240if (htbl == NULL)241ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno);242243/* Allocate a workspace if we haven't already done so. */244if (*pdtbl == NULL)245*pdtbl = (c_derived_tbl *)246(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,247sizeof(c_derived_tbl));248dtbl = *pdtbl;249250/* Figure C.1: make table of Huffman code length for each symbol */251252p = 0;253for (l = 1; l <= 16; l++) {254i = (int) htbl->bits[l];255if (i < 0 || p + i > 256) /* protect against table overrun */256ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);257while (i--)258huffsize[p++] = (char) l;259}260huffsize[p] = 0;261lastp = p;262263/* Figure C.2: generate the codes themselves */264/* We also validate that the counts represent a legal Huffman code tree. */265266code = 0;267si = huffsize[0];268p = 0;269while (huffsize[p]) {270while (((int) huffsize[p]) == si) {271huffcode[p++] = code;272code++;273}274/* code is now 1 more than the last code used for codelength si; but275* it must still fit in si bits, since no code is allowed to be all ones.276*/277if (((JLONG) code) >= (((JLONG) 1) << si))278ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);279code <<= 1;280si++;281}282283/* Figure C.3: generate encoding tables */284/* These are code and size indexed by symbol value */285286/* Set all codeless symbols to have code length 0;287* this lets us detect duplicate VAL entries here, and later288* allows emit_bits to detect any attempt to emit such symbols.289*/290MEMZERO(dtbl->ehufsi, sizeof(dtbl->ehufsi));291292/* This is also a convenient place to check for out-of-range293* and duplicated VAL entries. We allow 0..255 for AC symbols294* but only 0..15 for DC. (We could constrain them further295* based on data depth and mode, but this seems enough.)296*/297maxsymbol = isDC ? 15 : 255;298299for (p = 0; p < lastp; p++) {300i = htbl->huffval[p];301if (i < 0 || i > maxsymbol || dtbl->ehufsi[i])302ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);303dtbl->ehufco[i] = huffcode[p];304dtbl->ehufsi[i] = huffsize[p];305}306}307308309/* Outputting bytes to the file */310311/* Emit a byte, taking 'action' if must suspend. */312#define emit_byte(state,val,action) \313{ *(state)->next_output_byte++ = (JOCTET) (val); \314if (--(state)->free_in_buffer == 0) \315if (! dump_buffer(state)) \316{ action; } }317318319LOCAL(boolean)320dump_buffer (working_state *state)321/* Empty the output buffer; return TRUE if successful, FALSE if must suspend */322{323struct jpeg_destination_mgr *dest = state->cinfo->dest;324325if (! (*dest->empty_output_buffer) (state->cinfo))326return FALSE;327/* After a successful buffer dump, must reset buffer pointers */328state->next_output_byte = dest->next_output_byte;329state->free_in_buffer = dest->free_in_buffer;330return TRUE;331}332333334/* Outputting bits to the file */335336/* These macros perform the same task as the emit_bits() function in the337* original libjpeg code. In addition to reducing overhead by explicitly338* inlining the code, additional performance is achieved by taking into339* account the size of the bit buffer and waiting until it is almost full340* before emptying it. This mostly benefits 64-bit platforms, since 6341* bytes can be stored in a 64-bit bit buffer before it has to be emptied.342*/343344#define EMIT_BYTE() { \345JOCTET c; \346put_bits -= 8; \347c = (JOCTET)GETJOCTET(put_buffer >> put_bits); \348*buffer++ = c; \349if (c == 0xFF) /* need to stuff a zero byte? */ \350*buffer++ = 0; \351}352353#define PUT_BITS(code, size) { \354put_bits += size; \355put_buffer = (put_buffer << size) | code; \356}357358#define CHECKBUF15() { \359if (put_bits > 15) { \360EMIT_BYTE() \361EMIT_BYTE() \362} \363}364365#define CHECKBUF31() { \366if (put_bits > 31) { \367EMIT_BYTE() \368EMIT_BYTE() \369EMIT_BYTE() \370EMIT_BYTE() \371} \372}373374#define CHECKBUF47() { \375if (put_bits > 47) { \376EMIT_BYTE() \377EMIT_BYTE() \378EMIT_BYTE() \379EMIT_BYTE() \380EMIT_BYTE() \381EMIT_BYTE() \382} \383}384385#if !defined(_WIN32) && !defined(SIZEOF_SIZE_T)386#error Cannot determine word size387#endif388389#if SIZEOF_SIZE_T==8 || defined(_WIN64)390391#define EMIT_BITS(code, size) { \392CHECKBUF47() \393PUT_BITS(code, size) \394}395396#define EMIT_CODE(code, size) { \397temp2 &= (((JLONG) 1)<<nbits) - 1; \398CHECKBUF31() \399PUT_BITS(code, size) \400PUT_BITS(temp2, nbits) \401}402403#else404405#define EMIT_BITS(code, size) { \406PUT_BITS(code, size) \407CHECKBUF15() \408}409410#define EMIT_CODE(code, size) { \411temp2 &= (((JLONG) 1)<<nbits) - 1; \412PUT_BITS(code, size) \413CHECKBUF15() \414PUT_BITS(temp2, nbits) \415CHECKBUF15() \416}417418#endif419420421/* Although it is exceedingly rare, it is possible for a Huffman-encoded422* coefficient block to be larger than the 128-byte unencoded block. For each423* of the 64 coefficients, PUT_BITS is invoked twice, and each invocation can424* theoretically store 16 bits (for a maximum of 2048 bits or 256 bytes per425* encoded block.) If, for instance, one artificially sets the AC426* coefficients to alternating values of 32767 and -32768 (using the JPEG427* scanning order-- 1, 8, 16, etc.), then this will produce an encoded block428* larger than 200 bytes.429*/430#define BUFSIZE (DCTSIZE2 * 4)431432#define LOAD_BUFFER() { \433if (state->free_in_buffer < BUFSIZE) { \434localbuf = 1; \435buffer = _buffer; \436} \437else buffer = state->next_output_byte; \438}439440#define STORE_BUFFER() { \441if (localbuf) { \442bytes = buffer - _buffer; \443buffer = _buffer; \444while (bytes > 0) { \445bytestocopy = min(bytes, state->free_in_buffer); \446MEMCOPY(state->next_output_byte, buffer, bytestocopy); \447state->next_output_byte += bytestocopy; \448buffer += bytestocopy; \449state->free_in_buffer -= bytestocopy; \450if (state->free_in_buffer == 0) \451if (! dump_buffer(state)) return FALSE; \452bytes -= bytestocopy; \453} \454} \455else { \456state->free_in_buffer -= (buffer - state->next_output_byte); \457state->next_output_byte = buffer; \458} \459}460461462LOCAL(boolean)463flush_bits (working_state *state)464{465JOCTET _buffer[BUFSIZE], *buffer;466size_t put_buffer; int put_bits;467size_t bytes, bytestocopy; int localbuf = 0;468469put_buffer = state->cur.put_buffer;470put_bits = state->cur.put_bits;471LOAD_BUFFER()472473/* fill any partial byte with ones */474PUT_BITS(0x7F, 7)475while (put_bits >= 8) EMIT_BYTE()476477state->cur.put_buffer = 0; /* and reset bit-buffer to empty */478state->cur.put_bits = 0;479STORE_BUFFER()480481return TRUE;482}483484485/* Encode a single block's worth of coefficients */486487LOCAL(boolean)488encode_one_block_simd (working_state *state, JCOEFPTR block, int last_dc_val,489c_derived_tbl *dctbl, c_derived_tbl *actbl)490{491JOCTET _buffer[BUFSIZE], *buffer;492size_t bytes, bytestocopy; int localbuf = 0;493494LOAD_BUFFER()495496buffer = jsimd_huff_encode_one_block(state, buffer, block, last_dc_val,497dctbl, actbl);498499STORE_BUFFER()500501return TRUE;502}503504LOCAL(boolean)505encode_one_block (working_state *state, JCOEFPTR block, int last_dc_val,506c_derived_tbl *dctbl, c_derived_tbl *actbl)507{508int temp, temp2, temp3;509int nbits;510int r, code, size;511JOCTET _buffer[BUFSIZE], *buffer;512size_t put_buffer; int put_bits;513int code_0xf0 = actbl->ehufco[0xf0], size_0xf0 = actbl->ehufsi[0xf0];514size_t bytes, bytestocopy; int localbuf = 0;515516put_buffer = state->cur.put_buffer;517put_bits = state->cur.put_bits;518LOAD_BUFFER()519520/* Encode the DC coefficient difference per section F.1.2.1 */521522temp = temp2 = block[0] - last_dc_val;523524/* This is a well-known technique for obtaining the absolute value without a525* branch. It is derived from an assembly language technique presented in526* "How to Optimize for the Pentium Processors", Copyright (c) 1996, 1997 by527* Agner Fog.528*/529temp3 = temp >> (CHAR_BIT * sizeof(int) - 1);530temp ^= temp3;531temp -= temp3;532533/* For a negative input, want temp2 = bitwise complement of abs(input) */534/* This code assumes we are on a two's complement machine */535temp2 += temp3;536537/* Find the number of bits needed for the magnitude of the coefficient */538nbits = JPEG_NBITS(temp);539540/* Emit the Huffman-coded symbol for the number of bits */541code = dctbl->ehufco[nbits];542size = dctbl->ehufsi[nbits];543EMIT_BITS(code, size)544545/* Mask off any extra bits in code */546temp2 &= (((JLONG) 1)<<nbits) - 1;547548/* Emit that number of bits of the value, if positive, */549/* or the complement of its magnitude, if negative. */550EMIT_BITS(temp2, nbits)551552/* Encode the AC coefficients per section F.1.2.2 */553554r = 0; /* r = run length of zeros */555556/* Manually unroll the k loop to eliminate the counter variable. This557* improves performance greatly on systems with a limited number of558* registers (such as x86.)559*/560#define kloop(jpeg_natural_order_of_k) { \561if ((temp = block[jpeg_natural_order_of_k]) == 0) { \562r++; \563} else { \564temp2 = temp; \565/* Branch-less absolute value, bitwise complement, etc., same as above */ \566temp3 = temp >> (CHAR_BIT * sizeof(int) - 1); \567temp ^= temp3; \568temp -= temp3; \569temp2 += temp3; \570nbits = JPEG_NBITS_NONZERO(temp); \571/* if run length > 15, must emit special run-length-16 codes (0xF0) */ \572while (r > 15) { \573EMIT_BITS(code_0xf0, size_0xf0) \574r -= 16; \575} \576/* Emit Huffman symbol for run length / number of bits */ \577temp3 = (r << 4) + nbits; \578code = actbl->ehufco[temp3]; \579size = actbl->ehufsi[temp3]; \580EMIT_CODE(code, size) \581r = 0; \582} \583}584585/* One iteration for each value in jpeg_natural_order[] */586kloop(1); kloop(8); kloop(16); kloop(9); kloop(2); kloop(3);587kloop(10); kloop(17); kloop(24); kloop(32); kloop(25); kloop(18);588kloop(11); kloop(4); kloop(5); kloop(12); kloop(19); kloop(26);589kloop(33); kloop(40); kloop(48); kloop(41); kloop(34); kloop(27);590kloop(20); kloop(13); kloop(6); kloop(7); kloop(14); kloop(21);591kloop(28); kloop(35); kloop(42); kloop(49); kloop(56); kloop(57);592kloop(50); kloop(43); kloop(36); kloop(29); kloop(22); kloop(15);593kloop(23); kloop(30); kloop(37); kloop(44); kloop(51); kloop(58);594kloop(59); kloop(52); kloop(45); kloop(38); kloop(31); kloop(39);595kloop(46); kloop(53); kloop(60); kloop(61); kloop(54); kloop(47);596kloop(55); kloop(62); kloop(63);597598/* If the last coef(s) were zero, emit an end-of-block code */599if (r > 0) {600code = actbl->ehufco[0];601size = actbl->ehufsi[0];602EMIT_BITS(code, size)603}604605state->cur.put_buffer = put_buffer;606state->cur.put_bits = put_bits;607STORE_BUFFER()608609return TRUE;610}611612613/*614* Emit a restart marker & resynchronize predictions.615*/616617LOCAL(boolean)618emit_restart (working_state *state, int restart_num)619{620int ci;621622if (! flush_bits(state))623return FALSE;624625emit_byte(state, 0xFF, return FALSE);626emit_byte(state, JPEG_RST0 + restart_num, return FALSE);627628/* Re-initialize DC predictions to 0 */629for (ci = 0; ci < state->cinfo->comps_in_scan; ci++)630state->cur.last_dc_val[ci] = 0;631632/* The restart counter is not updated until we successfully write the MCU. */633634return TRUE;635}636637638/*639* Encode and output one MCU's worth of Huffman-compressed coefficients.640*/641642METHODDEF(boolean)643encode_mcu_huff (j_compress_ptr cinfo, JBLOCKROW *MCU_data)644{645huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;646working_state state;647int blkn, ci;648jpeg_component_info *compptr;649650/* Load up working state */651state.next_output_byte = cinfo->dest->next_output_byte;652state.free_in_buffer = cinfo->dest->free_in_buffer;653ASSIGN_STATE(state.cur, entropy->saved);654state.cinfo = cinfo;655656/* Emit restart marker if needed */657if (cinfo->restart_interval) {658if (entropy->restarts_to_go == 0)659if (! emit_restart(&state, entropy->next_restart_num))660return FALSE;661}662663/* Encode the MCU data blocks */664if (entropy->simd) {665for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {666ci = cinfo->MCU_membership[blkn];667compptr = cinfo->cur_comp_info[ci];668if (! encode_one_block_simd(&state,669MCU_data[blkn][0], state.cur.last_dc_val[ci],670entropy->dc_derived_tbls[compptr->dc_tbl_no],671entropy->ac_derived_tbls[compptr->ac_tbl_no]))672return FALSE;673/* Update last_dc_val */674state.cur.last_dc_val[ci] = MCU_data[blkn][0][0];675}676} else {677for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {678ci = cinfo->MCU_membership[blkn];679compptr = cinfo->cur_comp_info[ci];680if (! encode_one_block(&state,681MCU_data[blkn][0], state.cur.last_dc_val[ci],682entropy->dc_derived_tbls[compptr->dc_tbl_no],683entropy->ac_derived_tbls[compptr->ac_tbl_no]))684return FALSE;685/* Update last_dc_val */686state.cur.last_dc_val[ci] = MCU_data[blkn][0][0];687}688}689690/* Completed MCU, so update state */691cinfo->dest->next_output_byte = state.next_output_byte;692cinfo->dest->free_in_buffer = state.free_in_buffer;693ASSIGN_STATE(entropy->saved, state.cur);694695/* Update restart-interval state too */696if (cinfo->restart_interval) {697if (entropy->restarts_to_go == 0) {698entropy->restarts_to_go = cinfo->restart_interval;699entropy->next_restart_num++;700entropy->next_restart_num &= 7;701}702entropy->restarts_to_go--;703}704705return TRUE;706}707708709/*710* Finish up at the end of a Huffman-compressed scan.711*/712713METHODDEF(void)714finish_pass_huff (j_compress_ptr cinfo)715{716huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;717working_state state;718719/* Load up working state ... flush_bits needs it */720state.next_output_byte = cinfo->dest->next_output_byte;721state.free_in_buffer = cinfo->dest->free_in_buffer;722ASSIGN_STATE(state.cur, entropy->saved);723state.cinfo = cinfo;724725/* Flush out the last data */726if (! flush_bits(&state))727ERREXIT(cinfo, JERR_CANT_SUSPEND);728729/* Update state */730cinfo->dest->next_output_byte = state.next_output_byte;731cinfo->dest->free_in_buffer = state.free_in_buffer;732ASSIGN_STATE(entropy->saved, state.cur);733}734735736/*737* Huffman coding optimization.738*739* We first scan the supplied data and count the number of uses of each symbol740* that is to be Huffman-coded. (This process MUST agree with the code above.)741* Then we build a Huffman coding tree for the observed counts.742* Symbols which are not needed at all for the particular image are not743* assigned any code, which saves space in the DHT marker as well as in744* the compressed data.745*/746747#ifdef ENTROPY_OPT_SUPPORTED748749750/* Process a single block's worth of coefficients */751752LOCAL(void)753htest_one_block (j_compress_ptr cinfo, JCOEFPTR block, int last_dc_val,754long dc_counts[], long ac_counts[])755{756register int temp;757register int nbits;758register int k, r;759760/* Encode the DC coefficient difference per section F.1.2.1 */761762temp = block[0] - last_dc_val;763if (temp < 0)764temp = -temp;765766/* Find the number of bits needed for the magnitude of the coefficient */767nbits = 0;768while (temp) {769nbits++;770temp >>= 1;771}772/* Check for out-of-range coefficient values.773* Since we're encoding a difference, the range limit is twice as much.774*/775if (nbits > MAX_COEF_BITS+1)776ERREXIT(cinfo, JERR_BAD_DCT_COEF);777778/* Count the Huffman symbol for the number of bits */779dc_counts[nbits]++;780781/* Encode the AC coefficients per section F.1.2.2 */782783r = 0; /* r = run length of zeros */784785for (k = 1; k < DCTSIZE2; k++) {786if ((temp = block[jpeg_natural_order[k]]) == 0) {787r++;788} else {789/* if run length > 15, must emit special run-length-16 codes (0xF0) */790while (r > 15) {791ac_counts[0xF0]++;792r -= 16;793}794795/* Find the number of bits needed for the magnitude of the coefficient */796if (temp < 0)797temp = -temp;798799/* Find the number of bits needed for the magnitude of the coefficient */800nbits = 1; /* there must be at least one 1 bit */801while ((temp >>= 1))802nbits++;803/* Check for out-of-range coefficient values */804if (nbits > MAX_COEF_BITS)805ERREXIT(cinfo, JERR_BAD_DCT_COEF);806807/* Count Huffman symbol for run length / number of bits */808ac_counts[(r << 4) + nbits]++;809810r = 0;811}812}813814/* If the last coef(s) were zero, emit an end-of-block code */815if (r > 0)816ac_counts[0]++;817}818819820/*821* Trial-encode one MCU's worth of Huffman-compressed coefficients.822* No data is actually output, so no suspension return is possible.823*/824825METHODDEF(boolean)826encode_mcu_gather (j_compress_ptr cinfo, JBLOCKROW *MCU_data)827{828huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;829int blkn, ci;830jpeg_component_info *compptr;831832/* Take care of restart intervals if needed */833if (cinfo->restart_interval) {834if (entropy->restarts_to_go == 0) {835/* Re-initialize DC predictions to 0 */836for (ci = 0; ci < cinfo->comps_in_scan; ci++)837entropy->saved.last_dc_val[ci] = 0;838/* Update restart state */839entropy->restarts_to_go = cinfo->restart_interval;840}841entropy->restarts_to_go--;842}843844for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {845ci = cinfo->MCU_membership[blkn];846compptr = cinfo->cur_comp_info[ci];847htest_one_block(cinfo, MCU_data[blkn][0], entropy->saved.last_dc_val[ci],848entropy->dc_count_ptrs[compptr->dc_tbl_no],849entropy->ac_count_ptrs[compptr->ac_tbl_no]);850entropy->saved.last_dc_val[ci] = MCU_data[blkn][0][0];851}852853return TRUE;854}855856857/*858* Generate the best Huffman code table for the given counts, fill htbl.859* Note this is also used by jcphuff.c.860*861* The JPEG standard requires that no symbol be assigned a codeword of all862* one bits (so that padding bits added at the end of a compressed segment863* can't look like a valid code). Because of the canonical ordering of864* codewords, this just means that there must be an unused slot in the865* longest codeword length category. Section K.2 of the JPEG spec suggests866* reserving such a slot by pretending that symbol 256 is a valid symbol867* with count 1. In theory that's not optimal; giving it count zero but868* including it in the symbol set anyway should give a better Huffman code.869* But the theoretically better code actually seems to come out worse in870* practice, because it produces more all-ones bytes (which incur stuffed871* zero bytes in the final file). In any case the difference is tiny.872*873* The JPEG standard requires Huffman codes to be no more than 16 bits long.874* If some symbols have a very small but nonzero probability, the Huffman tree875* must be adjusted to meet the code length restriction. We currently use876* the adjustment method suggested in JPEG section K.2. This method is *not*877* optimal; it may not choose the best possible limited-length code. But878* typically only very-low-frequency symbols will be given less-than-optimal879* lengths, so the code is almost optimal. Experimental comparisons against880* an optimal limited-length-code algorithm indicate that the difference is881* microscopic --- usually less than a hundredth of a percent of total size.882* So the extra complexity of an optimal algorithm doesn't seem worthwhile.883*/884885GLOBAL(void)886jpeg_gen_optimal_table (j_compress_ptr cinfo, JHUFF_TBL *htbl, long freq[])887{888#define MAX_CLEN 32 /* assumed maximum initial code length */889UINT8 bits[MAX_CLEN+1]; /* bits[k] = # of symbols with code length k */890int codesize[257]; /* codesize[k] = code length of symbol k */891int others[257]; /* next symbol in current branch of tree */892int c1, c2;893int p, i, j;894long v;895896/* This algorithm is explained in section K.2 of the JPEG standard */897898MEMZERO(bits, sizeof(bits));899MEMZERO(codesize, sizeof(codesize));900for (i = 0; i < 257; i++)901others[i] = -1; /* init links to empty */902903freq[256] = 1; /* make sure 256 has a nonzero count */904/* Including the pseudo-symbol 256 in the Huffman procedure guarantees905* that no real symbol is given code-value of all ones, because 256906* will be placed last in the largest codeword category.907*/908909/* Huffman's basic algorithm to assign optimal code lengths to symbols */910911for (;;) {912/* Find the smallest nonzero frequency, set c1 = its symbol */913/* In case of ties, take the larger symbol number */914c1 = -1;915v = 1000000000L;916for (i = 0; i <= 256; i++) {917if (freq[i] && freq[i] <= v) {918v = freq[i];919c1 = i;920}921}922923/* Find the next smallest nonzero frequency, set c2 = its symbol */924/* In case of ties, take the larger symbol number */925c2 = -1;926v = 1000000000L;927for (i = 0; i <= 256; i++) {928if (freq[i] && freq[i] <= v && i != c1) {929v = freq[i];930c2 = i;931}932}933934/* Done if we've merged everything into one frequency */935if (c2 < 0)936break;937938/* Else merge the two counts/trees */939freq[c1] += freq[c2];940freq[c2] = 0;941942/* Increment the codesize of everything in c1's tree branch */943codesize[c1]++;944while (others[c1] >= 0) {945c1 = others[c1];946codesize[c1]++;947}948949others[c1] = c2; /* chain c2 onto c1's tree branch */950951/* Increment the codesize of everything in c2's tree branch */952codesize[c2]++;953while (others[c2] >= 0) {954c2 = others[c2];955codesize[c2]++;956}957}958959/* Now count the number of symbols of each code length */960for (i = 0; i <= 256; i++) {961if (codesize[i]) {962/* The JPEG standard seems to think that this can't happen, */963/* but I'm paranoid... */964if (codesize[i] > MAX_CLEN)965ERREXIT(cinfo, JERR_HUFF_CLEN_OVERFLOW);966967bits[codesize[i]]++;968}969}970971/* JPEG doesn't allow symbols with code lengths over 16 bits, so if the pure972* Huffman procedure assigned any such lengths, we must adjust the coding.973* Here is what the JPEG spec says about how this next bit works:974* Since symbols are paired for the longest Huffman code, the symbols are975* removed from this length category two at a time. The prefix for the pair976* (which is one bit shorter) is allocated to one of the pair; then,977* skipping the BITS entry for that prefix length, a code word from the next978* shortest nonzero BITS entry is converted into a prefix for two code words979* one bit longer.980*/981982for (i = MAX_CLEN; i > 16; i--) {983while (bits[i] > 0) {984j = i - 2; /* find length of new prefix to be used */985while (bits[j] == 0)986j--;987988bits[i] -= 2; /* remove two symbols */989bits[i-1]++; /* one goes in this length */990bits[j+1] += 2; /* two new symbols in this length */991bits[j]--; /* symbol of this length is now a prefix */992}993}994995/* Remove the count for the pseudo-symbol 256 from the largest codelength */996while (bits[i] == 0) /* find largest codelength still in use */997i--;998bits[i]--;9991000/* Return final symbol counts (only for lengths 0..16) */1001MEMCOPY(htbl->bits, bits, sizeof(htbl->bits));10021003/* Return a list of the symbols sorted by code length */1004/* It's not real clear to me why we don't need to consider the codelength1005* changes made above, but the JPEG spec seems to think this works.1006*/1007p = 0;1008for (i = 1; i <= MAX_CLEN; i++) {1009for (j = 0; j <= 255; j++) {1010if (codesize[j] == i) {1011htbl->huffval[p] = (UINT8) j;1012p++;1013}1014}1015}10161017/* Set sent_table FALSE so updated table will be written to JPEG file. */1018htbl->sent_table = FALSE;1019}102010211022/*1023* Finish up a statistics-gathering pass and create the new Huffman tables.1024*/10251026METHODDEF(void)1027finish_pass_gather (j_compress_ptr cinfo)1028{1029huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;1030int ci, dctbl, actbl;1031jpeg_component_info *compptr;1032JHUFF_TBL **htblptr;1033boolean did_dc[NUM_HUFF_TBLS];1034boolean did_ac[NUM_HUFF_TBLS];10351036/* It's important not to apply jpeg_gen_optimal_table more than once1037* per table, because it clobbers the input frequency counts!1038*/1039MEMZERO(did_dc, sizeof(did_dc));1040MEMZERO(did_ac, sizeof(did_ac));10411042for (ci = 0; ci < cinfo->comps_in_scan; ci++) {1043compptr = cinfo->cur_comp_info[ci];1044dctbl = compptr->dc_tbl_no;1045actbl = compptr->ac_tbl_no;1046if (! did_dc[dctbl]) {1047htblptr = & cinfo->dc_huff_tbl_ptrs[dctbl];1048if (*htblptr == NULL)1049*htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo);1050jpeg_gen_optimal_table(cinfo, *htblptr, entropy->dc_count_ptrs[dctbl]);1051did_dc[dctbl] = TRUE;1052}1053if (! did_ac[actbl]) {1054htblptr = & cinfo->ac_huff_tbl_ptrs[actbl];1055if (*htblptr == NULL)1056*htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo);1057jpeg_gen_optimal_table(cinfo, *htblptr, entropy->ac_count_ptrs[actbl]);1058did_ac[actbl] = TRUE;1059}1060}1061}106210631064#endif /* ENTROPY_OPT_SUPPORTED */106510661067/*1068* Module initialization routine for Huffman entropy encoding.1069*/10701071GLOBAL(void)1072jinit_huff_encoder (j_compress_ptr cinfo)1073{1074huff_entropy_ptr entropy;1075int i;10761077entropy = (huff_entropy_ptr)1078(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,1079sizeof(huff_entropy_encoder));1080cinfo->entropy = (struct jpeg_entropy_encoder *) entropy;1081entropy->pub.start_pass = start_pass_huff;10821083/* Mark tables unallocated */1084for (i = 0; i < NUM_HUFF_TBLS; i++) {1085entropy->dc_derived_tbls[i] = entropy->ac_derived_tbls[i] = NULL;1086#ifdef ENTROPY_OPT_SUPPORTED1087entropy->dc_count_ptrs[i] = entropy->ac_count_ptrs[i] = NULL;1088#endif1089}1090}109110921093