Path: blob/master/thirdparty/libjpeg-turbo/src/jchuff.c
9904 views
/*1* jchuff.c2*3* This file was part of the Independent JPEG Group's software:4* Copyright (C) 1991-1997, Thomas G. Lane.5* Lossless JPEG Modifications:6* Copyright (C) 1999, Ken Murchison.7* libjpeg-turbo Modifications:8* Copyright (C) 2009-2011, 2014-2016, 2018-2024, D. R. Commander.9* Copyright (C) 2015, Matthieu Darbois.10* Copyright (C) 2018, Matthias Räncker.11* Copyright (C) 2020, Arm Limited.12* Copyright (C) 2022, Felix Hanau.13* For conditions of distribution and use, see the accompanying README.ijg14* file.15*16* This file contains Huffman entropy encoding routines.17*18* Much of the complexity here has to do with supporting output suspension.19* If the data destination module demands suspension, we want to be able to20* back up to the start of the current MCU. To do this, we copy state21* variables into local working storage, and update them back to the22* permanent JPEG objects only upon successful completion of an MCU.23*24* NOTE: All referenced figures are from25* Recommendation ITU-T T.81 (1992) | ISO/IEC 10918-1:1994.26*/2728#define JPEG_INTERNALS29#include "jinclude.h"30#include "jpeglib.h"31#ifdef WITH_SIMD32#include "jsimd.h"33#else34#include "jchuff.h" /* Declarations shared with jc*huff.c */35#endif36#include <limits.h>37#include "jpeg_nbits.h"383940/* Expanded entropy encoder object for Huffman encoding.41*42* The savable_state subrecord contains fields that change within an MCU,43* but must not be updated permanently until we complete the MCU.44*/4546#if defined(__x86_64__) && defined(__ILP32__)47typedef unsigned long long bit_buf_type;48#else49typedef size_t bit_buf_type;50#endif5152/* NOTE: The more optimal Huffman encoding algorithm is only used by the53* intrinsics implementation of the Arm Neon SIMD extensions, which is why we54* retain the old Huffman encoder behavior when using the GAS implementation.55*/56#if defined(WITH_SIMD) && !(defined(__arm__) || defined(__aarch64__) || \57defined(_M_ARM) || defined(_M_ARM64))58typedef unsigned long long simd_bit_buf_type;59#else60typedef bit_buf_type simd_bit_buf_type;61#endif6263#if (defined(SIZEOF_SIZE_T) && SIZEOF_SIZE_T == 8) || defined(_WIN64) || \64(defined(__x86_64__) && defined(__ILP32__))65#define BIT_BUF_SIZE 6466#elif (defined(SIZEOF_SIZE_T) && SIZEOF_SIZE_T == 4) || defined(_WIN32)67#define BIT_BUF_SIZE 3268#else69#error Cannot determine word size70#endif71#define SIMD_BIT_BUF_SIZE (sizeof(simd_bit_buf_type) * 8)7273typedef struct {74union {75bit_buf_type c;76#ifdef WITH_SIMD77simd_bit_buf_type simd;78#endif79} put_buffer; /* current bit accumulation buffer */80int free_bits; /* # of bits available in it */81/* (Neon GAS: # of bits now in it) */82int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */83} savable_state;8485typedef struct {86struct jpeg_entropy_encoder pub; /* public fields */8788savable_state saved; /* Bit buffer & DC state at start of MCU */8990/* These fields are NOT loaded into local working state. */91unsigned int restarts_to_go; /* MCUs left in this restart interval */92int next_restart_num; /* next restart number to write (0-7) */9394/* Pointers to derived tables (these workspaces have image lifespan) */95c_derived_tbl *dc_derived_tbls[NUM_HUFF_TBLS];96c_derived_tbl *ac_derived_tbls[NUM_HUFF_TBLS];9798#ifdef ENTROPY_OPT_SUPPORTED /* Statistics tables for optimization */99long *dc_count_ptrs[NUM_HUFF_TBLS];100long *ac_count_ptrs[NUM_HUFF_TBLS];101#endif102103#ifdef WITH_SIMD104int simd;105#endif106} huff_entropy_encoder;107108typedef huff_entropy_encoder *huff_entropy_ptr;109110/* Working state while writing an MCU.111* This struct contains all the fields that are needed by subroutines.112*/113114typedef struct {115JOCTET *next_output_byte; /* => next byte to write in buffer */116size_t free_in_buffer; /* # of byte spaces remaining in buffer */117savable_state cur; /* Current bit buffer & DC state */118j_compress_ptr cinfo; /* dump_buffer needs access to this */119#ifdef WITH_SIMD120int simd;121#endif122} working_state;123124125/* Forward declarations */126METHODDEF(boolean) encode_mcu_huff(j_compress_ptr cinfo, JBLOCKROW *MCU_data);127METHODDEF(void) finish_pass_huff(j_compress_ptr cinfo);128#ifdef ENTROPY_OPT_SUPPORTED129METHODDEF(boolean) encode_mcu_gather(j_compress_ptr cinfo,130JBLOCKROW *MCU_data);131METHODDEF(void) finish_pass_gather(j_compress_ptr cinfo);132#endif133134135/*136* Initialize for a Huffman-compressed scan.137* If gather_statistics is TRUE, we do not output anything during the scan,138* just count the Huffman symbols used and generate Huffman code tables.139*/140141METHODDEF(void)142start_pass_huff(j_compress_ptr cinfo, boolean gather_statistics)143{144huff_entropy_ptr entropy = (huff_entropy_ptr)cinfo->entropy;145int ci, dctbl, actbl;146jpeg_component_info *compptr;147148if (gather_statistics) {149#ifdef ENTROPY_OPT_SUPPORTED150entropy->pub.encode_mcu = encode_mcu_gather;151entropy->pub.finish_pass = finish_pass_gather;152#else153ERREXIT(cinfo, JERR_NOT_COMPILED);154#endif155} else {156entropy->pub.encode_mcu = encode_mcu_huff;157entropy->pub.finish_pass = finish_pass_huff;158}159160#ifdef WITH_SIMD161entropy->simd = jsimd_can_huff_encode_one_block();162#endif163164for (ci = 0; ci < cinfo->comps_in_scan; ci++) {165compptr = cinfo->cur_comp_info[ci];166dctbl = compptr->dc_tbl_no;167actbl = compptr->ac_tbl_no;168if (gather_statistics) {169#ifdef ENTROPY_OPT_SUPPORTED170/* Check for invalid table indexes */171/* (make_c_derived_tbl does this in the other path) */172if (dctbl < 0 || dctbl >= NUM_HUFF_TBLS)173ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, dctbl);174if (actbl < 0 || actbl >= NUM_HUFF_TBLS)175ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, actbl);176/* Allocate and zero the statistics tables */177/* Note that jpeg_gen_optimal_table expects 257 entries in each table! */178if (entropy->dc_count_ptrs[dctbl] == NULL)179entropy->dc_count_ptrs[dctbl] = (long *)180(*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,181257 * sizeof(long));182memset(entropy->dc_count_ptrs[dctbl], 0, 257 * sizeof(long));183if (entropy->ac_count_ptrs[actbl] == NULL)184entropy->ac_count_ptrs[actbl] = (long *)185(*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,186257 * sizeof(long));187memset(entropy->ac_count_ptrs[actbl], 0, 257 * sizeof(long));188#endif189} else {190/* Compute derived values for Huffman tables */191/* We may do this more than once for a table, but it's not expensive */192jpeg_make_c_derived_tbl(cinfo, TRUE, dctbl,193&entropy->dc_derived_tbls[dctbl]);194jpeg_make_c_derived_tbl(cinfo, FALSE, actbl,195&entropy->ac_derived_tbls[actbl]);196}197/* Initialize DC predictions to 0 */198entropy->saved.last_dc_val[ci] = 0;199}200201/* Initialize bit buffer to empty */202#ifdef WITH_SIMD203if (entropy->simd) {204entropy->saved.put_buffer.simd = 0;205#if defined(__aarch64__) && !defined(NEON_INTRINSICS)206entropy->saved.free_bits = 0;207#else208entropy->saved.free_bits = SIMD_BIT_BUF_SIZE;209#endif210} else211#endif212{213entropy->saved.put_buffer.c = 0;214entropy->saved.free_bits = BIT_BUF_SIZE;215}216217/* Initialize restart stuff */218entropy->restarts_to_go = cinfo->restart_interval;219entropy->next_restart_num = 0;220}221222223/*224* Compute the derived values for a Huffman table.225* This routine also performs some validation checks on the table.226*227* Note this is also used by jcphuff.c and jclhuff.c.228*/229230GLOBAL(void)231jpeg_make_c_derived_tbl(j_compress_ptr cinfo, boolean isDC, int tblno,232c_derived_tbl **pdtbl)233{234JHUFF_TBL *htbl;235c_derived_tbl *dtbl;236int p, i, l, lastp, si, maxsymbol;237char huffsize[257];238unsigned int huffcode[257];239unsigned int code;240241/* Note that huffsize[] and huffcode[] are filled in code-length order,242* paralleling the order of the symbols themselves in htbl->huffval[].243*/244245/* Find the input Huffman table */246if (tblno < 0 || tblno >= NUM_HUFF_TBLS)247ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno);248htbl =249isDC ? cinfo->dc_huff_tbl_ptrs[tblno] : cinfo->ac_huff_tbl_ptrs[tblno];250if (htbl == NULL)251ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno);252253/* Allocate a workspace if we haven't already done so. */254if (*pdtbl == NULL)255*pdtbl = (c_derived_tbl *)256(*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,257sizeof(c_derived_tbl));258dtbl = *pdtbl;259260/* Figure C.1: make table of Huffman code length for each symbol */261262p = 0;263for (l = 1; l <= 16; l++) {264i = (int)htbl->bits[l];265if (i < 0 || p + i > 256) /* protect against table overrun */266ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);267while (i--)268huffsize[p++] = (char)l;269}270huffsize[p] = 0;271lastp = p;272273/* Figure C.2: generate the codes themselves */274/* We also validate that the counts represent a legal Huffman code tree. */275276code = 0;277si = huffsize[0];278p = 0;279while (huffsize[p]) {280while (((int)huffsize[p]) == si) {281huffcode[p++] = code;282code++;283}284/* code is now 1 more than the last code used for codelength si; but285* it must still fit in si bits, since no code is allowed to be all ones.286*/287if (((JLONG)code) >= (((JLONG)1) << si))288ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);289code <<= 1;290si++;291}292293/* Figure C.3: generate encoding tables */294/* These are code and size indexed by symbol value */295296/* Set all codeless symbols to have code length 0;297* this lets us detect duplicate VAL entries here, and later298* allows emit_bits to detect any attempt to emit such symbols.299*/300memset(dtbl->ehufco, 0, sizeof(dtbl->ehufco));301memset(dtbl->ehufsi, 0, sizeof(dtbl->ehufsi));302303/* This is also a convenient place to check for out-of-range and duplicated304* VAL entries. We allow 0..255 for AC symbols but only 0..15 for DC in305* lossy mode and 0..16 for DC in lossless mode. (We could constrain them306* further based on data depth and mode, but this seems enough.)307*/308maxsymbol = isDC ? (cinfo->master->lossless ? 16 : 15) : 255;309310for (p = 0; p < lastp; p++) {311i = htbl->huffval[p];312if (i < 0 || i > maxsymbol || dtbl->ehufsi[i])313ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);314dtbl->ehufco[i] = huffcode[p];315dtbl->ehufsi[i] = huffsize[p];316}317}318319320/* Outputting bytes to the file */321322/* Emit a byte, taking 'action' if must suspend. */323#define emit_byte(state, val, action) { \324*(state)->next_output_byte++ = (JOCTET)(val); \325if (--(state)->free_in_buffer == 0) \326if (!dump_buffer(state)) \327{ action; } \328}329330331LOCAL(boolean)332dump_buffer(working_state *state)333/* Empty the output buffer; return TRUE if successful, FALSE if must suspend */334{335struct jpeg_destination_mgr *dest = state->cinfo->dest;336337if (!(*dest->empty_output_buffer) (state->cinfo))338return FALSE;339/* After a successful buffer dump, must reset buffer pointers */340state->next_output_byte = dest->next_output_byte;341state->free_in_buffer = dest->free_in_buffer;342return TRUE;343}344345346/* Outputting bits to the file */347348/* Output byte b and, speculatively, an additional 0 byte. 0xFF must be349* encoded as 0xFF 0x00, so the output buffer pointer is advanced by 2 if the350* byte is 0xFF. Otherwise, the output buffer pointer is advanced by 1, and351* the speculative 0 byte will be overwritten by the next byte.352*/353#define EMIT_BYTE(b) { \354buffer[0] = (JOCTET)(b); \355buffer[1] = 0; \356buffer -= -2 + ((JOCTET)(b) < 0xFF); \357}358359/* Output the entire bit buffer. If there are no 0xFF bytes in it, then write360* directly to the output buffer. Otherwise, use the EMIT_BYTE() macro to361* encode 0xFF as 0xFF 0x00.362*/363#if BIT_BUF_SIZE == 64364365#define FLUSH() { \366if (put_buffer & 0x8080808080808080 & ~(put_buffer + 0x0101010101010101)) { \367EMIT_BYTE(put_buffer >> 56) \368EMIT_BYTE(put_buffer >> 48) \369EMIT_BYTE(put_buffer >> 40) \370EMIT_BYTE(put_buffer >> 32) \371EMIT_BYTE(put_buffer >> 24) \372EMIT_BYTE(put_buffer >> 16) \373EMIT_BYTE(put_buffer >> 8) \374EMIT_BYTE(put_buffer ) \375} else { \376buffer[0] = (JOCTET)(put_buffer >> 56); \377buffer[1] = (JOCTET)(put_buffer >> 48); \378buffer[2] = (JOCTET)(put_buffer >> 40); \379buffer[3] = (JOCTET)(put_buffer >> 32); \380buffer[4] = (JOCTET)(put_buffer >> 24); \381buffer[5] = (JOCTET)(put_buffer >> 16); \382buffer[6] = (JOCTET)(put_buffer >> 8); \383buffer[7] = (JOCTET)(put_buffer); \384buffer += 8; \385} \386}387388#else389390#define FLUSH() { \391if (put_buffer & 0x80808080 & ~(put_buffer + 0x01010101)) { \392EMIT_BYTE(put_buffer >> 24) \393EMIT_BYTE(put_buffer >> 16) \394EMIT_BYTE(put_buffer >> 8) \395EMIT_BYTE(put_buffer ) \396} else { \397buffer[0] = (JOCTET)(put_buffer >> 24); \398buffer[1] = (JOCTET)(put_buffer >> 16); \399buffer[2] = (JOCTET)(put_buffer >> 8); \400buffer[3] = (JOCTET)(put_buffer); \401buffer += 4; \402} \403}404405#endif406407/* Fill the bit buffer to capacity with the leading bits from code, then output408* the bit buffer and put the remaining bits from code into the bit buffer.409*/410#define PUT_AND_FLUSH(code, size) { \411put_buffer = (put_buffer << (size + free_bits)) | (code >> -free_bits); \412FLUSH() \413free_bits += BIT_BUF_SIZE; \414put_buffer = code; \415}416417/* Insert code into the bit buffer and output the bit buffer if needed.418* NOTE: We can't flush with free_bits == 0, since the left shift in419* PUT_AND_FLUSH() would have undefined behavior.420*/421#define PUT_BITS(code, size) { \422free_bits -= size; \423if (free_bits < 0) \424PUT_AND_FLUSH(code, size) \425else \426put_buffer = (put_buffer << size) | code; \427}428429#define PUT_CODE(code, size) { \430temp &= (((JLONG)1) << nbits) - 1; \431temp |= code << nbits; \432nbits += size; \433PUT_BITS(temp, nbits) \434}435436437/* Although it is exceedingly rare, it is possible for a Huffman-encoded438* coefficient block to be larger than the 128-byte unencoded block. For each439* of the 64 coefficients, PUT_BITS is invoked twice, and each invocation can440* theoretically store 16 bits (for a maximum of 2048 bits or 256 bytes per441* encoded block.) If, for instance, one artificially sets the AC442* coefficients to alternating values of 32767 and -32768 (using the JPEG443* scanning order-- 1, 8, 16, etc.), then this will produce an encoded block444* larger than 200 bytes.445*/446#define BUFSIZE (DCTSIZE2 * 8)447448#define LOAD_BUFFER() { \449if (state->free_in_buffer < BUFSIZE) { \450localbuf = 1; \451buffer = _buffer; \452} else \453buffer = state->next_output_byte; \454}455456#define STORE_BUFFER() { \457if (localbuf) { \458size_t bytes, bytestocopy; \459bytes = buffer - _buffer; \460buffer = _buffer; \461while (bytes > 0) { \462bytestocopy = MIN(bytes, state->free_in_buffer); \463memcpy(state->next_output_byte, buffer, bytestocopy); \464state->next_output_byte += bytestocopy; \465buffer += bytestocopy; \466state->free_in_buffer -= bytestocopy; \467if (state->free_in_buffer == 0) \468if (!dump_buffer(state)) return FALSE; \469bytes -= bytestocopy; \470} \471} else { \472state->free_in_buffer -= (buffer - state->next_output_byte); \473state->next_output_byte = buffer; \474} \475}476477478LOCAL(boolean)479flush_bits(working_state *state)480{481JOCTET _buffer[BUFSIZE], *buffer, temp;482simd_bit_buf_type put_buffer; int put_bits;483int localbuf = 0;484485#ifdef WITH_SIMD486if (state->simd) {487#if defined(__aarch64__) && !defined(NEON_INTRINSICS)488put_bits = state->cur.free_bits;489#else490put_bits = SIMD_BIT_BUF_SIZE - state->cur.free_bits;491#endif492put_buffer = state->cur.put_buffer.simd;493} else494#endif495{496put_bits = BIT_BUF_SIZE - state->cur.free_bits;497put_buffer = state->cur.put_buffer.c;498}499500LOAD_BUFFER()501502while (put_bits >= 8) {503put_bits -= 8;504temp = (JOCTET)(put_buffer >> put_bits);505EMIT_BYTE(temp)506}507if (put_bits) {508/* fill partial byte with ones */509temp = (JOCTET)((put_buffer << (8 - put_bits)) | (0xFF >> put_bits));510EMIT_BYTE(temp)511}512513#ifdef WITH_SIMD514if (state->simd) { /* and reset bit buffer to empty */515state->cur.put_buffer.simd = 0;516#if defined(__aarch64__) && !defined(NEON_INTRINSICS)517state->cur.free_bits = 0;518#else519state->cur.free_bits = SIMD_BIT_BUF_SIZE;520#endif521} else522#endif523{524state->cur.put_buffer.c = 0;525state->cur.free_bits = BIT_BUF_SIZE;526}527STORE_BUFFER()528529return TRUE;530}531532533#ifdef WITH_SIMD534535/* Encode a single block's worth of coefficients */536537LOCAL(boolean)538encode_one_block_simd(working_state *state, JCOEFPTR block, int last_dc_val,539c_derived_tbl *dctbl, c_derived_tbl *actbl)540{541JOCTET _buffer[BUFSIZE], *buffer;542int localbuf = 0;543544#ifdef ZERO_BUFFERS545memset(_buffer, 0, sizeof(_buffer));546#endif547548LOAD_BUFFER()549550buffer = jsimd_huff_encode_one_block(state, buffer, block, last_dc_val,551dctbl, actbl);552553STORE_BUFFER()554555return TRUE;556}557558#endif559560LOCAL(boolean)561encode_one_block(working_state *state, JCOEFPTR block, int last_dc_val,562c_derived_tbl *dctbl, c_derived_tbl *actbl)563{564int temp, nbits, free_bits;565bit_buf_type put_buffer;566JOCTET _buffer[BUFSIZE], *buffer;567int localbuf = 0;568int max_coef_bits = state->cinfo->data_precision + 2;569570free_bits = state->cur.free_bits;571put_buffer = state->cur.put_buffer.c;572LOAD_BUFFER()573574/* Encode the DC coefficient difference per section F.1.2.1 */575576temp = block[0] - last_dc_val;577578/* This is a well-known technique for obtaining the absolute value without a579* branch. It is derived from an assembly language technique presented in580* "How to Optimize for the Pentium Processors", Copyright (c) 1996, 1997 by581* Agner Fog. This code assumes we are on a two's complement machine.582*/583nbits = temp >> (CHAR_BIT * sizeof(int) - 1);584temp += nbits;585nbits ^= temp;586587/* Find the number of bits needed for the magnitude of the coefficient */588nbits = JPEG_NBITS(nbits);589/* Check for out-of-range coefficient values.590* Since we're encoding a difference, the range limit is twice as much.591*/592if (nbits > max_coef_bits + 1)593ERREXIT(state->cinfo, JERR_BAD_DCT_COEF);594595/* Emit the Huffman-coded symbol for the number of bits.596* Emit that number of bits of the value, if positive,597* or the complement of its magnitude, if negative.598*/599PUT_CODE(dctbl->ehufco[nbits], dctbl->ehufsi[nbits])600601/* Encode the AC coefficients per section F.1.2.2 */602603{604int r = 0; /* r = run length of zeros */605606/* Manually unroll the k loop to eliminate the counter variable. This607* improves performance greatly on systems with a limited number of608* registers (such as x86.)609*/610#define kloop(jpeg_natural_order_of_k) { \611if ((temp = block[jpeg_natural_order_of_k]) == 0) { \612r += 16; \613} else { \614/* Branch-less absolute value, bitwise complement, etc., same as above */ \615nbits = temp >> (CHAR_BIT * sizeof(int) - 1); \616temp += nbits; \617nbits ^= temp; \618nbits = JPEG_NBITS_NONZERO(nbits); \619/* Check for out-of-range coefficient values */ \620if (nbits > max_coef_bits) \621ERREXIT(state->cinfo, JERR_BAD_DCT_COEF); \622/* if run length > 15, must emit special run-length-16 codes (0xF0) */ \623while (r >= 16 * 16) { \624r -= 16 * 16; \625PUT_BITS(actbl->ehufco[0xf0], actbl->ehufsi[0xf0]) \626} \627/* Emit Huffman symbol for run length / number of bits */ \628r += nbits; \629PUT_CODE(actbl->ehufco[r], actbl->ehufsi[r]) \630r = 0; \631} \632}633634/* One iteration for each value in jpeg_natural_order[] */635kloop(1); kloop(8); kloop(16); kloop(9); kloop(2); kloop(3);636kloop(10); kloop(17); kloop(24); kloop(32); kloop(25); kloop(18);637kloop(11); kloop(4); kloop(5); kloop(12); kloop(19); kloop(26);638kloop(33); kloop(40); kloop(48); kloop(41); kloop(34); kloop(27);639kloop(20); kloop(13); kloop(6); kloop(7); kloop(14); kloop(21);640kloop(28); kloop(35); kloop(42); kloop(49); kloop(56); kloop(57);641kloop(50); kloop(43); kloop(36); kloop(29); kloop(22); kloop(15);642kloop(23); kloop(30); kloop(37); kloop(44); kloop(51); kloop(58);643kloop(59); kloop(52); kloop(45); kloop(38); kloop(31); kloop(39);644kloop(46); kloop(53); kloop(60); kloop(61); kloop(54); kloop(47);645kloop(55); kloop(62); kloop(63);646647/* If the last coef(s) were zero, emit an end-of-block code */648if (r > 0) {649PUT_BITS(actbl->ehufco[0], actbl->ehufsi[0])650}651}652653state->cur.put_buffer.c = put_buffer;654state->cur.free_bits = free_bits;655STORE_BUFFER()656657return TRUE;658}659660661/*662* Emit a restart marker & resynchronize predictions.663*/664665LOCAL(boolean)666emit_restart(working_state *state, int restart_num)667{668int ci;669670if (!flush_bits(state))671return FALSE;672673emit_byte(state, 0xFF, return FALSE);674emit_byte(state, JPEG_RST0 + restart_num, return FALSE);675676/* Re-initialize DC predictions to 0 */677for (ci = 0; ci < state->cinfo->comps_in_scan; ci++)678state->cur.last_dc_val[ci] = 0;679680/* The restart counter is not updated until we successfully write the MCU. */681682return TRUE;683}684685686/*687* Encode and output one MCU's worth of Huffman-compressed coefficients.688*/689690METHODDEF(boolean)691encode_mcu_huff(j_compress_ptr cinfo, JBLOCKROW *MCU_data)692{693huff_entropy_ptr entropy = (huff_entropy_ptr)cinfo->entropy;694working_state state;695int blkn, ci;696jpeg_component_info *compptr;697698/* Load up working state */699state.next_output_byte = cinfo->dest->next_output_byte;700state.free_in_buffer = cinfo->dest->free_in_buffer;701state.cur = entropy->saved;702state.cinfo = cinfo;703#ifdef WITH_SIMD704state.simd = entropy->simd;705#endif706707/* Emit restart marker if needed */708if (cinfo->restart_interval) {709if (entropy->restarts_to_go == 0)710if (!emit_restart(&state, entropy->next_restart_num))711return FALSE;712}713714/* Encode the MCU data blocks */715#ifdef WITH_SIMD716if (entropy->simd) {717for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {718ci = cinfo->MCU_membership[blkn];719compptr = cinfo->cur_comp_info[ci];720if (!encode_one_block_simd(&state,721MCU_data[blkn][0], state.cur.last_dc_val[ci],722entropy->dc_derived_tbls[compptr->dc_tbl_no],723entropy->ac_derived_tbls[compptr->ac_tbl_no]))724return FALSE;725/* Update last_dc_val */726state.cur.last_dc_val[ci] = MCU_data[blkn][0][0];727}728} else729#endif730{731for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {732ci = cinfo->MCU_membership[blkn];733compptr = cinfo->cur_comp_info[ci];734if (!encode_one_block(&state,735MCU_data[blkn][0], state.cur.last_dc_val[ci],736entropy->dc_derived_tbls[compptr->dc_tbl_no],737entropy->ac_derived_tbls[compptr->ac_tbl_no]))738return FALSE;739/* Update last_dc_val */740state.cur.last_dc_val[ci] = MCU_data[blkn][0][0];741}742}743744/* Completed MCU, so update state */745cinfo->dest->next_output_byte = state.next_output_byte;746cinfo->dest->free_in_buffer = state.free_in_buffer;747entropy->saved = state.cur;748749/* Update restart-interval state too */750if (cinfo->restart_interval) {751if (entropy->restarts_to_go == 0) {752entropy->restarts_to_go = cinfo->restart_interval;753entropy->next_restart_num++;754entropy->next_restart_num &= 7;755}756entropy->restarts_to_go--;757}758759return TRUE;760}761762763/*764* Finish up at the end of a Huffman-compressed scan.765*/766767METHODDEF(void)768finish_pass_huff(j_compress_ptr cinfo)769{770huff_entropy_ptr entropy = (huff_entropy_ptr)cinfo->entropy;771working_state state;772773/* Load up working state ... flush_bits needs it */774state.next_output_byte = cinfo->dest->next_output_byte;775state.free_in_buffer = cinfo->dest->free_in_buffer;776state.cur = entropy->saved;777state.cinfo = cinfo;778#ifdef WITH_SIMD779state.simd = entropy->simd;780#endif781782/* Flush out the last data */783if (!flush_bits(&state))784ERREXIT(cinfo, JERR_CANT_SUSPEND);785786/* Update state */787cinfo->dest->next_output_byte = state.next_output_byte;788cinfo->dest->free_in_buffer = state.free_in_buffer;789entropy->saved = state.cur;790}791792793/*794* Huffman coding optimization.795*796* We first scan the supplied data and count the number of uses of each symbol797* that is to be Huffman-coded. (This process MUST agree with the code above.)798* Then we build a Huffman coding tree for the observed counts.799* Symbols which are not needed at all for the particular image are not800* assigned any code, which saves space in the DHT marker as well as in801* the compressed data.802*/803804#ifdef ENTROPY_OPT_SUPPORTED805806807/* Process a single block's worth of coefficients */808809LOCAL(void)810htest_one_block(j_compress_ptr cinfo, JCOEFPTR block, int last_dc_val,811long dc_counts[], long ac_counts[])812{813register int temp;814register int nbits;815register int k, r;816int max_coef_bits = cinfo->data_precision + 2;817818/* Encode the DC coefficient difference per section F.1.2.1 */819820temp = block[0] - last_dc_val;821if (temp < 0)822temp = -temp;823824/* Find the number of bits needed for the magnitude of the coefficient */825nbits = 0;826while (temp) {827nbits++;828temp >>= 1;829}830/* Check for out-of-range coefficient values.831* Since we're encoding a difference, the range limit is twice as much.832*/833if (nbits > max_coef_bits + 1)834ERREXIT(cinfo, JERR_BAD_DCT_COEF);835836/* Count the Huffman symbol for the number of bits */837dc_counts[nbits]++;838839/* Encode the AC coefficients per section F.1.2.2 */840841r = 0; /* r = run length of zeros */842843for (k = 1; k < DCTSIZE2; k++) {844if ((temp = block[jpeg_natural_order[k]]) == 0) {845r++;846} else {847/* if run length > 15, must emit special run-length-16 codes (0xF0) */848while (r > 15) {849ac_counts[0xF0]++;850r -= 16;851}852853/* Find the number of bits needed for the magnitude of the coefficient */854if (temp < 0)855temp = -temp;856857/* Find the number of bits needed for the magnitude of the coefficient */858nbits = 1; /* there must be at least one 1 bit */859while ((temp >>= 1))860nbits++;861/* Check for out-of-range coefficient values */862if (nbits > max_coef_bits)863ERREXIT(cinfo, JERR_BAD_DCT_COEF);864865/* Count Huffman symbol for run length / number of bits */866ac_counts[(r << 4) + nbits]++;867868r = 0;869}870}871872/* If the last coef(s) were zero, emit an end-of-block code */873if (r > 0)874ac_counts[0]++;875}876877878/*879* Trial-encode one MCU's worth of Huffman-compressed coefficients.880* No data is actually output, so no suspension return is possible.881*/882883METHODDEF(boolean)884encode_mcu_gather(j_compress_ptr cinfo, JBLOCKROW *MCU_data)885{886huff_entropy_ptr entropy = (huff_entropy_ptr)cinfo->entropy;887int blkn, ci;888jpeg_component_info *compptr;889890/* Take care of restart intervals if needed */891if (cinfo->restart_interval) {892if (entropy->restarts_to_go == 0) {893/* Re-initialize DC predictions to 0 */894for (ci = 0; ci < cinfo->comps_in_scan; ci++)895entropy->saved.last_dc_val[ci] = 0;896/* Update restart state */897entropy->restarts_to_go = cinfo->restart_interval;898}899entropy->restarts_to_go--;900}901902for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {903ci = cinfo->MCU_membership[blkn];904compptr = cinfo->cur_comp_info[ci];905htest_one_block(cinfo, MCU_data[blkn][0], entropy->saved.last_dc_val[ci],906entropy->dc_count_ptrs[compptr->dc_tbl_no],907entropy->ac_count_ptrs[compptr->ac_tbl_no]);908entropy->saved.last_dc_val[ci] = MCU_data[blkn][0][0];909}910911return TRUE;912}913914915/*916* Generate the best Huffman code table for the given counts, fill htbl.917* Note this is also used by jcphuff.c and jclhuff.c.918*919* The JPEG standard requires that no symbol be assigned a codeword of all920* one bits (so that padding bits added at the end of a compressed segment921* can't look like a valid code). Because of the canonical ordering of922* codewords, this just means that there must be an unused slot in the923* longest codeword length category. Annex K (Clause K.2) of924* Rec. ITU-T T.81 (1992) | ISO/IEC 10918-1:1994 suggests reserving such a slot925* by pretending that symbol 256 is a valid symbol with count 1. In theory926* that's not optimal; giving it count zero but including it in the symbol set927* anyway should give a better Huffman code. But the theoretically better code928* actually seems to come out worse in practice, because it produces more929* all-ones bytes (which incur stuffed zero bytes in the final file). In any930* case the difference is tiny.931*932* The JPEG standard requires Huffman codes to be no more than 16 bits long.933* If some symbols have a very small but nonzero probability, the Huffman tree934* must be adjusted to meet the code length restriction. We currently use935* the adjustment method suggested in JPEG section K.2. This method is *not*936* optimal; it may not choose the best possible limited-length code. But937* typically only very-low-frequency symbols will be given less-than-optimal938* lengths, so the code is almost optimal. Experimental comparisons against939* an optimal limited-length-code algorithm indicate that the difference is940* microscopic --- usually less than a hundredth of a percent of total size.941* So the extra complexity of an optimal algorithm doesn't seem worthwhile.942*/943944GLOBAL(void)945jpeg_gen_optimal_table(j_compress_ptr cinfo, JHUFF_TBL *htbl, long freq[])946{947#define MAX_CLEN 32 /* assumed maximum initial code length */948UINT8 bits[MAX_CLEN + 1]; /* bits[k] = # of symbols with code length k */949int bit_pos[MAX_CLEN + 1]; /* # of symbols with smaller code length */950int codesize[257]; /* codesize[k] = code length of symbol k */951int nz_index[257]; /* index of nonzero symbol in the original freq952array */953int others[257]; /* next symbol in current branch of tree */954int c1, c2;955int p, i, j;956int num_nz_symbols;957long v, v2;958959/* This algorithm is explained in section K.2 of the JPEG standard */960961memset(bits, 0, sizeof(bits));962memset(codesize, 0, sizeof(codesize));963for (i = 0; i < 257; i++)964others[i] = -1; /* init links to empty */965966freq[256] = 1; /* make sure 256 has a nonzero count */967/* Including the pseudo-symbol 256 in the Huffman procedure guarantees968* that no real symbol is given code-value of all ones, because 256969* will be placed last in the largest codeword category.970*/971972/* Group nonzero frequencies together so we can more easily find the973* smallest.974*/975num_nz_symbols = 0;976for (i = 0; i < 257; i++) {977if (freq[i]) {978nz_index[num_nz_symbols] = i;979freq[num_nz_symbols] = freq[i];980num_nz_symbols++;981}982}983984/* Huffman's basic algorithm to assign optimal code lengths to symbols */985986for (;;) {987/* Find the two smallest nonzero frequencies; set c1, c2 = their symbols */988/* In case of ties, take the larger symbol number. Since we have grouped989* the nonzero symbols together, checking for zero symbols is not990* necessary.991*/992c1 = -1;993c2 = -1;994v = 1000000000L;995v2 = 1000000000L;996for (i = 0; i < num_nz_symbols; i++) {997if (freq[i] <= v2) {998if (freq[i] <= v) {999c2 = c1;1000v2 = v;1001v = freq[i];1002c1 = i;1003} else {1004v2 = freq[i];1005c2 = i;1006}1007}1008}10091010/* Done if we've merged everything into one frequency */1011if (c2 < 0)1012break;10131014/* Else merge the two counts/trees */1015freq[c1] += freq[c2];1016/* Set the frequency to a very high value instead of zero, so we don't have1017* to check for zero values.1018*/1019freq[c2] = 1000000001L;10201021/* Increment the codesize of everything in c1's tree branch */1022codesize[c1]++;1023while (others[c1] >= 0) {1024c1 = others[c1];1025codesize[c1]++;1026}10271028others[c1] = c2; /* chain c2 onto c1's tree branch */10291030/* Increment the codesize of everything in c2's tree branch */1031codesize[c2]++;1032while (others[c2] >= 0) {1033c2 = others[c2];1034codesize[c2]++;1035}1036}10371038/* Now count the number of symbols of each code length */1039for (i = 0; i < num_nz_symbols; i++) {1040/* The JPEG standard seems to think that this can't happen, */1041/* but I'm paranoid... */1042if (codesize[i] > MAX_CLEN)1043ERREXIT(cinfo, JERR_HUFF_CLEN_OVERFLOW);10441045bits[codesize[i]]++;1046}10471048/* Count the number of symbols with a length smaller than i bits, so we can1049* construct the symbol table more efficiently. Note that this includes the1050* pseudo-symbol 256, but since it is the last symbol, it will not affect the1051* table.1052*/1053p = 0;1054for (i = 1; i <= MAX_CLEN; i++) {1055bit_pos[i] = p;1056p += bits[i];1057}10581059/* JPEG doesn't allow symbols with code lengths over 16 bits, so if the pure1060* Huffman procedure assigned any such lengths, we must adjust the coding.1061* Here is what Rec. ITU-T T.81 | ISO/IEC 10918-1 says about how this next1062* bit works: Since symbols are paired for the longest Huffman code, the1063* symbols are removed from this length category two at a time. The prefix1064* for the pair (which is one bit shorter) is allocated to one of the pair;1065* then, skipping the BITS entry for that prefix length, a code word from the1066* next shortest nonzero BITS entry is converted into a prefix for two code1067* words one bit longer.1068*/10691070for (i = MAX_CLEN; i > 16; i--) {1071while (bits[i] > 0) {1072j = i - 2; /* find length of new prefix to be used */1073while (bits[j] == 0)1074j--;10751076bits[i] -= 2; /* remove two symbols */1077bits[i - 1]++; /* one goes in this length */1078bits[j + 1] += 2; /* two new symbols in this length */1079bits[j]--; /* symbol of this length is now a prefix */1080}1081}10821083/* Remove the count for the pseudo-symbol 256 from the largest codelength */1084while (bits[i] == 0) /* find largest codelength still in use */1085i--;1086bits[i]--;10871088/* Return final symbol counts (only for lengths 0..16) */1089memcpy(htbl->bits, bits, sizeof(htbl->bits));10901091/* Return a list of the symbols sorted by code length */1092/* It's not real clear to me why we don't need to consider the codelength1093* changes made above, but Rec. ITU-T T.81 | ISO/IEC 10918-1 seems to think1094* this works.1095*/1096for (i = 0; i < num_nz_symbols - 1; i++) {1097htbl->huffval[bit_pos[codesize[i]]] = (UINT8)nz_index[i];1098bit_pos[codesize[i]]++;1099}11001101/* Set sent_table FALSE so updated table will be written to JPEG file. */1102htbl->sent_table = FALSE;1103}110411051106/*1107* Finish up a statistics-gathering pass and create the new Huffman tables.1108*/11091110METHODDEF(void)1111finish_pass_gather(j_compress_ptr cinfo)1112{1113huff_entropy_ptr entropy = (huff_entropy_ptr)cinfo->entropy;1114int ci, dctbl, actbl;1115jpeg_component_info *compptr;1116JHUFF_TBL **htblptr;1117boolean did_dc[NUM_HUFF_TBLS];1118boolean did_ac[NUM_HUFF_TBLS];11191120/* It's important not to apply jpeg_gen_optimal_table more than once1121* per table, because it clobbers the input frequency counts!1122*/1123memset(did_dc, 0, sizeof(did_dc));1124memset(did_ac, 0, sizeof(did_ac));11251126for (ci = 0; ci < cinfo->comps_in_scan; ci++) {1127compptr = cinfo->cur_comp_info[ci];1128dctbl = compptr->dc_tbl_no;1129actbl = compptr->ac_tbl_no;1130if (!did_dc[dctbl]) {1131htblptr = &cinfo->dc_huff_tbl_ptrs[dctbl];1132if (*htblptr == NULL)1133*htblptr = jpeg_alloc_huff_table((j_common_ptr)cinfo);1134jpeg_gen_optimal_table(cinfo, *htblptr, entropy->dc_count_ptrs[dctbl]);1135did_dc[dctbl] = TRUE;1136}1137if (!did_ac[actbl]) {1138htblptr = &cinfo->ac_huff_tbl_ptrs[actbl];1139if (*htblptr == NULL)1140*htblptr = jpeg_alloc_huff_table((j_common_ptr)cinfo);1141jpeg_gen_optimal_table(cinfo, *htblptr, entropy->ac_count_ptrs[actbl]);1142did_ac[actbl] = TRUE;1143}1144}1145}114611471148#endif /* ENTROPY_OPT_SUPPORTED */114911501151/*1152* Module initialization routine for Huffman entropy encoding.1153*/11541155GLOBAL(void)1156jinit_huff_encoder(j_compress_ptr cinfo)1157{1158huff_entropy_ptr entropy;1159int i;11601161entropy = (huff_entropy_ptr)1162(*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,1163sizeof(huff_entropy_encoder));1164cinfo->entropy = (struct jpeg_entropy_encoder *)entropy;1165entropy->pub.start_pass = start_pass_huff;11661167/* Mark tables unallocated */1168for (i = 0; i < NUM_HUFF_TBLS; i++) {1169entropy->dc_derived_tbls[i] = entropy->ac_derived_tbls[i] = NULL;1170#ifdef ENTROPY_OPT_SUPPORTED1171entropy->dc_count_ptrs[i] = entropy->ac_count_ptrs[i] = NULL;1172#endif1173}1174}117511761177