Path: blob/master/thirdparty/libjpeg-turbo/src/jchuff.c
21535 views
/*1* jchuff.c2*3* This file was part of the Independent JPEG Group's software:4* Copyright (C) 1991-1997, Thomas G. Lane.5* Lossless JPEG Modifications:6* Copyright (C) 1999, Ken Murchison.7* libjpeg-turbo Modifications:8* Copyright (C) 2009-2011, 2014-2016, 2018-2025, D. R. Commander.9* Copyright (C) 2015, Matthieu Darbois.10* Copyright (C) 2018, Matthias Räncker.11* Copyright (C) 2020, Arm Limited.12* Copyright (C) 2022, Felix Hanau.13* For conditions of distribution and use, see the accompanying README.ijg14* file.15*16* This file contains Huffman entropy encoding routines.17*18* Much of the complexity here has to do with supporting output suspension.19* If the data destination module demands suspension, we want to be able to20* back up to the start of the current MCU. To do this, we copy state21* variables into local working storage, and update them back to the22* permanent JPEG objects only upon successful completion of an MCU.23*24* NOTE: All referenced figures are from25* Recommendation ITU-T T.81 (1992) | ISO/IEC 10918-1:1994.26*/2728#define JPEG_INTERNALS29#include "jinclude.h"30#include "jpeglib.h"31#ifdef WITH_SIMD32#include "jsimd.h"33#else34#include "jchuff.h" /* Declarations shared with jc*huff.c */35#endif36#include <limits.h>37#include "jpeg_nbits.h"383940/* Expanded entropy encoder object for Huffman encoding.41*42* The savable_state subrecord contains fields that change within an MCU,43* but must not be updated permanently until we complete the MCU.44*/4546#if defined(__x86_64__) && defined(__ILP32__)47typedef unsigned long long bit_buf_type;48#else49typedef size_t bit_buf_type;50#endif5152/* NOTE: The more optimal Huffman encoding algorithm is only used by the53* intrinsics implementation of the Arm Neon SIMD extensions, which is why we54* retain the old Huffman encoder behavior when using the GAS implementation.55*/56#if defined(WITH_SIMD) && !(defined(__arm__) || defined(__aarch64__) || \57defined(_M_ARM) || defined(_M_ARM64) || \58defined(_M_ARM64EC))59typedef unsigned long long simd_bit_buf_type;60#else61typedef bit_buf_type simd_bit_buf_type;62#endif6364#if (defined(SIZEOF_SIZE_T) && SIZEOF_SIZE_T == 8) || defined(_WIN64) || \65(defined(__x86_64__) && defined(__ILP32__))66#define BIT_BUF_SIZE 6467#elif (defined(SIZEOF_SIZE_T) && SIZEOF_SIZE_T == 4) || defined(_WIN32)68#define BIT_BUF_SIZE 3269#else70#error Cannot determine word size71#endif72#define SIMD_BIT_BUF_SIZE (sizeof(simd_bit_buf_type) * 8)7374typedef struct {75union {76bit_buf_type c;77#ifdef WITH_SIMD78simd_bit_buf_type simd;79#endif80} put_buffer; /* current bit accumulation buffer */81int free_bits; /* # of bits available in it */82/* (Neon GAS: # of bits now in it) */83int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */84} savable_state;8586typedef struct {87struct jpeg_entropy_encoder pub; /* public fields */8889savable_state saved; /* Bit buffer & DC state at start of MCU */9091/* These fields are NOT loaded into local working state. */92unsigned int restarts_to_go; /* MCUs left in this restart interval */93int next_restart_num; /* next restart number to write (0-7) */9495/* Pointers to derived tables (these workspaces have image lifespan) */96c_derived_tbl *dc_derived_tbls[NUM_HUFF_TBLS];97c_derived_tbl *ac_derived_tbls[NUM_HUFF_TBLS];9899#ifdef ENTROPY_OPT_SUPPORTED /* Statistics tables for optimization */100long *dc_count_ptrs[NUM_HUFF_TBLS];101long *ac_count_ptrs[NUM_HUFF_TBLS];102#endif103104#ifdef WITH_SIMD105int simd;106#endif107} huff_entropy_encoder;108109typedef huff_entropy_encoder *huff_entropy_ptr;110111/* Working state while writing an MCU.112* This struct contains all the fields that are needed by subroutines.113*/114115typedef struct {116JOCTET *next_output_byte; /* => next byte to write in buffer */117size_t free_in_buffer; /* # of byte spaces remaining in buffer */118savable_state cur; /* Current bit buffer & DC state */119j_compress_ptr cinfo; /* dump_buffer needs access to this */120#ifdef WITH_SIMD121int simd;122#endif123} working_state;124125126/* Forward declarations */127METHODDEF(boolean) encode_mcu_huff(j_compress_ptr cinfo, JBLOCKROW *MCU_data);128METHODDEF(void) finish_pass_huff(j_compress_ptr cinfo);129#ifdef ENTROPY_OPT_SUPPORTED130METHODDEF(boolean) encode_mcu_gather(j_compress_ptr cinfo,131JBLOCKROW *MCU_data);132METHODDEF(void) finish_pass_gather(j_compress_ptr cinfo);133#endif134135136/*137* Initialize for a Huffman-compressed scan.138* If gather_statistics is TRUE, we do not output anything during the scan,139* just count the Huffman symbols used and generate Huffman code tables.140*/141142METHODDEF(void)143start_pass_huff(j_compress_ptr cinfo, boolean gather_statistics)144{145huff_entropy_ptr entropy = (huff_entropy_ptr)cinfo->entropy;146int ci, dctbl, actbl;147jpeg_component_info *compptr;148149if (gather_statistics) {150#ifdef ENTROPY_OPT_SUPPORTED151entropy->pub.encode_mcu = encode_mcu_gather;152entropy->pub.finish_pass = finish_pass_gather;153#else154ERREXIT(cinfo, JERR_NOT_COMPILED);155#endif156} else {157entropy->pub.encode_mcu = encode_mcu_huff;158entropy->pub.finish_pass = finish_pass_huff;159}160161#ifdef WITH_SIMD162entropy->simd = jsimd_can_huff_encode_one_block();163#endif164165for (ci = 0; ci < cinfo->comps_in_scan; ci++) {166compptr = cinfo->cur_comp_info[ci];167dctbl = compptr->dc_tbl_no;168actbl = compptr->ac_tbl_no;169if (gather_statistics) {170#ifdef ENTROPY_OPT_SUPPORTED171/* Check for invalid table indexes */172/* (make_c_derived_tbl does this in the other path) */173if (dctbl < 0 || dctbl >= NUM_HUFF_TBLS)174ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, dctbl);175if (actbl < 0 || actbl >= NUM_HUFF_TBLS)176ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, actbl);177/* Allocate and zero the statistics tables */178/* Note that jpeg_gen_optimal_table expects 257 entries in each table! */179if (entropy->dc_count_ptrs[dctbl] == NULL)180entropy->dc_count_ptrs[dctbl] = (long *)181(*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,182257 * sizeof(long));183memset(entropy->dc_count_ptrs[dctbl], 0, 257 * sizeof(long));184if (entropy->ac_count_ptrs[actbl] == NULL)185entropy->ac_count_ptrs[actbl] = (long *)186(*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,187257 * sizeof(long));188memset(entropy->ac_count_ptrs[actbl], 0, 257 * sizeof(long));189#endif190} else {191/* Compute derived values for Huffman tables */192/* We may do this more than once for a table, but it's not expensive */193jpeg_make_c_derived_tbl(cinfo, TRUE, dctbl,194&entropy->dc_derived_tbls[dctbl]);195jpeg_make_c_derived_tbl(cinfo, FALSE, actbl,196&entropy->ac_derived_tbls[actbl]);197}198/* Initialize DC predictions to 0 */199entropy->saved.last_dc_val[ci] = 0;200}201202/* Initialize bit buffer to empty */203#ifdef WITH_SIMD204if (entropy->simd) {205entropy->saved.put_buffer.simd = 0;206#if defined(__aarch64__) && !defined(NEON_INTRINSICS)207entropy->saved.free_bits = 0;208#else209entropy->saved.free_bits = SIMD_BIT_BUF_SIZE;210#endif211} else212#endif213{214entropy->saved.put_buffer.c = 0;215entropy->saved.free_bits = BIT_BUF_SIZE;216}217218/* Initialize restart stuff */219entropy->restarts_to_go = cinfo->restart_interval;220entropy->next_restart_num = 0;221}222223224/*225* Compute the derived values for a Huffman table.226* This routine also performs some validation checks on the table.227*228* Note this is also used by jcphuff.c and jclhuff.c.229*/230231GLOBAL(void)232jpeg_make_c_derived_tbl(j_compress_ptr cinfo, boolean isDC, int tblno,233c_derived_tbl **pdtbl)234{235JHUFF_TBL *htbl;236c_derived_tbl *dtbl;237int p, i, l, lastp, si, maxsymbol;238char huffsize[257];239unsigned int huffcode[257];240unsigned int code;241242/* Note that huffsize[] and huffcode[] are filled in code-length order,243* paralleling the order of the symbols themselves in htbl->huffval[].244*/245246/* Find the input Huffman table */247if (tblno < 0 || tblno >= NUM_HUFF_TBLS)248ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno);249htbl =250isDC ? cinfo->dc_huff_tbl_ptrs[tblno] : cinfo->ac_huff_tbl_ptrs[tblno];251if (htbl == NULL)252ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno);253254/* Allocate a workspace if we haven't already done so. */255if (*pdtbl == NULL)256*pdtbl = (c_derived_tbl *)257(*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,258sizeof(c_derived_tbl));259dtbl = *pdtbl;260261/* Figure C.1: make table of Huffman code length for each symbol */262263p = 0;264for (l = 1; l <= 16; l++) {265i = (int)htbl->bits[l];266if (i < 0 || p + i > 256) /* protect against table overrun */267ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);268while (i--)269huffsize[p++] = (char)l;270}271huffsize[p] = 0;272lastp = p;273274/* Figure C.2: generate the codes themselves */275/* We also validate that the counts represent a legal Huffman code tree. */276277code = 0;278si = huffsize[0];279p = 0;280while (huffsize[p]) {281while (((int)huffsize[p]) == si) {282huffcode[p++] = code;283code++;284}285/* code is now 1 more than the last code used for codelength si; but286* it must still fit in si bits, since no code is allowed to be all ones.287*/288if (((JLONG)code) >= (((JLONG)1) << si))289ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);290code <<= 1;291si++;292}293294/* Figure C.3: generate encoding tables */295/* These are code and size indexed by symbol value */296297/* Set all codeless symbols to have code length 0;298* this lets us detect duplicate VAL entries here, and later299* allows emit_bits to detect any attempt to emit such symbols.300*/301memset(dtbl->ehufco, 0, sizeof(dtbl->ehufco));302memset(dtbl->ehufsi, 0, sizeof(dtbl->ehufsi));303304/* This is also a convenient place to check for out-of-range and duplicated305* VAL entries. We allow 0..255 for AC symbols but only 0..15 for DC in306* lossy mode and 0..16 for DC in lossless mode. (We could constrain them307* further based on data depth and mode, but this seems enough.)308*/309maxsymbol = isDC ? (cinfo->master->lossless ? 16 : 15) : 255;310311for (p = 0; p < lastp; p++) {312i = htbl->huffval[p];313if (i < 0 || i > maxsymbol || dtbl->ehufsi[i])314ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);315dtbl->ehufco[i] = huffcode[p];316dtbl->ehufsi[i] = huffsize[p];317}318}319320321/* Outputting bytes to the file */322323/* Emit a byte, taking 'action' if must suspend. */324#define emit_byte(state, val, action) { \325*(state)->next_output_byte++ = (JOCTET)(val); \326if (--(state)->free_in_buffer == 0) \327if (!dump_buffer(state)) \328{ action; } \329}330331332LOCAL(boolean)333dump_buffer(working_state *state)334/* Empty the output buffer; return TRUE if successful, FALSE if must suspend */335{336struct jpeg_destination_mgr *dest = state->cinfo->dest;337338if (!(*dest->empty_output_buffer) (state->cinfo))339return FALSE;340/* After a successful buffer dump, must reset buffer pointers */341state->next_output_byte = dest->next_output_byte;342state->free_in_buffer = dest->free_in_buffer;343return TRUE;344}345346347/* Outputting bits to the file */348349/* Output byte b and, speculatively, an additional 0 byte. 0xFF must be350* encoded as 0xFF 0x00, so the output buffer pointer is advanced by 2 if the351* byte is 0xFF. Otherwise, the output buffer pointer is advanced by 1, and352* the speculative 0 byte will be overwritten by the next byte.353*/354#define EMIT_BYTE(b) { \355buffer[0] = (JOCTET)(b); \356buffer[1] = 0; \357buffer -= -2 + ((JOCTET)(b) < 0xFF); \358}359360/* Output the entire bit buffer. If there are no 0xFF bytes in it, then write361* directly to the output buffer. Otherwise, use the EMIT_BYTE() macro to362* encode 0xFF as 0xFF 0x00.363*/364#if BIT_BUF_SIZE == 64365366#define FLUSH() { \367if (put_buffer & 0x8080808080808080 & ~(put_buffer + 0x0101010101010101)) { \368EMIT_BYTE(put_buffer >> 56) \369EMIT_BYTE(put_buffer >> 48) \370EMIT_BYTE(put_buffer >> 40) \371EMIT_BYTE(put_buffer >> 32) \372EMIT_BYTE(put_buffer >> 24) \373EMIT_BYTE(put_buffer >> 16) \374EMIT_BYTE(put_buffer >> 8) \375EMIT_BYTE(put_buffer ) \376} else { \377buffer[0] = (JOCTET)(put_buffer >> 56); \378buffer[1] = (JOCTET)(put_buffer >> 48); \379buffer[2] = (JOCTET)(put_buffer >> 40); \380buffer[3] = (JOCTET)(put_buffer >> 32); \381buffer[4] = (JOCTET)(put_buffer >> 24); \382buffer[5] = (JOCTET)(put_buffer >> 16); \383buffer[6] = (JOCTET)(put_buffer >> 8); \384buffer[7] = (JOCTET)(put_buffer); \385buffer += 8; \386} \387}388389#else390391#define FLUSH() { \392if (put_buffer & 0x80808080 & ~(put_buffer + 0x01010101)) { \393EMIT_BYTE(put_buffer >> 24) \394EMIT_BYTE(put_buffer >> 16) \395EMIT_BYTE(put_buffer >> 8) \396EMIT_BYTE(put_buffer ) \397} else { \398buffer[0] = (JOCTET)(put_buffer >> 24); \399buffer[1] = (JOCTET)(put_buffer >> 16); \400buffer[2] = (JOCTET)(put_buffer >> 8); \401buffer[3] = (JOCTET)(put_buffer); \402buffer += 4; \403} \404}405406#endif407408/* Fill the bit buffer to capacity with the leading bits from code, then output409* the bit buffer and put the remaining bits from code into the bit buffer.410*/411#define PUT_AND_FLUSH(code, size) { \412put_buffer = (put_buffer << (size + free_bits)) | (code >> -free_bits); \413FLUSH() \414free_bits += BIT_BUF_SIZE; \415put_buffer = code; \416}417418/* Insert code into the bit buffer and output the bit buffer if needed.419* NOTE: We can't flush with free_bits == 0, since the left shift in420* PUT_AND_FLUSH() would have undefined behavior.421*/422#define PUT_BITS(code, size) { \423free_bits -= size; \424if (free_bits < 0) \425PUT_AND_FLUSH(code, size) \426else \427put_buffer = (put_buffer << size) | code; \428}429430#define PUT_CODE(code, size) { \431temp &= (((JLONG)1) << nbits) - 1; \432temp |= code << nbits; \433nbits += size; \434PUT_BITS(temp, nbits) \435}436437438/* Although it is exceedingly rare, it is possible for a Huffman-encoded439* coefficient block to be larger than the 128-byte unencoded block. For each440* of the 64 coefficients, PUT_BITS is invoked twice, and each invocation can441* theoretically store 16 bits (for a maximum of 2048 bits or 256 bytes per442* encoded block.) If, for instance, one artificially sets the AC443* coefficients to alternating values of 32767 and -32768 (using the JPEG444* scanning order-- 1, 8, 16, etc.), then this will produce an encoded block445* larger than 200 bytes.446*/447#define BUFSIZE (DCTSIZE2 * 8)448449#define LOAD_BUFFER() { \450if (state->free_in_buffer < BUFSIZE) { \451localbuf = 1; \452buffer = _buffer; \453} else \454buffer = state->next_output_byte; \455}456457#define STORE_BUFFER() { \458if (localbuf) { \459size_t bytes, bytestocopy; \460bytes = buffer - _buffer; \461buffer = _buffer; \462while (bytes > 0) { \463bytestocopy = MIN(bytes, state->free_in_buffer); \464memcpy(state->next_output_byte, buffer, bytestocopy); \465state->next_output_byte += bytestocopy; \466buffer += bytestocopy; \467state->free_in_buffer -= bytestocopy; \468if (state->free_in_buffer == 0) \469if (!dump_buffer(state)) return FALSE; \470bytes -= bytestocopy; \471} \472} else { \473state->free_in_buffer -= (buffer - state->next_output_byte); \474state->next_output_byte = buffer; \475} \476}477478479LOCAL(boolean)480flush_bits(working_state *state)481{482JOCTET _buffer[BUFSIZE], *buffer, temp;483simd_bit_buf_type put_buffer; int put_bits;484int localbuf = 0;485486#ifdef WITH_SIMD487if (state->simd) {488#if defined(__aarch64__) && !defined(NEON_INTRINSICS)489put_bits = state->cur.free_bits;490#else491put_bits = SIMD_BIT_BUF_SIZE - state->cur.free_bits;492#endif493put_buffer = state->cur.put_buffer.simd;494} else495#endif496{497put_bits = BIT_BUF_SIZE - state->cur.free_bits;498put_buffer = state->cur.put_buffer.c;499}500501LOAD_BUFFER()502503while (put_bits >= 8) {504put_bits -= 8;505temp = (JOCTET)(put_buffer >> put_bits);506EMIT_BYTE(temp)507}508if (put_bits) {509/* fill partial byte with ones */510temp = (JOCTET)((put_buffer << (8 - put_bits)) | (0xFF >> put_bits));511EMIT_BYTE(temp)512}513514#ifdef WITH_SIMD515if (state->simd) { /* and reset bit buffer to empty */516state->cur.put_buffer.simd = 0;517#if defined(__aarch64__) && !defined(NEON_INTRINSICS)518state->cur.free_bits = 0;519#else520state->cur.free_bits = SIMD_BIT_BUF_SIZE;521#endif522} else523#endif524{525state->cur.put_buffer.c = 0;526state->cur.free_bits = BIT_BUF_SIZE;527}528STORE_BUFFER()529530return TRUE;531}532533534#ifdef WITH_SIMD535536/* Encode a single block's worth of coefficients */537538LOCAL(boolean)539encode_one_block_simd(working_state *state, JCOEFPTR block, int last_dc_val,540c_derived_tbl *dctbl, c_derived_tbl *actbl)541{542JOCTET _buffer[BUFSIZE], *buffer;543int localbuf = 0;544545#ifdef ZERO_BUFFERS546memset(_buffer, 0, sizeof(_buffer));547#endif548549LOAD_BUFFER()550551buffer = jsimd_huff_encode_one_block(state, buffer, block, last_dc_val,552dctbl, actbl);553554STORE_BUFFER()555556return TRUE;557}558559#endif560561LOCAL(boolean)562encode_one_block(working_state *state, JCOEFPTR block, int last_dc_val,563c_derived_tbl *dctbl, c_derived_tbl *actbl)564{565int temp, nbits, free_bits;566bit_buf_type put_buffer;567JOCTET _buffer[BUFSIZE], *buffer;568int localbuf = 0;569int max_coef_bits = state->cinfo->data_precision + 2;570571free_bits = state->cur.free_bits;572put_buffer = state->cur.put_buffer.c;573LOAD_BUFFER()574575/* Encode the DC coefficient difference per section F.1.2.1 */576577temp = block[0] - last_dc_val;578579/* This is a well-known technique for obtaining the absolute value without a580* branch. It is derived from an assembly language technique presented in581* "How to Optimize for the Pentium Processors", Copyright (c) 1996, 1997 by582* Agner Fog. This code assumes we are on a two's complement machine.583*/584nbits = temp >> (CHAR_BIT * sizeof(int) - 1);585temp += nbits;586nbits ^= temp;587588/* Find the number of bits needed for the magnitude of the coefficient */589nbits = JPEG_NBITS(nbits);590/* Check for out-of-range coefficient values.591* Since we're encoding a difference, the range limit is twice as much.592*/593if (nbits > max_coef_bits + 1)594ERREXIT(state->cinfo, JERR_BAD_DCT_COEF);595596/* Emit the Huffman-coded symbol for the number of bits.597* Emit that number of bits of the value, if positive,598* or the complement of its magnitude, if negative.599*/600PUT_CODE(dctbl->ehufco[nbits], dctbl->ehufsi[nbits])601602/* Encode the AC coefficients per section F.1.2.2 */603604{605int r = 0; /* r = run length of zeros */606607/* Manually unroll the k loop to eliminate the counter variable. This608* improves performance greatly on systems with a limited number of609* registers (such as x86.)610*/611#define kloop(jpeg_natural_order_of_k) { \612if ((temp = block[jpeg_natural_order_of_k]) == 0) { \613r += 16; \614} else { \615/* Branch-less absolute value, bitwise complement, etc., same as above */ \616nbits = temp >> (CHAR_BIT * sizeof(int) - 1); \617temp += nbits; \618nbits ^= temp; \619nbits = JPEG_NBITS_NONZERO(nbits); \620/* Check for out-of-range coefficient values */ \621if (nbits > max_coef_bits) \622ERREXIT(state->cinfo, JERR_BAD_DCT_COEF); \623/* if run length > 15, must emit special run-length-16 codes (0xF0) */ \624while (r >= 16 * 16) { \625r -= 16 * 16; \626PUT_BITS(actbl->ehufco[0xf0], actbl->ehufsi[0xf0]) \627} \628/* Emit Huffman symbol for run length / number of bits */ \629r += nbits; \630PUT_CODE(actbl->ehufco[r], actbl->ehufsi[r]) \631r = 0; \632} \633}634635/* One iteration for each value in jpeg_natural_order[] */636kloop(1); kloop(8); kloop(16); kloop(9); kloop(2); kloop(3);637kloop(10); kloop(17); kloop(24); kloop(32); kloop(25); kloop(18);638kloop(11); kloop(4); kloop(5); kloop(12); kloop(19); kloop(26);639kloop(33); kloop(40); kloop(48); kloop(41); kloop(34); kloop(27);640kloop(20); kloop(13); kloop(6); kloop(7); kloop(14); kloop(21);641kloop(28); kloop(35); kloop(42); kloop(49); kloop(56); kloop(57);642kloop(50); kloop(43); kloop(36); kloop(29); kloop(22); kloop(15);643kloop(23); kloop(30); kloop(37); kloop(44); kloop(51); kloop(58);644kloop(59); kloop(52); kloop(45); kloop(38); kloop(31); kloop(39);645kloop(46); kloop(53); kloop(60); kloop(61); kloop(54); kloop(47);646kloop(55); kloop(62); kloop(63);647648/* If the last coef(s) were zero, emit an end-of-block code */649if (r > 0) {650PUT_BITS(actbl->ehufco[0], actbl->ehufsi[0])651}652}653654state->cur.put_buffer.c = put_buffer;655state->cur.free_bits = free_bits;656STORE_BUFFER()657658return TRUE;659}660661662/*663* Emit a restart marker & resynchronize predictions.664*/665666LOCAL(boolean)667emit_restart(working_state *state, int restart_num)668{669int ci;670671if (!flush_bits(state))672return FALSE;673674emit_byte(state, 0xFF, return FALSE);675emit_byte(state, JPEG_RST0 + restart_num, return FALSE);676677/* Re-initialize DC predictions to 0 */678for (ci = 0; ci < state->cinfo->comps_in_scan; ci++)679state->cur.last_dc_val[ci] = 0;680681/* The restart counter is not updated until we successfully write the MCU. */682683return TRUE;684}685686687/*688* Encode and output one MCU's worth of Huffman-compressed coefficients.689*/690691METHODDEF(boolean)692encode_mcu_huff(j_compress_ptr cinfo, JBLOCKROW *MCU_data)693{694huff_entropy_ptr entropy = (huff_entropy_ptr)cinfo->entropy;695working_state state;696int blkn, ci;697jpeg_component_info *compptr;698699/* Load up working state */700state.next_output_byte = cinfo->dest->next_output_byte;701state.free_in_buffer = cinfo->dest->free_in_buffer;702state.cur = entropy->saved;703state.cinfo = cinfo;704#ifdef WITH_SIMD705state.simd = entropy->simd;706#endif707708/* Emit restart marker if needed */709if (cinfo->restart_interval) {710if (entropy->restarts_to_go == 0)711if (!emit_restart(&state, entropy->next_restart_num))712return FALSE;713}714715/* Encode the MCU data blocks */716#ifdef WITH_SIMD717if (entropy->simd) {718for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {719ci = cinfo->MCU_membership[blkn];720compptr = cinfo->cur_comp_info[ci];721if (!encode_one_block_simd(&state,722MCU_data[blkn][0], state.cur.last_dc_val[ci],723entropy->dc_derived_tbls[compptr->dc_tbl_no],724entropy->ac_derived_tbls[compptr->ac_tbl_no]))725return FALSE;726/* Update last_dc_val */727state.cur.last_dc_val[ci] = MCU_data[blkn][0][0];728}729} else730#endif731{732for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {733ci = cinfo->MCU_membership[blkn];734compptr = cinfo->cur_comp_info[ci];735if (!encode_one_block(&state,736MCU_data[blkn][0], state.cur.last_dc_val[ci],737entropy->dc_derived_tbls[compptr->dc_tbl_no],738entropy->ac_derived_tbls[compptr->ac_tbl_no]))739return FALSE;740/* Update last_dc_val */741state.cur.last_dc_val[ci] = MCU_data[blkn][0][0];742}743}744745/* Completed MCU, so update state */746cinfo->dest->next_output_byte = state.next_output_byte;747cinfo->dest->free_in_buffer = state.free_in_buffer;748entropy->saved = state.cur;749750/* Update restart-interval state too */751if (cinfo->restart_interval) {752if (entropy->restarts_to_go == 0) {753entropy->restarts_to_go = cinfo->restart_interval;754entropy->next_restart_num++;755entropy->next_restart_num &= 7;756}757entropy->restarts_to_go--;758}759760return TRUE;761}762763764/*765* Finish up at the end of a Huffman-compressed scan.766*/767768METHODDEF(void)769finish_pass_huff(j_compress_ptr cinfo)770{771huff_entropy_ptr entropy = (huff_entropy_ptr)cinfo->entropy;772working_state state;773774/* Load up working state ... flush_bits needs it */775state.next_output_byte = cinfo->dest->next_output_byte;776state.free_in_buffer = cinfo->dest->free_in_buffer;777state.cur = entropy->saved;778state.cinfo = cinfo;779#ifdef WITH_SIMD780state.simd = entropy->simd;781#endif782783/* Flush out the last data */784if (!flush_bits(&state))785ERREXIT(cinfo, JERR_CANT_SUSPEND);786787/* Update state */788cinfo->dest->next_output_byte = state.next_output_byte;789cinfo->dest->free_in_buffer = state.free_in_buffer;790entropy->saved = state.cur;791}792793794/*795* Huffman coding optimization.796*797* We first scan the supplied data and count the number of uses of each symbol798* that is to be Huffman-coded. (This process MUST agree with the code above.)799* Then we build a Huffman coding tree for the observed counts.800* Symbols which are not needed at all for the particular image are not801* assigned any code, which saves space in the DHT marker as well as in802* the compressed data.803*/804805#ifdef ENTROPY_OPT_SUPPORTED806807808/* Process a single block's worth of coefficients */809810LOCAL(void)811htest_one_block(j_compress_ptr cinfo, JCOEFPTR block, int last_dc_val,812long dc_counts[], long ac_counts[])813{814register int temp;815register int nbits;816register int k, r;817int max_coef_bits = cinfo->data_precision + 2;818819/* Encode the DC coefficient difference per section F.1.2.1 */820821temp = block[0] - last_dc_val;822if (temp < 0)823temp = -temp;824825/* Find the number of bits needed for the magnitude of the coefficient */826nbits = 0;827while (temp) {828nbits++;829temp >>= 1;830}831/* Check for out-of-range coefficient values.832* Since we're encoding a difference, the range limit is twice as much.833*/834if (nbits > max_coef_bits + 1)835ERREXIT(cinfo, JERR_BAD_DCT_COEF);836837/* Count the Huffman symbol for the number of bits */838dc_counts[nbits]++;839840/* Encode the AC coefficients per section F.1.2.2 */841842r = 0; /* r = run length of zeros */843844for (k = 1; k < DCTSIZE2; k++) {845if ((temp = block[jpeg_natural_order[k]]) == 0) {846r++;847} else {848/* if run length > 15, must emit special run-length-16 codes (0xF0) */849while (r > 15) {850ac_counts[0xF0]++;851r -= 16;852}853854/* Find the number of bits needed for the magnitude of the coefficient */855if (temp < 0)856temp = -temp;857858/* Find the number of bits needed for the magnitude of the coefficient */859nbits = 1; /* there must be at least one 1 bit */860while ((temp >>= 1))861nbits++;862/* Check for out-of-range coefficient values */863if (nbits > max_coef_bits)864ERREXIT(cinfo, JERR_BAD_DCT_COEF);865866/* Count Huffman symbol for run length / number of bits */867ac_counts[(r << 4) + nbits]++;868869r = 0;870}871}872873/* If the last coef(s) were zero, emit an end-of-block code */874if (r > 0)875ac_counts[0]++;876}877878879/*880* Trial-encode one MCU's worth of Huffman-compressed coefficients.881* No data is actually output, so no suspension return is possible.882*/883884METHODDEF(boolean)885encode_mcu_gather(j_compress_ptr cinfo, JBLOCKROW *MCU_data)886{887huff_entropy_ptr entropy = (huff_entropy_ptr)cinfo->entropy;888int blkn, ci;889jpeg_component_info *compptr;890891/* Take care of restart intervals if needed */892if (cinfo->restart_interval) {893if (entropy->restarts_to_go == 0) {894/* Re-initialize DC predictions to 0 */895for (ci = 0; ci < cinfo->comps_in_scan; ci++)896entropy->saved.last_dc_val[ci] = 0;897/* Update restart state */898entropy->restarts_to_go = cinfo->restart_interval;899}900entropy->restarts_to_go--;901}902903for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {904ci = cinfo->MCU_membership[blkn];905compptr = cinfo->cur_comp_info[ci];906htest_one_block(cinfo, MCU_data[blkn][0], entropy->saved.last_dc_val[ci],907entropy->dc_count_ptrs[compptr->dc_tbl_no],908entropy->ac_count_ptrs[compptr->ac_tbl_no]);909entropy->saved.last_dc_val[ci] = MCU_data[blkn][0][0];910}911912return TRUE;913}914915916/*917* Generate the best Huffman code table for the given counts, fill htbl.918* Note this is also used by jcphuff.c and jclhuff.c.919*920* The JPEG standard requires that no symbol be assigned a codeword of all921* one bits (so that padding bits added at the end of a compressed segment922* can't look like a valid code). Because of the canonical ordering of923* codewords, this just means that there must be an unused slot in the924* longest codeword length category. Annex K (Clause K.2) of925* Rec. ITU-T T.81 (1992) | ISO/IEC 10918-1:1994 suggests reserving such a slot926* by pretending that symbol 256 is a valid symbol with count 1. In theory927* that's not optimal; giving it count zero but including it in the symbol set928* anyway should give a better Huffman code. But the theoretically better code929* actually seems to come out worse in practice, because it produces more930* all-ones bytes (which incur stuffed zero bytes in the final file). In any931* case the difference is tiny.932*933* The JPEG standard requires Huffman codes to be no more than 16 bits long.934* If some symbols have a very small but nonzero probability, the Huffman tree935* must be adjusted to meet the code length restriction. We currently use936* the adjustment method suggested in JPEG section K.2. This method is *not*937* optimal; it may not choose the best possible limited-length code. But938* typically only very-low-frequency symbols will be given less-than-optimal939* lengths, so the code is almost optimal. Experimental comparisons against940* an optimal limited-length-code algorithm indicate that the difference is941* microscopic --- usually less than a hundredth of a percent of total size.942* So the extra complexity of an optimal algorithm doesn't seem worthwhile.943*/944945GLOBAL(void)946jpeg_gen_optimal_table(j_compress_ptr cinfo, JHUFF_TBL *htbl, long freq[])947{948#define MAX_CLEN 32 /* assumed maximum initial code length */949UINT8 bits[MAX_CLEN + 1]; /* bits[k] = # of symbols with code length k */950int bit_pos[MAX_CLEN + 1]; /* # of symbols with smaller code length */951int codesize[257]; /* codesize[k] = code length of symbol k */952int nz_index[257]; /* index of nonzero symbol in the original freq953array */954int others[257]; /* next symbol in current branch of tree */955int c1, c2;956int p, i, j;957int num_nz_symbols;958long v, v2;959960/* This algorithm is explained in section K.2 of the JPEG standard */961962memset(bits, 0, sizeof(bits));963memset(codesize, 0, sizeof(codesize));964for (i = 0; i < 257; i++)965others[i] = -1; /* init links to empty */966967freq[256] = 1; /* make sure 256 has a nonzero count */968/* Including the pseudo-symbol 256 in the Huffman procedure guarantees969* that no real symbol is given code-value of all ones, because 256970* will be placed last in the largest codeword category.971*/972973/* Group nonzero frequencies together so we can more easily find the974* smallest.975*/976num_nz_symbols = 0;977for (i = 0; i < 257; i++) {978if (freq[i]) {979nz_index[num_nz_symbols] = i;980freq[num_nz_symbols] = freq[i];981num_nz_symbols++;982}983}984985/* Huffman's basic algorithm to assign optimal code lengths to symbols */986987for (;;) {988/* Find the two smallest nonzero frequencies; set c1, c2 = their symbols */989/* In case of ties, take the larger symbol number. Since we have grouped990* the nonzero symbols together, checking for zero symbols is not991* necessary.992*/993c1 = -1;994c2 = -1;995v = 1000000000L;996v2 = 1000000000L;997for (i = 0; i < num_nz_symbols; i++) {998if (freq[i] <= v2) {999if (freq[i] <= v) {1000c2 = c1;1001v2 = v;1002v = freq[i];1003c1 = i;1004} else {1005v2 = freq[i];1006c2 = i;1007}1008}1009}10101011/* Done if we've merged everything into one frequency */1012if (c2 < 0)1013break;10141015/* Else merge the two counts/trees */1016freq[c1] += freq[c2];1017/* Set the frequency to a very high value instead of zero, so we don't have1018* to check for zero values.1019*/1020freq[c2] = 1000000001L;10211022/* Increment the codesize of everything in c1's tree branch */1023codesize[c1]++;1024while (others[c1] >= 0) {1025c1 = others[c1];1026codesize[c1]++;1027}10281029others[c1] = c2; /* chain c2 onto c1's tree branch */10301031/* Increment the codesize of everything in c2's tree branch */1032codesize[c2]++;1033while (others[c2] >= 0) {1034c2 = others[c2];1035codesize[c2]++;1036}1037}10381039/* Now count the number of symbols of each code length */1040for (i = 0; i < num_nz_symbols; i++) {1041/* The JPEG standard seems to think that this can't happen, */1042/* but I'm paranoid... */1043if (codesize[i] > MAX_CLEN)1044ERREXIT(cinfo, JERR_HUFF_CLEN_OVERFLOW);10451046bits[codesize[i]]++;1047}10481049/* Count the number of symbols with a length smaller than i bits, so we can1050* construct the symbol table more efficiently. Note that this includes the1051* pseudo-symbol 256, but since it is the last symbol, it will not affect the1052* table.1053*/1054p = 0;1055for (i = 1; i <= MAX_CLEN; i++) {1056bit_pos[i] = p;1057p += bits[i];1058}10591060/* JPEG doesn't allow symbols with code lengths over 16 bits, so if the pure1061* Huffman procedure assigned any such lengths, we must adjust the coding.1062* Here is what Rec. ITU-T T.81 | ISO/IEC 10918-1 says about how this next1063* bit works: Since symbols are paired for the longest Huffman code, the1064* symbols are removed from this length category two at a time. The prefix1065* for the pair (which is one bit shorter) is allocated to one of the pair;1066* then, skipping the BITS entry for that prefix length, a code word from the1067* next shortest nonzero BITS entry is converted into a prefix for two code1068* words one bit longer.1069*/10701071for (i = MAX_CLEN; i > 16; i--) {1072while (bits[i] > 0) {1073j = i - 2; /* find length of new prefix to be used */1074while (bits[j] == 0)1075j--;10761077bits[i] -= 2; /* remove two symbols */1078bits[i - 1]++; /* one goes in this length */1079bits[j + 1] += 2; /* two new symbols in this length */1080bits[j]--; /* symbol of this length is now a prefix */1081}1082}10831084/* Remove the count for the pseudo-symbol 256 from the largest codelength */1085while (bits[i] == 0) /* find largest codelength still in use */1086i--;1087bits[i]--;10881089/* Return final symbol counts (only for lengths 0..16) */1090memcpy(htbl->bits, bits, sizeof(htbl->bits));10911092/* Return a list of the symbols sorted by code length */1093/* It's not real clear to me why we don't need to consider the codelength1094* changes made above, but Rec. ITU-T T.81 | ISO/IEC 10918-1 seems to think1095* this works.1096*/1097for (i = 0; i < num_nz_symbols - 1; i++) {1098htbl->huffval[bit_pos[codesize[i]]] = (UINT8)nz_index[i];1099bit_pos[codesize[i]]++;1100}11011102/* Set sent_table FALSE so updated table will be written to JPEG file. */1103htbl->sent_table = FALSE;1104}110511061107/*1108* Finish up a statistics-gathering pass and create the new Huffman tables.1109*/11101111METHODDEF(void)1112finish_pass_gather(j_compress_ptr cinfo)1113{1114huff_entropy_ptr entropy = (huff_entropy_ptr)cinfo->entropy;1115int ci, dctbl, actbl;1116jpeg_component_info *compptr;1117JHUFF_TBL **htblptr;1118boolean did_dc[NUM_HUFF_TBLS];1119boolean did_ac[NUM_HUFF_TBLS];11201121/* It's important not to apply jpeg_gen_optimal_table more than once1122* per table, because it clobbers the input frequency counts!1123*/1124memset(did_dc, 0, sizeof(did_dc));1125memset(did_ac, 0, sizeof(did_ac));11261127for (ci = 0; ci < cinfo->comps_in_scan; ci++) {1128compptr = cinfo->cur_comp_info[ci];1129dctbl = compptr->dc_tbl_no;1130actbl = compptr->ac_tbl_no;1131if (!did_dc[dctbl]) {1132htblptr = &cinfo->dc_huff_tbl_ptrs[dctbl];1133if (*htblptr == NULL)1134*htblptr = jpeg_alloc_huff_table((j_common_ptr)cinfo);1135jpeg_gen_optimal_table(cinfo, *htblptr, entropy->dc_count_ptrs[dctbl]);1136did_dc[dctbl] = TRUE;1137}1138if (!did_ac[actbl]) {1139htblptr = &cinfo->ac_huff_tbl_ptrs[actbl];1140if (*htblptr == NULL)1141*htblptr = jpeg_alloc_huff_table((j_common_ptr)cinfo);1142jpeg_gen_optimal_table(cinfo, *htblptr, entropy->ac_count_ptrs[actbl]);1143did_ac[actbl] = TRUE;1144}1145}1146}114711481149#endif /* ENTROPY_OPT_SUPPORTED */115011511152/*1153* Module initialization routine for Huffman entropy encoding.1154*/11551156GLOBAL(void)1157jinit_huff_encoder(j_compress_ptr cinfo)1158{1159huff_entropy_ptr entropy;1160int i;11611162entropy = (huff_entropy_ptr)1163(*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,1164sizeof(huff_entropy_encoder));1165cinfo->entropy = (struct jpeg_entropy_encoder *)entropy;1166entropy->pub.start_pass = start_pass_huff;11671168/* Mark tables unallocated */1169for (i = 0; i < NUM_HUFF_TBLS; i++) {1170entropy->dc_derived_tbls[i] = entropy->ac_derived_tbls[i] = NULL;1171#ifdef ENTROPY_OPT_SUPPORTED1172entropy->dc_count_ptrs[i] = entropy->ac_count_ptrs[i] = NULL;1173#endif1174}1175}117611771178