Path: blob/master/thirdparty/libwebp/src/enc/histogram_enc.c
21733 views
// Copyright 2012 Google Inc. All Rights Reserved.1//2// Use of this source code is governed by a BSD-style license3// that can be found in the COPYING file in the root of the source4// tree. An additional intellectual property rights grant can be found5// in the file PATENTS. All contributing project authors may6// be found in the AUTHORS file in the root of the source tree.7// -----------------------------------------------------------------------------8//9// Author: Jyrki Alakuijala ([email protected])10//11#ifdef HAVE_CONFIG_H12#include "src/webp/config.h"13#endif1415#include <assert.h>16#include <stdlib.h>17#include <string.h>1819#include "src/dsp/lossless.h"20#include "src/dsp/lossless_common.h"21#include "src/enc/backward_references_enc.h"22#include "src/enc/histogram_enc.h"23#include "src/enc/vp8i_enc.h"24#include "src/utils/utils.h"25#include "src/webp/encode.h"26#include "src/webp/format_constants.h"27#include "src/webp/types.h"2829// Number of partitions for the three dominant (literal, red and blue) symbol30// costs.31#define NUM_PARTITIONS 432// The size of the bin-hash corresponding to the three dominant costs.33#define BIN_SIZE (NUM_PARTITIONS * NUM_PARTITIONS * NUM_PARTITIONS)34// Maximum number of histograms allowed in greedy combining algorithm.35#define MAX_HISTO_GREEDY 1003637// Enum to meaningfully access the elements of the Histogram arrays.38typedef enum {39LITERAL = 0,40RED,41BLUE,42ALPHA,43DISTANCE44} HistogramIndex;4546// Return the size of the histogram for a given cache_bits.47static int GetHistogramSize(int cache_bits) {48const int literal_size = VP8LHistogramNumCodes(cache_bits);49const size_t total_size = sizeof(VP8LHistogram) + sizeof(int) * literal_size;50assert(total_size <= (size_t)0x7fffffff);51return (int)total_size;52}5354static void HistogramStatsClear(VP8LHistogram* const h) {55int i;56for (i = 0; i < 5; ++i) {57h->trivial_symbol[i] = VP8L_NON_TRIVIAL_SYM;58// By default, the histogram is assumed to be used.59h->is_used[i] = 1;60}61h->bit_cost = 0;62memset(h->costs, 0, sizeof(h->costs));63}6465static void HistogramClear(VP8LHistogram* const h) {66uint32_t* const literal = h->literal;67const int cache_bits = h->palette_code_bits;68const int histo_size = GetHistogramSize(cache_bits);69memset(h, 0, histo_size);70h->palette_code_bits = cache_bits;71h->literal = literal;72HistogramStatsClear(h);73}7475// Swap two histogram pointers.76static void HistogramSwap(VP8LHistogram** const h1, VP8LHistogram** const h2) {77VP8LHistogram* const tmp = *h1;78*h1 = *h2;79*h2 = tmp;80}8182static void HistogramCopy(const VP8LHistogram* const src,83VP8LHistogram* const dst) {84uint32_t* const dst_literal = dst->literal;85const int dst_cache_bits = dst->palette_code_bits;86const int literal_size = VP8LHistogramNumCodes(dst_cache_bits);87const int histo_size = GetHistogramSize(dst_cache_bits);88assert(src->palette_code_bits == dst_cache_bits);89memcpy(dst, src, histo_size);90dst->literal = dst_literal;91memcpy(dst->literal, src->literal, literal_size * sizeof(*dst->literal));92}9394void VP8LFreeHistogram(VP8LHistogram* const h) { WebPSafeFree(h); }9596void VP8LFreeHistogramSet(VP8LHistogramSet* const histograms) {97WebPSafeFree(histograms);98}99100void VP8LHistogramCreate(VP8LHistogram* const h,101const VP8LBackwardRefs* const refs,102int palette_code_bits) {103if (palette_code_bits >= 0) {104h->palette_code_bits = palette_code_bits;105}106HistogramClear(h);107VP8LHistogramStoreRefs(refs, /*distance_modifier=*/NULL,108/*distance_modifier_arg0=*/0, h);109}110111void VP8LHistogramInit(VP8LHistogram* const h, int palette_code_bits,112int init_arrays) {113h->palette_code_bits = palette_code_bits;114if (init_arrays) {115HistogramClear(h);116} else {117HistogramStatsClear(h);118}119}120121VP8LHistogram* VP8LAllocateHistogram(int cache_bits) {122VP8LHistogram* histo = NULL;123const int total_size = GetHistogramSize(cache_bits);124uint8_t* const memory = (uint8_t*)WebPSafeMalloc(total_size, sizeof(*memory));125if (memory == NULL) return NULL;126histo = (VP8LHistogram*)memory;127// 'literal' won't necessary be aligned.128histo->literal = (uint32_t*)(memory + sizeof(VP8LHistogram));129VP8LHistogramInit(histo, cache_bits, /*init_arrays=*/ 0);130return histo;131}132133// Resets the pointers of the histograms to point to the bit buffer in the set.134static void HistogramSetResetPointers(VP8LHistogramSet* const set,135int cache_bits) {136int i;137const int histo_size = GetHistogramSize(cache_bits);138uint8_t* memory = (uint8_t*) (set->histograms);139memory += set->max_size * sizeof(*set->histograms);140for (i = 0; i < set->max_size; ++i) {141memory = (uint8_t*) WEBP_ALIGN(memory);142set->histograms[i] = (VP8LHistogram*) memory;143// 'literal' won't necessary be aligned.144set->histograms[i]->literal = (uint32_t*)(memory + sizeof(VP8LHistogram));145memory += histo_size;146}147}148149// Returns the total size of the VP8LHistogramSet.150static size_t HistogramSetTotalSize(int size, int cache_bits) {151const int histo_size = GetHistogramSize(cache_bits);152return (sizeof(VP8LHistogramSet) + size * (sizeof(VP8LHistogram*) +153histo_size + WEBP_ALIGN_CST));154}155156VP8LHistogramSet* VP8LAllocateHistogramSet(int size, int cache_bits) {157int i;158VP8LHistogramSet* set;159const size_t total_size = HistogramSetTotalSize(size, cache_bits);160uint8_t* memory = (uint8_t*)WebPSafeMalloc(total_size, sizeof(*memory));161if (memory == NULL) return NULL;162163set = (VP8LHistogramSet*)memory;164memory += sizeof(*set);165set->histograms = (VP8LHistogram**)memory;166set->max_size = size;167set->size = size;168HistogramSetResetPointers(set, cache_bits);169for (i = 0; i < size; ++i) {170VP8LHistogramInit(set->histograms[i], cache_bits, /*init_arrays=*/ 0);171}172return set;173}174175void VP8LHistogramSetClear(VP8LHistogramSet* const set) {176int i;177const int cache_bits = set->histograms[0]->palette_code_bits;178const int size = set->max_size;179const size_t total_size = HistogramSetTotalSize(size, cache_bits);180uint8_t* memory = (uint8_t*)set;181182memset(memory, 0, total_size);183memory += sizeof(*set);184set->histograms = (VP8LHistogram**)memory;185set->max_size = size;186set->size = size;187HistogramSetResetPointers(set, cache_bits);188for (i = 0; i < size; ++i) {189set->histograms[i]->palette_code_bits = cache_bits;190}191}192193// Removes the histogram 'i' from 'set'.194static void HistogramSetRemoveHistogram(VP8LHistogramSet* const set, int i) {195set->histograms[i] = set->histograms[set->size - 1];196--set->size;197assert(set->size > 0);198}199200// -----------------------------------------------------------------------------201202static void HistogramAddSinglePixOrCopy(203VP8LHistogram* const histo, const PixOrCopy* const v,204int (*const distance_modifier)(int, int), int distance_modifier_arg0) {205if (PixOrCopyIsLiteral(v)) {206++histo->alpha[PixOrCopyLiteral(v, 3)];207++histo->red[PixOrCopyLiteral(v, 2)];208++histo->literal[PixOrCopyLiteral(v, 1)];209++histo->blue[PixOrCopyLiteral(v, 0)];210} else if (PixOrCopyIsCacheIdx(v)) {211const int literal_ix =212NUM_LITERAL_CODES + NUM_LENGTH_CODES + PixOrCopyCacheIdx(v);213assert(histo->palette_code_bits != 0);214++histo->literal[literal_ix];215} else {216int code, extra_bits;217VP8LPrefixEncodeBits(PixOrCopyLength(v), &code, &extra_bits);218++histo->literal[NUM_LITERAL_CODES + code];219if (distance_modifier == NULL) {220VP8LPrefixEncodeBits(PixOrCopyDistance(v), &code, &extra_bits);221} else {222VP8LPrefixEncodeBits(223distance_modifier(distance_modifier_arg0, PixOrCopyDistance(v)),224&code, &extra_bits);225}226++histo->distance[code];227}228}229230void VP8LHistogramStoreRefs(const VP8LBackwardRefs* const refs,231int (*const distance_modifier)(int, int),232int distance_modifier_arg0,233VP8LHistogram* const histo) {234VP8LRefsCursor c = VP8LRefsCursorInit(refs);235while (VP8LRefsCursorOk(&c)) {236HistogramAddSinglePixOrCopy(histo, c.cur_pos, distance_modifier,237distance_modifier_arg0);238VP8LRefsCursorNext(&c);239}240}241242// -----------------------------------------------------------------------------243// Entropy-related functions.244245static WEBP_INLINE uint64_t BitsEntropyRefine(const VP8LBitEntropy* entropy) {246uint64_t mix;247if (entropy->nonzeros < 5) {248if (entropy->nonzeros <= 1) {249return 0;250}251// Two symbols, they will be 0 and 1 in a Huffman code.252// Let's mix in a bit of entropy to favor good clustering when253// distributions of these are combined.254if (entropy->nonzeros == 2) {255return DivRound(99 * ((uint64_t)entropy->sum << LOG_2_PRECISION_BITS) +256entropy->entropy,257100);258}259// No matter what the entropy says, we cannot be better than min_limit260// with Huffman coding. I am mixing a bit of entropy into the261// min_limit since it produces much better (~0.5 %) compression results262// perhaps because of better entropy clustering.263if (entropy->nonzeros == 3) {264mix = 950;265} else {266mix = 700; // nonzeros == 4.267}268} else {269mix = 627;270}271272{273uint64_t min_limit = (uint64_t)(2 * entropy->sum - entropy->max_val)274<< LOG_2_PRECISION_BITS;275min_limit =276DivRound(mix * min_limit + (1000 - mix) * entropy->entropy, 1000);277return (entropy->entropy < min_limit) ? min_limit : entropy->entropy;278}279}280281uint64_t VP8LBitsEntropy(const uint32_t* const array, int n) {282VP8LBitEntropy entropy;283VP8LBitsEntropyUnrefined(array, n, &entropy);284285return BitsEntropyRefine(&entropy);286}287288static uint64_t InitialHuffmanCost(void) {289// Small bias because Huffman code length is typically not stored in290// full length.291static const uint64_t kHuffmanCodeOfHuffmanCodeSize = CODE_LENGTH_CODES * 3;292// Subtract a bias of 9.1.293return (kHuffmanCodeOfHuffmanCodeSize << LOG_2_PRECISION_BITS) -294DivRound(91ll << LOG_2_PRECISION_BITS, 10);295}296297// Finalize the Huffman cost based on streak numbers and length type (<3 or >=3)298static uint64_t FinalHuffmanCost(const VP8LStreaks* const stats) {299// The constants in this function are empirical and got rounded from300// their original values in 1/8 when switched to 1/1024.301uint64_t retval = InitialHuffmanCost();302// Second coefficient: Many zeros in the histogram are covered efficiently303// by a run-length encode. Originally 2/8.304uint32_t retval_extra = stats->counts[0] * 1600 + 240 * stats->streaks[0][1];305// Second coefficient: Constant values are encoded less efficiently, but still306// RLE'ed. Originally 6/8.307retval_extra += stats->counts[1] * 2640 + 720 * stats->streaks[1][1];308// 0s are usually encoded more efficiently than non-0s.309// Originally 15/8.310retval_extra += 1840 * stats->streaks[0][0];311// Originally 26/8.312retval_extra += 3360 * stats->streaks[1][0];313return retval + ((uint64_t)retval_extra << (LOG_2_PRECISION_BITS - 10));314}315316// Get the symbol entropy for the distribution 'population'.317// Set 'trivial_sym', if there's only one symbol present in the distribution.318static uint64_t PopulationCost(const uint32_t* const population, int length,319uint16_t* const trivial_sym,320uint8_t* const is_used) {321VP8LBitEntropy bit_entropy;322VP8LStreaks stats;323VP8LGetEntropyUnrefined(population, length, &bit_entropy, &stats);324if (trivial_sym != NULL) {325*trivial_sym = (bit_entropy.nonzeros == 1) ? bit_entropy.nonzero_code326: VP8L_NON_TRIVIAL_SYM;327}328if (is_used != NULL) {329// The histogram is used if there is at least one non-zero streak.330*is_used = (stats.streaks[1][0] != 0 || stats.streaks[1][1] != 0);331}332333return BitsEntropyRefine(&bit_entropy) + FinalHuffmanCost(&stats);334}335336static WEBP_INLINE void GetPopulationInfo(const VP8LHistogram* const histo,337HistogramIndex index,338const uint32_t** population,339int* length) {340switch (index) {341case LITERAL:342*population = histo->literal;343*length = VP8LHistogramNumCodes(histo->palette_code_bits);344break;345case RED:346*population = histo->red;347*length = NUM_LITERAL_CODES;348break;349case BLUE:350*population = histo->blue;351*length = NUM_LITERAL_CODES;352break;353case ALPHA:354*population = histo->alpha;355*length = NUM_LITERAL_CODES;356break;357case DISTANCE:358*population = histo->distance;359*length = NUM_DISTANCE_CODES;360break;361}362}363364// trivial_at_end is 1 if the two histograms only have one element that is365// non-zero: both the zero-th one, or both the last one.366// 'index' is the index of the symbol in the histogram (literal, red, blue,367// alpha, distance).368static WEBP_INLINE uint64_t GetCombinedEntropy(const VP8LHistogram* const h1,369const VP8LHistogram* const h2,370HistogramIndex index) {371const uint32_t* X;372const uint32_t* Y;373int length;374VP8LStreaks stats;375VP8LBitEntropy bit_entropy;376const int is_h1_used = h1->is_used[index];377const int is_h2_used = h2->is_used[index];378const int is_trivial = h1->trivial_symbol[index] != VP8L_NON_TRIVIAL_SYM &&379h1->trivial_symbol[index] == h2->trivial_symbol[index];380381if (is_trivial || !is_h1_used || !is_h2_used) {382if (is_h1_used) return h1->costs[index];383return h2->costs[index];384}385assert(is_h1_used && is_h2_used);386387GetPopulationInfo(h1, index, &X, &length);388GetPopulationInfo(h2, index, &Y, &length);389VP8LGetCombinedEntropyUnrefined(X, Y, length, &bit_entropy, &stats);390return BitsEntropyRefine(&bit_entropy) + FinalHuffmanCost(&stats);391}392393// Estimates the Entropy + Huffman + other block overhead size cost.394uint64_t VP8LHistogramEstimateBits(const VP8LHistogram* const h) {395int i;396uint64_t cost = 0;397for (i = 0; i < 5; ++i) {398int length;399const uint32_t* population;400GetPopulationInfo(h, (HistogramIndex)i, &population, &length);401cost += PopulationCost(population, length, /*trivial_sym=*/NULL,402/*is_used=*/NULL);403}404cost += ((uint64_t)(VP8LExtraCost(h->literal + NUM_LITERAL_CODES,405NUM_LENGTH_CODES) +406VP8LExtraCost(h->distance, NUM_DISTANCE_CODES))407<< LOG_2_PRECISION_BITS);408return cost;409}410411// -----------------------------------------------------------------------------412// Various histogram combine/cost-eval functions413414// Set a + b in b, saturating at WEBP_INT64_MAX.415static WEBP_INLINE void SaturateAdd(uint64_t a, int64_t* b) {416if (*b < 0 || (int64_t)a <= WEBP_INT64_MAX - *b) {417*b += (int64_t)a;418} else {419*b = WEBP_INT64_MAX;420}421}422423// Returns 1 if the cost of the combined histogram is less than the threshold.424// Otherwise returns 0 and the cost is invalid due to early bail-out.425WEBP_NODISCARD static int GetCombinedHistogramEntropy(426const VP8LHistogram* const a, const VP8LHistogram* const b,427int64_t cost_threshold_in, uint64_t* cost, uint64_t costs[5]) {428int i;429const uint64_t cost_threshold = (uint64_t)cost_threshold_in;430assert(a->palette_code_bits == b->palette_code_bits);431if (cost_threshold_in <= 0) return 0;432*cost = 0;433434// No need to add the extra cost for length and distance as it is a constant435// that does not influence the histograms.436for (i = 0; i < 5; ++i) {437costs[i] = GetCombinedEntropy(a, b, (HistogramIndex)i);438*cost += costs[i];439if (*cost >= cost_threshold) return 0;440}441442return 1;443}444445static WEBP_INLINE void HistogramAdd(const VP8LHistogram* const h1,446const VP8LHistogram* const h2,447VP8LHistogram* const hout) {448int i;449assert(h1->palette_code_bits == h2->palette_code_bits);450451for (i = 0; i < 5; ++i) {452int length;453const uint32_t *p1, *p2, *pout_const;454uint32_t* pout;455GetPopulationInfo(h1, (HistogramIndex)i, &p1, &length);456GetPopulationInfo(h2, (HistogramIndex)i, &p2, &length);457GetPopulationInfo(hout, (HistogramIndex)i, &pout_const, &length);458pout = (uint32_t*)pout_const;459if (h2 == hout) {460if (h1->is_used[i]) {461if (hout->is_used[i]) {462VP8LAddVectorEq(p1, pout, length);463} else {464memcpy(pout, p1, length * sizeof(pout[0]));465}466}467} else {468if (h1->is_used[i]) {469if (h2->is_used[i]) {470VP8LAddVector(p1, p2, pout, length);471} else {472memcpy(pout, p1, length * sizeof(pout[0]));473}474} else if (h2->is_used[i]) {475memcpy(pout, p2, length * sizeof(pout[0]));476} else {477memset(pout, 0, length * sizeof(pout[0]));478}479}480}481482for (i = 0; i < 5; ++i) {483hout->trivial_symbol[i] = h1->trivial_symbol[i] == h2->trivial_symbol[i]484? h1->trivial_symbol[i]485: VP8L_NON_TRIVIAL_SYM;486hout->is_used[i] = h1->is_used[i] || h2->is_used[i];487}488}489490static void UpdateHistogramCost(uint64_t bit_cost, uint64_t costs[5],491VP8LHistogram* const h) {492int i;493h->bit_cost = bit_cost;494for (i = 0; i < 5; ++i) {495h->costs[i] = costs[i];496}497}498499// Performs out = a + b, computing the cost C(a+b) - C(a) - C(b) while comparing500// to the threshold value 'cost_threshold'. The score returned is501// Score = C(a+b) - C(a) - C(b), where C(a) + C(b) is known and fixed.502// Since the previous score passed is 'cost_threshold', we only need to compare503// the partial cost against 'cost_threshold + C(a) + C(b)' to possibly bail-out504// early.505// Returns 1 if the cost is less than the threshold.506// Otherwise returns 0 and the cost is invalid due to early bail-out.507WEBP_NODISCARD static int HistogramAddEval(const VP8LHistogram* const a,508const VP8LHistogram* const b,509VP8LHistogram* const out,510int64_t cost_threshold) {511const uint64_t sum_cost = a->bit_cost + b->bit_cost;512uint64_t bit_cost, costs[5];513SaturateAdd(sum_cost, &cost_threshold);514if (!GetCombinedHistogramEntropy(a, b, cost_threshold, &bit_cost, costs)) {515return 0;516}517518HistogramAdd(a, b, out);519UpdateHistogramCost(bit_cost, costs, out);520return 1;521}522523// Same as HistogramAddEval(), except that the resulting histogram524// is not stored. Only the cost C(a+b) - C(a) is evaluated. We omit525// the term C(b) which is constant over all the evaluations.526// Returns 1 if the cost is less than the threshold.527// Otherwise returns 0 and the cost is invalid due to early bail-out.528WEBP_NODISCARD static int HistogramAddThresh(const VP8LHistogram* const a,529const VP8LHistogram* const b,530int64_t cost_threshold,531int64_t* cost_out) {532uint64_t cost, costs[5];533assert(a != NULL && b != NULL);534SaturateAdd(a->bit_cost, &cost_threshold);535if (!GetCombinedHistogramEntropy(a, b, cost_threshold, &cost, costs)) {536return 0;537}538539*cost_out = (int64_t)cost - (int64_t)a->bit_cost;540return 1;541}542543// -----------------------------------------------------------------------------544545// The structure to keep track of cost range for the three dominant entropy546// symbols.547typedef struct {548uint64_t literal_max;549uint64_t literal_min;550uint64_t red_max;551uint64_t red_min;552uint64_t blue_max;553uint64_t blue_min;554} DominantCostRange;555556static void DominantCostRangeInit(DominantCostRange* const c) {557c->literal_max = 0;558c->literal_min = WEBP_UINT64_MAX;559c->red_max = 0;560c->red_min = WEBP_UINT64_MAX;561c->blue_max = 0;562c->blue_min = WEBP_UINT64_MAX;563}564565static void UpdateDominantCostRange(566const VP8LHistogram* const h, DominantCostRange* const c) {567if (c->literal_max < h->costs[LITERAL]) c->literal_max = h->costs[LITERAL];568if (c->literal_min > h->costs[LITERAL]) c->literal_min = h->costs[LITERAL];569if (c->red_max < h->costs[RED]) c->red_max = h->costs[RED];570if (c->red_min > h->costs[RED]) c->red_min = h->costs[RED];571if (c->blue_max < h->costs[BLUE]) c->blue_max = h->costs[BLUE];572if (c->blue_min > h->costs[BLUE]) c->blue_min = h->costs[BLUE];573}574575static void ComputeHistogramCost(VP8LHistogram* const h) {576int i;577// No need to add the extra cost for length and distance as it is a constant578// that does not influence the histograms.579for (i = 0; i < 5; ++i) {580const uint32_t* population;581int length;582GetPopulationInfo(h, i, &population, &length);583h->costs[i] = PopulationCost(population, length, &h->trivial_symbol[i],584&h->is_used[i]);585}586h->bit_cost = h->costs[LITERAL] + h->costs[RED] + h->costs[BLUE] +587h->costs[ALPHA] + h->costs[DISTANCE];588}589590static int GetBinIdForEntropy(uint64_t min, uint64_t max, uint64_t val) {591const uint64_t range = max - min;592if (range > 0) {593const uint64_t delta = val - min;594return (int)((NUM_PARTITIONS - 1e-6) * delta / range);595} else {596return 0;597}598}599600static int GetHistoBinIndex(const VP8LHistogram* const h,601const DominantCostRange* const c, int low_effort) {602int bin_id =603GetBinIdForEntropy(c->literal_min, c->literal_max, h->costs[LITERAL]);604assert(bin_id < NUM_PARTITIONS);605if (!low_effort) {606bin_id = bin_id * NUM_PARTITIONS +607GetBinIdForEntropy(c->red_min, c->red_max, h->costs[RED]);608bin_id = bin_id * NUM_PARTITIONS +609GetBinIdForEntropy(c->blue_min, c->blue_max, h->costs[BLUE]);610assert(bin_id < BIN_SIZE);611}612return bin_id;613}614615// Construct the histograms from backward references.616static void HistogramBuild(617int xsize, int histo_bits, const VP8LBackwardRefs* const backward_refs,618VP8LHistogramSet* const image_histo) {619int x = 0, y = 0;620const int histo_xsize = VP8LSubSampleSize(xsize, histo_bits);621VP8LHistogram** const histograms = image_histo->histograms;622VP8LRefsCursor c = VP8LRefsCursorInit(backward_refs);623assert(histo_bits > 0);624VP8LHistogramSetClear(image_histo);625while (VP8LRefsCursorOk(&c)) {626const PixOrCopy* const v = c.cur_pos;627const int ix = (y >> histo_bits) * histo_xsize + (x >> histo_bits);628HistogramAddSinglePixOrCopy(histograms[ix], v, NULL, 0);629x += PixOrCopyLength(v);630while (x >= xsize) {631x -= xsize;632++y;633}634VP8LRefsCursorNext(&c);635}636}637638// Copies the histograms and computes its bit_cost.639static void HistogramCopyAndAnalyze(VP8LHistogramSet* const orig_histo,640VP8LHistogramSet* const image_histo) {641int i;642VP8LHistogram** const orig_histograms = orig_histo->histograms;643VP8LHistogram** const histograms = image_histo->histograms;644assert(image_histo->max_size == orig_histo->max_size);645image_histo->size = 0;646for (i = 0; i < orig_histo->max_size; ++i) {647VP8LHistogram* const histo = orig_histograms[i];648ComputeHistogramCost(histo);649650// Skip the histogram if it is completely empty, which can happen for tiles651// with no information (when they are skipped because of LZ77).652if (!histo->is_used[LITERAL] && !histo->is_used[RED] &&653!histo->is_used[BLUE] && !histo->is_used[ALPHA] &&654!histo->is_used[DISTANCE]) {655// The first histogram is always used.656assert(i > 0);657orig_histograms[i] = NULL;658} else {659// Copy histograms from orig_histo[] to image_histo[].660HistogramCopy(histo, histograms[image_histo->size]);661++image_histo->size;662}663}664}665666// Partition histograms to different entropy bins for three dominant (literal,667// red and blue) symbol costs and compute the histogram aggregate bit_cost.668static void HistogramAnalyzeEntropyBin(VP8LHistogramSet* const image_histo,669int low_effort) {670int i;671VP8LHistogram** const histograms = image_histo->histograms;672const int histo_size = image_histo->size;673DominantCostRange cost_range;674DominantCostRangeInit(&cost_range);675676// Analyze the dominant (literal, red and blue) entropy costs.677for (i = 0; i < histo_size; ++i) {678UpdateDominantCostRange(histograms[i], &cost_range);679}680681// bin-hash histograms on three of the dominant (literal, red and blue)682// symbol costs and store the resulting bin_id for each histogram.683for (i = 0; i < histo_size; ++i) {684histograms[i]->bin_id =685GetHistoBinIndex(histograms[i], &cost_range, low_effort);686}687}688689// Merges some histograms with same bin_id together if it's advantageous.690// Sets the remaining histograms to NULL.691// 'combine_cost_factor' has to be divided by 100.692static void HistogramCombineEntropyBin(VP8LHistogramSet* const image_histo,693VP8LHistogram* cur_combo, int num_bins,694int32_t combine_cost_factor,695int low_effort) {696VP8LHistogram** const histograms = image_histo->histograms;697int idx;698struct {699int16_t first; // position of the histogram that accumulates all700// histograms with the same bin_id701uint16_t num_combine_failures; // number of combine failures per bin_id702} bin_info[BIN_SIZE];703704assert(num_bins <= BIN_SIZE);705for (idx = 0; idx < num_bins; ++idx) {706bin_info[idx].first = -1;707bin_info[idx].num_combine_failures = 0;708}709710for (idx = 0; idx < image_histo->size;) {711const int bin_id = histograms[idx]->bin_id;712const int first = bin_info[bin_id].first;713if (first == -1) {714bin_info[bin_id].first = idx;715++idx;716} else if (low_effort) {717HistogramAdd(histograms[idx], histograms[first], histograms[first]);718HistogramSetRemoveHistogram(image_histo, idx);719} else {720// try to merge #idx into #first (both share the same bin_id)721const uint64_t bit_cost = histograms[idx]->bit_cost;722const int64_t bit_cost_thresh =723-DivRound((int64_t)bit_cost * combine_cost_factor, 100);724if (HistogramAddEval(histograms[first], histograms[idx], cur_combo,725bit_cost_thresh)) {726const int max_combine_failures = 32;727// Try to merge two histograms only if the combo is a trivial one or728// the two candidate histograms are already non-trivial.729// For some images, 'try_combine' turns out to be false for a lot of730// histogram pairs. In that case, we fallback to combining731// histograms as usual to avoid increasing the header size.732int try_combine =733cur_combo->trivial_symbol[RED] != VP8L_NON_TRIVIAL_SYM &&734cur_combo->trivial_symbol[BLUE] != VP8L_NON_TRIVIAL_SYM &&735cur_combo->trivial_symbol[ALPHA] != VP8L_NON_TRIVIAL_SYM;736if (!try_combine) {737try_combine =738histograms[idx]->trivial_symbol[RED] == VP8L_NON_TRIVIAL_SYM ||739histograms[idx]->trivial_symbol[BLUE] == VP8L_NON_TRIVIAL_SYM ||740histograms[idx]->trivial_symbol[ALPHA] == VP8L_NON_TRIVIAL_SYM;741try_combine &=742histograms[first]->trivial_symbol[RED] == VP8L_NON_TRIVIAL_SYM ||743histograms[first]->trivial_symbol[BLUE] == VP8L_NON_TRIVIAL_SYM ||744histograms[first]->trivial_symbol[ALPHA] == VP8L_NON_TRIVIAL_SYM;745}746if (try_combine ||747bin_info[bin_id].num_combine_failures >= max_combine_failures) {748// move the (better) merged histogram to its final slot749HistogramSwap(&cur_combo, &histograms[first]);750HistogramSetRemoveHistogram(image_histo, idx);751} else {752++bin_info[bin_id].num_combine_failures;753++idx;754}755} else {756++idx;757}758}759}760if (low_effort) {761// for low_effort case, update the final cost when everything is merged762for (idx = 0; idx < image_histo->size; ++idx) {763ComputeHistogramCost(histograms[idx]);764}765}766}767768// Implement a Lehmer random number generator with a multiplicative constant of769// 48271 and a modulo constant of 2^31 - 1.770static uint32_t MyRand(uint32_t* const seed) {771*seed = (uint32_t)(((uint64_t)(*seed) * 48271u) % 2147483647u);772assert(*seed > 0);773return *seed;774}775776// -----------------------------------------------------------------------------777// Histogram pairs priority queue778779// Pair of histograms. Negative idx1 value means that pair is out-of-date.780typedef struct {781int idx1;782int idx2;783int64_t cost_diff;784uint64_t cost_combo;785uint64_t costs[5];786} HistogramPair;787788typedef struct {789HistogramPair* queue;790int size;791int max_size;792} HistoQueue;793794static int HistoQueueInit(HistoQueue* const histo_queue, const int max_size) {795histo_queue->size = 0;796histo_queue->max_size = max_size;797// We allocate max_size + 1 because the last element at index "size" is798// used as temporary data (and it could be up to max_size).799histo_queue->queue = (HistogramPair*)WebPSafeMalloc(800histo_queue->max_size + 1, sizeof(*histo_queue->queue));801return histo_queue->queue != NULL;802}803804static void HistoQueueClear(HistoQueue* const histo_queue) {805assert(histo_queue != NULL);806WebPSafeFree(histo_queue->queue);807histo_queue->size = 0;808histo_queue->max_size = 0;809}810811// Pop a specific pair in the queue by replacing it with the last one812// and shrinking the queue.813static void HistoQueuePopPair(HistoQueue* const histo_queue,814HistogramPair* const pair) {815assert(pair >= histo_queue->queue &&816pair < (histo_queue->queue + histo_queue->size));817assert(histo_queue->size > 0);818*pair = histo_queue->queue[histo_queue->size - 1];819--histo_queue->size;820}821822// Check whether a pair in the queue should be updated as head or not.823static void HistoQueueUpdateHead(HistoQueue* const histo_queue,824HistogramPair* const pair) {825assert(pair->cost_diff < 0);826assert(pair >= histo_queue->queue &&827pair < (histo_queue->queue + histo_queue->size));828assert(histo_queue->size > 0);829if (pair->cost_diff < histo_queue->queue[0].cost_diff) {830// Replace the best pair.831const HistogramPair tmp = histo_queue->queue[0];832histo_queue->queue[0] = *pair;833*pair = tmp;834}835}836837// Replaces the bad_id with good_id in the pair.838static void HistoQueueFixPair(int bad_id, int good_id,839HistogramPair* const pair) {840if (pair->idx1 == bad_id) pair->idx1 = good_id;841if (pair->idx2 == bad_id) pair->idx2 = good_id;842if (pair->idx1 > pair->idx2) {843const int tmp = pair->idx1;844pair->idx1 = pair->idx2;845pair->idx2 = tmp;846}847}848849// Update the cost diff and combo of a pair of histograms. This needs to be850// called when the histograms have been merged with a third one.851// Returns 1 if the cost diff is less than the threshold.852// Otherwise returns 0 and the cost is invalid due to early bail-out.853WEBP_NODISCARD static int HistoQueueUpdatePair(const VP8LHistogram* const h1,854const VP8LHistogram* const h2,855int64_t cost_threshold,856HistogramPair* const pair) {857const int64_t sum_cost = h1->bit_cost + h2->bit_cost;858SaturateAdd(sum_cost, &cost_threshold);859if (!GetCombinedHistogramEntropy(h1, h2, cost_threshold, &pair->cost_combo,860pair->costs)) {861return 0;862}863pair->cost_diff = (int64_t)pair->cost_combo - sum_cost;864return 1;865}866867// Create a pair from indices "idx1" and "idx2" provided its cost868// is inferior to "threshold", a negative entropy.869// It returns the cost of the pair, or 0 if it superior to threshold.870static int64_t HistoQueuePush(HistoQueue* const histo_queue,871VP8LHistogram** const histograms, int idx1,872int idx2, int64_t threshold) {873const VP8LHistogram* h1;874const VP8LHistogram* h2;875HistogramPair pair;876877// Stop here if the queue is full.878if (histo_queue->size == histo_queue->max_size) return 0;879assert(threshold <= 0);880if (idx1 > idx2) {881const int tmp = idx2;882idx2 = idx1;883idx1 = tmp;884}885pair.idx1 = idx1;886pair.idx2 = idx2;887h1 = histograms[idx1];888h2 = histograms[idx2];889890// Do not even consider the pair if it does not improve the entropy.891if (!HistoQueueUpdatePair(h1, h2, threshold, &pair)) return 0;892893histo_queue->queue[histo_queue->size++] = pair;894HistoQueueUpdateHead(histo_queue, &histo_queue->queue[histo_queue->size - 1]);895896return pair.cost_diff;897}898899// -----------------------------------------------------------------------------900901// Combines histograms by continuously choosing the one with the highest cost902// reduction.903static int HistogramCombineGreedy(VP8LHistogramSet* const image_histo) {904int ok = 0;905const int image_histo_size = image_histo->size;906int i, j;907VP8LHistogram** const histograms = image_histo->histograms;908// Priority queue of histogram pairs.909HistoQueue histo_queue;910911// image_histo_size^2 for the queue size is safe. If you look at912// HistogramCombineGreedy, and imagine that UpdateQueueFront always pushes913// data to the queue, you insert at most:914// - image_histo_size*(image_histo_size-1)/2 (the first two for loops)915// - image_histo_size - 1 in the last for loop at the first iteration of916// the while loop, image_histo_size - 2 at the second iteration ...917// therefore image_histo_size*(image_histo_size-1)/2 overall too918if (!HistoQueueInit(&histo_queue, image_histo_size * image_histo_size)) {919goto End;920}921922// Initialize the queue.923for (i = 0; i < image_histo_size; ++i) {924for (j = i + 1; j < image_histo_size; ++j) {925HistoQueuePush(&histo_queue, histograms, i, j, 0);926}927}928929while (histo_queue.size > 0) {930const int idx1 = histo_queue.queue[0].idx1;931const int idx2 = histo_queue.queue[0].idx2;932HistogramAdd(histograms[idx2], histograms[idx1], histograms[idx1]);933UpdateHistogramCost(histo_queue.queue[0].cost_combo,934histo_queue.queue[0].costs, histograms[idx1]);935936// Remove merged histogram.937HistogramSetRemoveHistogram(image_histo, idx2);938939// Remove pairs intersecting the just combined best pair.940for (i = 0; i < histo_queue.size;) {941HistogramPair* const p = histo_queue.queue + i;942if (p->idx1 == idx1 || p->idx2 == idx1 ||943p->idx1 == idx2 || p->idx2 == idx2) {944HistoQueuePopPair(&histo_queue, p);945} else {946HistoQueueFixPair(image_histo->size, idx2, p);947HistoQueueUpdateHead(&histo_queue, p);948++i;949}950}951952// Push new pairs formed with combined histogram to the queue.953for (i = 0; i < image_histo->size; ++i) {954if (i == idx1) continue;955HistoQueuePush(&histo_queue, image_histo->histograms, idx1, i, 0);956}957}958959ok = 1;960961End:962HistoQueueClear(&histo_queue);963return ok;964}965966// Perform histogram aggregation using a stochastic approach.967// 'do_greedy' is set to 1 if a greedy approach needs to be performed968// afterwards, 0 otherwise.969static int HistogramCombineStochastic(VP8LHistogramSet* const image_histo,970int min_cluster_size,971int* const do_greedy) {972int j, iter;973uint32_t seed = 1;974int tries_with_no_success = 0;975const int outer_iters = image_histo->size;976const int num_tries_no_success = outer_iters / 2;977VP8LHistogram** const histograms = image_histo->histograms;978// Priority queue of histogram pairs. Its size of 'kHistoQueueSize'979// impacts the quality of the compression and the speed: the smaller the980// faster but the worse for the compression.981HistoQueue histo_queue;982const int kHistoQueueSize = 9;983int ok = 0;984985if (image_histo->size < min_cluster_size) {986*do_greedy = 1;987return 1;988}989990if (!HistoQueueInit(&histo_queue, kHistoQueueSize)) goto End;991992// Collapse similar histograms in 'image_histo'.993for (iter = 0; iter < outer_iters && image_histo->size >= min_cluster_size &&994++tries_with_no_success < num_tries_no_success;995++iter) {996int64_t best_cost =997(histo_queue.size == 0) ? 0 : histo_queue.queue[0].cost_diff;998int best_idx1 = -1, best_idx2 = 1;999const uint32_t rand_range = (image_histo->size - 1) * (image_histo->size);1000// (image_histo->size) / 2 was chosen empirically. Less means faster but1001// worse compression.1002const int num_tries = (image_histo->size) / 2;10031004// Pick random samples.1005for (j = 0; image_histo->size >= 2 && j < num_tries; ++j) {1006int64_t curr_cost;1007// Choose two different histograms at random and try to combine them.1008const uint32_t tmp = MyRand(&seed) % rand_range;1009uint32_t idx1 = tmp / (image_histo->size - 1);1010uint32_t idx2 = tmp % (image_histo->size - 1);1011if (idx2 >= idx1) ++idx2;10121013// Calculate cost reduction on combination.1014curr_cost =1015HistoQueuePush(&histo_queue, histograms, idx1, idx2, best_cost);1016if (curr_cost < 0) { // found a better pair?1017best_cost = curr_cost;1018// Empty the queue if we reached full capacity.1019if (histo_queue.size == histo_queue.max_size) break;1020}1021}1022if (histo_queue.size == 0) continue;10231024// Get the best histograms.1025best_idx1 = histo_queue.queue[0].idx1;1026best_idx2 = histo_queue.queue[0].idx2;1027assert(best_idx1 < best_idx2);1028// Merge the histograms and remove best_idx2 from the queue.1029HistogramAdd(histograms[best_idx2], histograms[best_idx1],1030histograms[best_idx1]);1031UpdateHistogramCost(histo_queue.queue[0].cost_combo,1032histo_queue.queue[0].costs, histograms[best_idx1]);1033HistogramSetRemoveHistogram(image_histo, best_idx2);1034// Parse the queue and update each pair that deals with best_idx1,1035// best_idx2 or image_histo_size.1036for (j = 0; j < histo_queue.size;) {1037HistogramPair* const p = histo_queue.queue + j;1038const int is_idx1_best = p->idx1 == best_idx1 || p->idx1 == best_idx2;1039const int is_idx2_best = p->idx2 == best_idx1 || p->idx2 == best_idx2;1040// The front pair could have been duplicated by a random pick so1041// check for it all the time nevertheless.1042if (is_idx1_best && is_idx2_best) {1043HistoQueuePopPair(&histo_queue, p);1044continue;1045}1046// Any pair containing one of the two best indices should only refer to1047// best_idx1. Its cost should also be updated.1048if (is_idx1_best || is_idx2_best) {1049HistoQueueFixPair(best_idx2, best_idx1, p);1050// Re-evaluate the cost of an updated pair.1051if (!HistoQueueUpdatePair(histograms[p->idx1], histograms[p->idx2], 0,1052p)) {1053HistoQueuePopPair(&histo_queue, p);1054continue;1055}1056}1057HistoQueueFixPair(image_histo->size, best_idx2, p);1058HistoQueueUpdateHead(&histo_queue, p);1059++j;1060}1061tries_with_no_success = 0;1062}1063*do_greedy = (image_histo->size <= min_cluster_size);1064ok = 1;10651066End:1067HistoQueueClear(&histo_queue);1068return ok;1069}10701071// -----------------------------------------------------------------------------1072// Histogram refinement10731074// Find the best 'out' histogram for each of the 'in' histograms.1075// At call-time, 'out' contains the histograms of the clusters.1076// Note: we assume that out[]->bit_cost is already up-to-date.1077static void HistogramRemap(const VP8LHistogramSet* const in,1078VP8LHistogramSet* const out,1079uint32_t* const symbols) {1080int i;1081VP8LHistogram** const in_histo = in->histograms;1082VP8LHistogram** const out_histo = out->histograms;1083const int in_size = out->max_size;1084const int out_size = out->size;1085if (out_size > 1) {1086for (i = 0; i < in_size; ++i) {1087int best_out = 0;1088int64_t best_bits = WEBP_INT64_MAX;1089int k;1090if (in_histo[i] == NULL) {1091// Arbitrarily set to the previous value if unused to help future LZ77.1092symbols[i] = symbols[i - 1];1093continue;1094}1095for (k = 0; k < out_size; ++k) {1096int64_t cur_bits;1097if (HistogramAddThresh(out_histo[k], in_histo[i], best_bits,1098&cur_bits)) {1099best_bits = cur_bits;1100best_out = k;1101}1102}1103symbols[i] = best_out;1104}1105} else {1106assert(out_size == 1);1107for (i = 0; i < in_size; ++i) {1108symbols[i] = 0;1109}1110}11111112// Recompute each out based on raw and symbols.1113VP8LHistogramSetClear(out);1114out->size = out_size;11151116for (i = 0; i < in_size; ++i) {1117int idx;1118if (in_histo[i] == NULL) continue;1119idx = symbols[i];1120HistogramAdd(in_histo[i], out_histo[idx], out_histo[idx]);1121}1122}11231124static int32_t GetCombineCostFactor(int histo_size, int quality) {1125int32_t combine_cost_factor = 16;1126if (quality < 90) {1127if (histo_size > 256) combine_cost_factor /= 2;1128if (histo_size > 512) combine_cost_factor /= 2;1129if (histo_size > 1024) combine_cost_factor /= 2;1130if (quality <= 50) combine_cost_factor /= 2;1131}1132return combine_cost_factor;1133}11341135int VP8LGetHistoImageSymbols(int xsize, int ysize,1136const VP8LBackwardRefs* const refs, int quality,1137int low_effort, int histogram_bits, int cache_bits,1138VP8LHistogramSet* const image_histo,1139VP8LHistogram* const tmp_histo,1140uint32_t* const histogram_symbols,1141const WebPPicture* const pic, int percent_range,1142int* const percent) {1143const int histo_xsize =1144histogram_bits ? VP8LSubSampleSize(xsize, histogram_bits) : 1;1145const int histo_ysize =1146histogram_bits ? VP8LSubSampleSize(ysize, histogram_bits) : 1;1147const int image_histo_raw_size = histo_xsize * histo_ysize;1148VP8LHistogramSet* const orig_histo =1149VP8LAllocateHistogramSet(image_histo_raw_size, cache_bits);1150// Don't attempt linear bin-partition heuristic for1151// histograms of small sizes (as bin_map will be very sparse) and1152// maximum quality q==100 (to preserve the compression gains at that level).1153const int entropy_combine_num_bins = low_effort ? NUM_PARTITIONS : BIN_SIZE;1154int entropy_combine;1155if (orig_histo == NULL) {1156WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);1157goto Error;1158}11591160// Construct the histograms from backward references.1161HistogramBuild(xsize, histogram_bits, refs, orig_histo);1162HistogramCopyAndAnalyze(orig_histo, image_histo);1163entropy_combine =1164(image_histo->size > entropy_combine_num_bins * 2) && (quality < 100);11651166if (entropy_combine) {1167const int32_t combine_cost_factor =1168GetCombineCostFactor(image_histo_raw_size, quality);11691170HistogramAnalyzeEntropyBin(image_histo, low_effort);1171// Collapse histograms with similar entropy.1172HistogramCombineEntropyBin(image_histo, tmp_histo, entropy_combine_num_bins,1173combine_cost_factor, low_effort);1174}11751176// Don't combine the histograms using stochastic and greedy heuristics for1177// low-effort compression mode.1178if (!low_effort || !entropy_combine) {1179// cubic ramp between 1 and MAX_HISTO_GREEDY:1180const int threshold_size =1181(int)(1 + DivRound(quality * quality * quality * (MAX_HISTO_GREEDY - 1),1182100 * 100 * 100));1183int do_greedy;1184if (!HistogramCombineStochastic(image_histo, threshold_size, &do_greedy)) {1185WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);1186goto Error;1187}1188if (do_greedy) {1189if (!HistogramCombineGreedy(image_histo)) {1190WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);1191goto Error;1192}1193}1194}11951196// Find the optimal map from original histograms to the final ones.1197HistogramRemap(orig_histo, image_histo, histogram_symbols);11981199if (!WebPReportProgress(pic, *percent + percent_range, percent)) {1200goto Error;1201}12021203Error:1204VP8LFreeHistogramSet(orig_histo);1205return (pic->error_code == VP8_ENC_OK);1206}120712081209