Path: blob/master/3rdparty/libwebp/src/enc/histogram_enc.c
16345 views
// Copyright 2012 Google Inc. All Rights Reserved.1//2// Use of this source code is governed by a BSD-style license3// that can be found in the COPYING file in the root of the source4// tree. An additional intellectual property rights grant can be found5// in the file PATENTS. All contributing project authors may6// be found in the AUTHORS file in the root of the source tree.7// -----------------------------------------------------------------------------8//9// Author: Jyrki Alakuijala ([email protected])10//11#ifdef HAVE_CONFIG_H12#include "src/webp/config.h"13#endif1415#include <math.h>1617#include "src/enc/backward_references_enc.h"18#include "src/enc/histogram_enc.h"19#include "src/dsp/lossless.h"20#include "src/dsp/lossless_common.h"21#include "src/utils/utils.h"2223#define MAX_COST 1.e382425// Number of partitions for the three dominant (literal, red and blue) symbol26// costs.27#define NUM_PARTITIONS 428// The size of the bin-hash corresponding to the three dominant costs.29#define BIN_SIZE (NUM_PARTITIONS * NUM_PARTITIONS * NUM_PARTITIONS)30// Maximum number of histograms allowed in greedy combining algorithm.31#define MAX_HISTO_GREEDY 1003233static void HistogramClear(VP8LHistogram* const p) {34uint32_t* const literal = p->literal_;35const int cache_bits = p->palette_code_bits_;36const int histo_size = VP8LGetHistogramSize(cache_bits);37memset(p, 0, histo_size);38p->palette_code_bits_ = cache_bits;39p->literal_ = literal;40}4142// Swap two histogram pointers.43static void HistogramSwap(VP8LHistogram** const A, VP8LHistogram** const B) {44VP8LHistogram* const tmp = *A;45*A = *B;46*B = tmp;47}4849static void HistogramCopy(const VP8LHistogram* const src,50VP8LHistogram* const dst) {51uint32_t* const dst_literal = dst->literal_;52const int dst_cache_bits = dst->palette_code_bits_;53const int histo_size = VP8LGetHistogramSize(dst_cache_bits);54assert(src->palette_code_bits_ == dst_cache_bits);55memcpy(dst, src, histo_size);56dst->literal_ = dst_literal;57}5859int VP8LGetHistogramSize(int cache_bits) {60const int literal_size = VP8LHistogramNumCodes(cache_bits);61const size_t total_size = sizeof(VP8LHistogram) + sizeof(int) * literal_size;62assert(total_size <= (size_t)0x7fffffff);63return (int)total_size;64}6566void VP8LFreeHistogram(VP8LHistogram* const histo) {67WebPSafeFree(histo);68}6970void VP8LFreeHistogramSet(VP8LHistogramSet* const histo) {71WebPSafeFree(histo);72}7374void VP8LHistogramStoreRefs(const VP8LBackwardRefs* const refs,75VP8LHistogram* const histo) {76VP8LRefsCursor c = VP8LRefsCursorInit(refs);77while (VP8LRefsCursorOk(&c)) {78VP8LHistogramAddSinglePixOrCopy(histo, c.cur_pos, NULL, 0);79VP8LRefsCursorNext(&c);80}81}8283void VP8LHistogramCreate(VP8LHistogram* const p,84const VP8LBackwardRefs* const refs,85int palette_code_bits) {86if (palette_code_bits >= 0) {87p->palette_code_bits_ = palette_code_bits;88}89HistogramClear(p);90VP8LHistogramStoreRefs(refs, p);91}9293void VP8LHistogramInit(VP8LHistogram* const p, int palette_code_bits) {94p->palette_code_bits_ = palette_code_bits;95HistogramClear(p);96}9798VP8LHistogram* VP8LAllocateHistogram(int cache_bits) {99VP8LHistogram* histo = NULL;100const int total_size = VP8LGetHistogramSize(cache_bits);101uint8_t* const memory = (uint8_t*)WebPSafeMalloc(total_size, sizeof(*memory));102if (memory == NULL) return NULL;103histo = (VP8LHistogram*)memory;104// literal_ won't necessary be aligned.105histo->literal_ = (uint32_t*)(memory + sizeof(VP8LHistogram));106VP8LHistogramInit(histo, cache_bits);107return histo;108}109110VP8LHistogramSet* VP8LAllocateHistogramSet(int size, int cache_bits) {111int i;112VP8LHistogramSet* set;113const int histo_size = VP8LGetHistogramSize(cache_bits);114const size_t total_size =115sizeof(*set) + size * (sizeof(*set->histograms) +116histo_size + WEBP_ALIGN_CST);117uint8_t* memory = (uint8_t*)WebPSafeMalloc(total_size, sizeof(*memory));118if (memory == NULL) return NULL;119120set = (VP8LHistogramSet*)memory;121memory += sizeof(*set);122set->histograms = (VP8LHistogram**)memory;123memory += size * sizeof(*set->histograms);124set->max_size = size;125set->size = size;126for (i = 0; i < size; ++i) {127memory = (uint8_t*)WEBP_ALIGN(memory);128set->histograms[i] = (VP8LHistogram*)memory;129// literal_ won't necessary be aligned.130set->histograms[i]->literal_ = (uint32_t*)(memory + sizeof(VP8LHistogram));131VP8LHistogramInit(set->histograms[i], cache_bits);132memory += histo_size;133}134return set;135}136137// -----------------------------------------------------------------------------138139void VP8LHistogramAddSinglePixOrCopy(VP8LHistogram* const histo,140const PixOrCopy* const v,141int (*const distance_modifier)(int, int),142int distance_modifier_arg0) {143if (PixOrCopyIsLiteral(v)) {144++histo->alpha_[PixOrCopyLiteral(v, 3)];145++histo->red_[PixOrCopyLiteral(v, 2)];146++histo->literal_[PixOrCopyLiteral(v, 1)];147++histo->blue_[PixOrCopyLiteral(v, 0)];148} else if (PixOrCopyIsCacheIdx(v)) {149const int literal_ix =150NUM_LITERAL_CODES + NUM_LENGTH_CODES + PixOrCopyCacheIdx(v);151++histo->literal_[literal_ix];152} else {153int code, extra_bits;154VP8LPrefixEncodeBits(PixOrCopyLength(v), &code, &extra_bits);155++histo->literal_[NUM_LITERAL_CODES + code];156if (distance_modifier == NULL) {157VP8LPrefixEncodeBits(PixOrCopyDistance(v), &code, &extra_bits);158} else {159VP8LPrefixEncodeBits(160distance_modifier(distance_modifier_arg0, PixOrCopyDistance(v)),161&code, &extra_bits);162}163++histo->distance_[code];164}165}166167// -----------------------------------------------------------------------------168// Entropy-related functions.169170static WEBP_INLINE double BitsEntropyRefine(const VP8LBitEntropy* entropy) {171double mix;172if (entropy->nonzeros < 5) {173if (entropy->nonzeros <= 1) {174return 0;175}176// Two symbols, they will be 0 and 1 in a Huffman code.177// Let's mix in a bit of entropy to favor good clustering when178// distributions of these are combined.179if (entropy->nonzeros == 2) {180return 0.99 * entropy->sum + 0.01 * entropy->entropy;181}182// No matter what the entropy says, we cannot be better than min_limit183// with Huffman coding. I am mixing a bit of entropy into the184// min_limit since it produces much better (~0.5 %) compression results185// perhaps because of better entropy clustering.186if (entropy->nonzeros == 3) {187mix = 0.95;188} else {189mix = 0.7; // nonzeros == 4.190}191} else {192mix = 0.627;193}194195{196double min_limit = 2 * entropy->sum - entropy->max_val;197min_limit = mix * min_limit + (1.0 - mix) * entropy->entropy;198return (entropy->entropy < min_limit) ? min_limit : entropy->entropy;199}200}201202double VP8LBitsEntropy(const uint32_t* const array, int n) {203VP8LBitEntropy entropy;204VP8LBitsEntropyUnrefined(array, n, &entropy);205206return BitsEntropyRefine(&entropy);207}208209static double InitialHuffmanCost(void) {210// Small bias because Huffman code length is typically not stored in211// full length.212static const int kHuffmanCodeOfHuffmanCodeSize = CODE_LENGTH_CODES * 3;213static const double kSmallBias = 9.1;214return kHuffmanCodeOfHuffmanCodeSize - kSmallBias;215}216217// Finalize the Huffman cost based on streak numbers and length type (<3 or >=3)218static double FinalHuffmanCost(const VP8LStreaks* const stats) {219// The constants in this function are experimental and got rounded from220// their original values in 1/8 when switched to 1/1024.221double retval = InitialHuffmanCost();222// Second coefficient: Many zeros in the histogram are covered efficiently223// by a run-length encode. Originally 2/8.224retval += stats->counts[0] * 1.5625 + 0.234375 * stats->streaks[0][1];225// Second coefficient: Constant values are encoded less efficiently, but still226// RLE'ed. Originally 6/8.227retval += stats->counts[1] * 2.578125 + 0.703125 * stats->streaks[1][1];228// 0s are usually encoded more efficiently than non-0s.229// Originally 15/8.230retval += 1.796875 * stats->streaks[0][0];231// Originally 26/8.232retval += 3.28125 * stats->streaks[1][0];233return retval;234}235236// Get the symbol entropy for the distribution 'population'.237// Set 'trivial_sym', if there's only one symbol present in the distribution.238static double PopulationCost(const uint32_t* const population, int length,239uint32_t* const trivial_sym) {240VP8LBitEntropy bit_entropy;241VP8LStreaks stats;242VP8LGetEntropyUnrefined(population, length, &bit_entropy, &stats);243if (trivial_sym != NULL) {244*trivial_sym = (bit_entropy.nonzeros == 1) ? bit_entropy.nonzero_code245: VP8L_NON_TRIVIAL_SYM;246}247248return BitsEntropyRefine(&bit_entropy) + FinalHuffmanCost(&stats);249}250251// trivial_at_end is 1 if the two histograms only have one element that is252// non-zero: both the zero-th one, or both the last one.253static WEBP_INLINE double GetCombinedEntropy(const uint32_t* const X,254const uint32_t* const Y,255int length, int trivial_at_end) {256VP8LStreaks stats;257if (trivial_at_end) {258// This configuration is due to palettization that transforms an indexed259// pixel into 0xff000000 | (pixel << 8) in VP8LBundleColorMap.260// BitsEntropyRefine is 0 for histograms with only one non-zero value.261// Only FinalHuffmanCost needs to be evaluated.262memset(&stats, 0, sizeof(stats));263// Deal with the non-zero value at index 0 or length-1.264stats.streaks[1][0] += 1;265// Deal with the following/previous zero streak.266stats.counts[0] += 1;267stats.streaks[0][1] += length - 1;268return FinalHuffmanCost(&stats);269} else {270VP8LBitEntropy bit_entropy;271VP8LGetCombinedEntropyUnrefined(X, Y, length, &bit_entropy, &stats);272273return BitsEntropyRefine(&bit_entropy) + FinalHuffmanCost(&stats);274}275}276277// Estimates the Entropy + Huffman + other block overhead size cost.278double VP8LHistogramEstimateBits(const VP8LHistogram* const p) {279return280PopulationCost(281p->literal_, VP8LHistogramNumCodes(p->palette_code_bits_), NULL)282+ PopulationCost(p->red_, NUM_LITERAL_CODES, NULL)283+ PopulationCost(p->blue_, NUM_LITERAL_CODES, NULL)284+ PopulationCost(p->alpha_, NUM_LITERAL_CODES, NULL)285+ PopulationCost(p->distance_, NUM_DISTANCE_CODES, NULL)286+ VP8LExtraCost(p->literal_ + NUM_LITERAL_CODES, NUM_LENGTH_CODES)287+ VP8LExtraCost(p->distance_, NUM_DISTANCE_CODES);288}289290// -----------------------------------------------------------------------------291// Various histogram combine/cost-eval functions292293static int GetCombinedHistogramEntropy(const VP8LHistogram* const a,294const VP8LHistogram* const b,295double cost_threshold,296double* cost) {297const int palette_code_bits = a->palette_code_bits_;298int trivial_at_end = 0;299assert(a->palette_code_bits_ == b->palette_code_bits_);300*cost += GetCombinedEntropy(a->literal_, b->literal_,301VP8LHistogramNumCodes(palette_code_bits), 0);302*cost += VP8LExtraCostCombined(a->literal_ + NUM_LITERAL_CODES,303b->literal_ + NUM_LITERAL_CODES,304NUM_LENGTH_CODES);305if (*cost > cost_threshold) return 0;306307if (a->trivial_symbol_ != VP8L_NON_TRIVIAL_SYM &&308a->trivial_symbol_ == b->trivial_symbol_) {309// A, R and B are all 0 or 0xff.310const uint32_t color_a = (a->trivial_symbol_ >> 24) & 0xff;311const uint32_t color_r = (a->trivial_symbol_ >> 16) & 0xff;312const uint32_t color_b = (a->trivial_symbol_ >> 0) & 0xff;313if ((color_a == 0 || color_a == 0xff) &&314(color_r == 0 || color_r == 0xff) &&315(color_b == 0 || color_b == 0xff)) {316trivial_at_end = 1;317}318}319320*cost +=321GetCombinedEntropy(a->red_, b->red_, NUM_LITERAL_CODES, trivial_at_end);322if (*cost > cost_threshold) return 0;323324*cost +=325GetCombinedEntropy(a->blue_, b->blue_, NUM_LITERAL_CODES, trivial_at_end);326if (*cost > cost_threshold) return 0;327328*cost += GetCombinedEntropy(a->alpha_, b->alpha_, NUM_LITERAL_CODES,329trivial_at_end);330if (*cost > cost_threshold) return 0;331332*cost +=333GetCombinedEntropy(a->distance_, b->distance_, NUM_DISTANCE_CODES, 0);334*cost +=335VP8LExtraCostCombined(a->distance_, b->distance_, NUM_DISTANCE_CODES);336if (*cost > cost_threshold) return 0;337338return 1;339}340341static WEBP_INLINE void HistogramAdd(const VP8LHistogram* const a,342const VP8LHistogram* const b,343VP8LHistogram* const out) {344VP8LHistogramAdd(a, b, out);345out->trivial_symbol_ = (a->trivial_symbol_ == b->trivial_symbol_)346? a->trivial_symbol_347: VP8L_NON_TRIVIAL_SYM;348}349350// Performs out = a + b, computing the cost C(a+b) - C(a) - C(b) while comparing351// to the threshold value 'cost_threshold'. The score returned is352// Score = C(a+b) - C(a) - C(b), where C(a) + C(b) is known and fixed.353// Since the previous score passed is 'cost_threshold', we only need to compare354// the partial cost against 'cost_threshold + C(a) + C(b)' to possibly bail-out355// early.356static double HistogramAddEval(const VP8LHistogram* const a,357const VP8LHistogram* const b,358VP8LHistogram* const out,359double cost_threshold) {360double cost = 0;361const double sum_cost = a->bit_cost_ + b->bit_cost_;362cost_threshold += sum_cost;363364if (GetCombinedHistogramEntropy(a, b, cost_threshold, &cost)) {365HistogramAdd(a, b, out);366out->bit_cost_ = cost;367out->palette_code_bits_ = a->palette_code_bits_;368}369370return cost - sum_cost;371}372373// Same as HistogramAddEval(), except that the resulting histogram374// is not stored. Only the cost C(a+b) - C(a) is evaluated. We omit375// the term C(b) which is constant over all the evaluations.376static double HistogramAddThresh(const VP8LHistogram* const a,377const VP8LHistogram* const b,378double cost_threshold) {379double cost = -a->bit_cost_;380GetCombinedHistogramEntropy(a, b, cost_threshold, &cost);381return cost;382}383384// -----------------------------------------------------------------------------385386// The structure to keep track of cost range for the three dominant entropy387// symbols.388// TODO(skal): Evaluate if float can be used here instead of double for389// representing the entropy costs.390typedef struct {391double literal_max_;392double literal_min_;393double red_max_;394double red_min_;395double blue_max_;396double blue_min_;397} DominantCostRange;398399static void DominantCostRangeInit(DominantCostRange* const c) {400c->literal_max_ = 0.;401c->literal_min_ = MAX_COST;402c->red_max_ = 0.;403c->red_min_ = MAX_COST;404c->blue_max_ = 0.;405c->blue_min_ = MAX_COST;406}407408static void UpdateDominantCostRange(409const VP8LHistogram* const h, DominantCostRange* const c) {410if (c->literal_max_ < h->literal_cost_) c->literal_max_ = h->literal_cost_;411if (c->literal_min_ > h->literal_cost_) c->literal_min_ = h->literal_cost_;412if (c->red_max_ < h->red_cost_) c->red_max_ = h->red_cost_;413if (c->red_min_ > h->red_cost_) c->red_min_ = h->red_cost_;414if (c->blue_max_ < h->blue_cost_) c->blue_max_ = h->blue_cost_;415if (c->blue_min_ > h->blue_cost_) c->blue_min_ = h->blue_cost_;416}417418static void UpdateHistogramCost(VP8LHistogram* const h) {419uint32_t alpha_sym, red_sym, blue_sym;420const double alpha_cost =421PopulationCost(h->alpha_, NUM_LITERAL_CODES, &alpha_sym);422const double distance_cost =423PopulationCost(h->distance_, NUM_DISTANCE_CODES, NULL) +424VP8LExtraCost(h->distance_, NUM_DISTANCE_CODES);425const int num_codes = VP8LHistogramNumCodes(h->palette_code_bits_);426h->literal_cost_ = PopulationCost(h->literal_, num_codes, NULL) +427VP8LExtraCost(h->literal_ + NUM_LITERAL_CODES,428NUM_LENGTH_CODES);429h->red_cost_ = PopulationCost(h->red_, NUM_LITERAL_CODES, &red_sym);430h->blue_cost_ = PopulationCost(h->blue_, NUM_LITERAL_CODES, &blue_sym);431h->bit_cost_ = h->literal_cost_ + h->red_cost_ + h->blue_cost_ +432alpha_cost + distance_cost;433if ((alpha_sym | red_sym | blue_sym) == VP8L_NON_TRIVIAL_SYM) {434h->trivial_symbol_ = VP8L_NON_TRIVIAL_SYM;435} else {436h->trivial_symbol_ =437((uint32_t)alpha_sym << 24) | (red_sym << 16) | (blue_sym << 0);438}439}440441static int GetBinIdForEntropy(double min, double max, double val) {442const double range = max - min;443if (range > 0.) {444const double delta = val - min;445return (int)((NUM_PARTITIONS - 1e-6) * delta / range);446} else {447return 0;448}449}450451static int GetHistoBinIndex(const VP8LHistogram* const h,452const DominantCostRange* const c, int low_effort) {453int bin_id = GetBinIdForEntropy(c->literal_min_, c->literal_max_,454h->literal_cost_);455assert(bin_id < NUM_PARTITIONS);456if (!low_effort) {457bin_id = bin_id * NUM_PARTITIONS458+ GetBinIdForEntropy(c->red_min_, c->red_max_, h->red_cost_);459bin_id = bin_id * NUM_PARTITIONS460+ GetBinIdForEntropy(c->blue_min_, c->blue_max_, h->blue_cost_);461assert(bin_id < BIN_SIZE);462}463return bin_id;464}465466// Construct the histograms from backward references.467static void HistogramBuild(468int xsize, int histo_bits, const VP8LBackwardRefs* const backward_refs,469VP8LHistogramSet* const image_histo) {470int x = 0, y = 0;471const int histo_xsize = VP8LSubSampleSize(xsize, histo_bits);472VP8LHistogram** const histograms = image_histo->histograms;473VP8LRefsCursor c = VP8LRefsCursorInit(backward_refs);474assert(histo_bits > 0);475while (VP8LRefsCursorOk(&c)) {476const PixOrCopy* const v = c.cur_pos;477const int ix = (y >> histo_bits) * histo_xsize + (x >> histo_bits);478VP8LHistogramAddSinglePixOrCopy(histograms[ix], v, NULL, 0);479x += PixOrCopyLength(v);480while (x >= xsize) {481x -= xsize;482++y;483}484VP8LRefsCursorNext(&c);485}486}487488// Copies the histograms and computes its bit_cost.489static void HistogramCopyAndAnalyze(490VP8LHistogramSet* const orig_histo, VP8LHistogramSet* const image_histo) {491int i;492const int histo_size = orig_histo->size;493VP8LHistogram** const orig_histograms = orig_histo->histograms;494VP8LHistogram** const histograms = image_histo->histograms;495for (i = 0; i < histo_size; ++i) {496VP8LHistogram* const histo = orig_histograms[i];497UpdateHistogramCost(histo);498// Copy histograms from orig_histo[] to image_histo[].499HistogramCopy(histo, histograms[i]);500}501}502503// Partition histograms to different entropy bins for three dominant (literal,504// red and blue) symbol costs and compute the histogram aggregate bit_cost.505static void HistogramAnalyzeEntropyBin(VP8LHistogramSet* const image_histo,506uint16_t* const bin_map,507int low_effort) {508int i;509VP8LHistogram** const histograms = image_histo->histograms;510const int histo_size = image_histo->size;511DominantCostRange cost_range;512DominantCostRangeInit(&cost_range);513514// Analyze the dominant (literal, red and blue) entropy costs.515for (i = 0; i < histo_size; ++i) {516UpdateDominantCostRange(histograms[i], &cost_range);517}518519// bin-hash histograms on three of the dominant (literal, red and blue)520// symbol costs and store the resulting bin_id for each histogram.521for (i = 0; i < histo_size; ++i) {522bin_map[i] = GetHistoBinIndex(histograms[i], &cost_range, low_effort);523}524}525526// Compact image_histo[] by merging some histograms with same bin_id together if527// it's advantageous.528static void HistogramCombineEntropyBin(VP8LHistogramSet* const image_histo,529VP8LHistogram* cur_combo,530const uint16_t* const bin_map,531int bin_map_size, int num_bins,532double combine_cost_factor,533int low_effort) {534VP8LHistogram** const histograms = image_histo->histograms;535int idx;536// Work in-place: processed histograms are put at the beginning of537// image_histo[]. At the end, we just have to truncate the array.538int size = 0;539struct {540int16_t first; // position of the histogram that accumulates all541// histograms with the same bin_id542uint16_t num_combine_failures; // number of combine failures per bin_id543} bin_info[BIN_SIZE];544545assert(num_bins <= BIN_SIZE);546for (idx = 0; idx < num_bins; ++idx) {547bin_info[idx].first = -1;548bin_info[idx].num_combine_failures = 0;549}550551for (idx = 0; idx < bin_map_size; ++idx) {552const int bin_id = bin_map[idx];553const int first = bin_info[bin_id].first;554assert(size <= idx);555if (first == -1) {556// just move histogram #idx to its final position557histograms[size] = histograms[idx];558bin_info[bin_id].first = size++;559} else if (low_effort) {560HistogramAdd(histograms[idx], histograms[first], histograms[first]);561} else {562// try to merge #idx into #first (both share the same bin_id)563const double bit_cost = histograms[idx]->bit_cost_;564const double bit_cost_thresh = -bit_cost * combine_cost_factor;565const double curr_cost_diff =566HistogramAddEval(histograms[first], histograms[idx],567cur_combo, bit_cost_thresh);568if (curr_cost_diff < bit_cost_thresh) {569// Try to merge two histograms only if the combo is a trivial one or570// the two candidate histograms are already non-trivial.571// For some images, 'try_combine' turns out to be false for a lot of572// histogram pairs. In that case, we fallback to combining573// histograms as usual to avoid increasing the header size.574const int try_combine =575(cur_combo->trivial_symbol_ != VP8L_NON_TRIVIAL_SYM) ||576((histograms[idx]->trivial_symbol_ == VP8L_NON_TRIVIAL_SYM) &&577(histograms[first]->trivial_symbol_ == VP8L_NON_TRIVIAL_SYM));578const int max_combine_failures = 32;579if (try_combine ||580bin_info[bin_id].num_combine_failures >= max_combine_failures) {581// move the (better) merged histogram to its final slot582HistogramSwap(&cur_combo, &histograms[first]);583} else {584histograms[size++] = histograms[idx];585++bin_info[bin_id].num_combine_failures;586}587} else {588histograms[size++] = histograms[idx];589}590}591}592image_histo->size = size;593if (low_effort) {594// for low_effort case, update the final cost when everything is merged595for (idx = 0; idx < size; ++idx) {596UpdateHistogramCost(histograms[idx]);597}598}599}600601// Implement a Lehmer random number generator with a multiplicative constant of602// 48271 and a modulo constant of 2^31 - 1.603static uint32_t MyRand(uint32_t* const seed) {604*seed = (uint32_t)(((uint64_t)(*seed) * 48271u) % 2147483647u);605assert(*seed > 0);606return *seed;607}608609// -----------------------------------------------------------------------------610// Histogram pairs priority queue611612// Pair of histograms. Negative idx1 value means that pair is out-of-date.613typedef struct {614int idx1;615int idx2;616double cost_diff;617double cost_combo;618} HistogramPair;619620typedef struct {621HistogramPair* queue;622int size;623int max_size;624} HistoQueue;625626static int HistoQueueInit(HistoQueue* const histo_queue, const int max_index) {627histo_queue->size = 0;628// max_index^2 for the queue size is safe. If you look at629// HistogramCombineGreedy, and imagine that UpdateQueueFront always pushes630// data to the queue, you insert at most:631// - max_index*(max_index-1)/2 (the first two for loops)632// - max_index - 1 in the last for loop at the first iteration of the while633// loop, max_index - 2 at the second iteration ... therefore634// max_index*(max_index-1)/2 overall too635histo_queue->max_size = max_index * max_index;636// We allocate max_size + 1 because the last element at index "size" is637// used as temporary data (and it could be up to max_size).638histo_queue->queue = (HistogramPair*)WebPSafeMalloc(639histo_queue->max_size + 1, sizeof(*histo_queue->queue));640return histo_queue->queue != NULL;641}642643static void HistoQueueClear(HistoQueue* const histo_queue) {644assert(histo_queue != NULL);645WebPSafeFree(histo_queue->queue);646histo_queue->size = 0;647histo_queue->max_size = 0;648}649650// Pop a specific pair in the queue by replacing it with the last one651// and shrinking the queue.652static void HistoQueuePopPair(HistoQueue* const histo_queue,653HistogramPair* const pair) {654assert(pair >= histo_queue->queue &&655pair < (histo_queue->queue + histo_queue->size));656assert(histo_queue->size > 0);657*pair = histo_queue->queue[histo_queue->size - 1];658--histo_queue->size;659}660661// Check whether a pair in the queue should be updated as head or not.662static void HistoQueueUpdateHead(HistoQueue* const histo_queue,663HistogramPair* const pair) {664assert(pair->cost_diff < 0.);665assert(pair >= histo_queue->queue &&666pair < (histo_queue->queue + histo_queue->size));667assert(histo_queue->size > 0);668if (pair->cost_diff < histo_queue->queue[0].cost_diff) {669// Replace the best pair.670const HistogramPair tmp = histo_queue->queue[0];671histo_queue->queue[0] = *pair;672*pair = tmp;673}674}675676// Create a pair from indices "idx1" and "idx2" provided its cost677// is inferior to "threshold", a negative entropy.678// It returns the cost of the pair, or 0. if it superior to threshold.679static double HistoQueuePush(HistoQueue* const histo_queue,680VP8LHistogram** const histograms, int idx1,681int idx2, double threshold) {682const VP8LHistogram* h1;683const VP8LHistogram* h2;684HistogramPair pair;685double sum_cost;686687assert(threshold <= 0.);688if (idx1 > idx2) {689const int tmp = idx2;690idx2 = idx1;691idx1 = tmp;692}693pair.idx1 = idx1;694pair.idx2 = idx2;695h1 = histograms[idx1];696h2 = histograms[idx2];697sum_cost = h1->bit_cost_ + h2->bit_cost_;698pair.cost_combo = 0.;699GetCombinedHistogramEntropy(h1, h2, sum_cost + threshold, &pair.cost_combo);700pair.cost_diff = pair.cost_combo - sum_cost;701702// Do not even consider the pair if it does not improve the entropy.703if (pair.cost_diff >= threshold) return 0.;704705// We cannot add more elements than the capacity.706assert(histo_queue->size < histo_queue->max_size);707histo_queue->queue[histo_queue->size++] = pair;708HistoQueueUpdateHead(histo_queue, &histo_queue->queue[histo_queue->size - 1]);709710return pair.cost_diff;711}712713// -----------------------------------------------------------------------------714715// Combines histograms by continuously choosing the one with the highest cost716// reduction.717static int HistogramCombineGreedy(VP8LHistogramSet* const image_histo) {718int ok = 0;719int image_histo_size = image_histo->size;720int i, j;721VP8LHistogram** const histograms = image_histo->histograms;722// Indexes of remaining histograms.723int* const clusters =724(int*)WebPSafeMalloc(image_histo_size, sizeof(*clusters));725// Priority queue of histogram pairs.726HistoQueue histo_queue;727728if (!HistoQueueInit(&histo_queue, image_histo_size) || clusters == NULL) {729goto End;730}731732for (i = 0; i < image_histo_size; ++i) {733// Initialize clusters indexes.734clusters[i] = i;735for (j = i + 1; j < image_histo_size; ++j) {736// Initialize positions array.737HistoQueuePush(&histo_queue, histograms, i, j, 0.);738}739}740741while (image_histo_size > 1 && histo_queue.size > 0) {742const int idx1 = histo_queue.queue[0].idx1;743const int idx2 = histo_queue.queue[0].idx2;744HistogramAdd(histograms[idx2], histograms[idx1], histograms[idx1]);745histograms[idx1]->bit_cost_ = histo_queue.queue[0].cost_combo;746// Remove merged histogram.747for (i = 0; i + 1 < image_histo_size; ++i) {748if (clusters[i] >= idx2) {749clusters[i] = clusters[i + 1];750}751}752--image_histo_size;753754// Remove pairs intersecting the just combined best pair.755for (i = 0; i < histo_queue.size;) {756HistogramPair* const p = histo_queue.queue + i;757if (p->idx1 == idx1 || p->idx2 == idx1 ||758p->idx1 == idx2 || p->idx2 == idx2) {759HistoQueuePopPair(&histo_queue, p);760} else {761HistoQueueUpdateHead(&histo_queue, p);762++i;763}764}765766// Push new pairs formed with combined histogram to the queue.767for (i = 0; i < image_histo_size; ++i) {768if (clusters[i] != idx1) {769HistoQueuePush(&histo_queue, histograms, idx1, clusters[i], 0.);770}771}772}773// Move remaining histograms to the beginning of the array.774for (i = 0; i < image_histo_size; ++i) {775if (i != clusters[i]) { // swap the two histograms776HistogramSwap(&histograms[i], &histograms[clusters[i]]);777}778}779780image_histo->size = image_histo_size;781ok = 1;782783End:784WebPSafeFree(clusters);785HistoQueueClear(&histo_queue);786return ok;787}788789// Perform histogram aggregation using a stochastic approach.790// 'do_greedy' is set to 1 if a greedy approach needs to be performed791// afterwards, 0 otherwise.792static int HistogramCombineStochastic(VP8LHistogramSet* const image_histo,793int min_cluster_size,794int* const do_greedy) {795int iter;796uint32_t seed = 1;797int tries_with_no_success = 0;798int image_histo_size = image_histo->size;799const int outer_iters = image_histo_size;800const int num_tries_no_success = outer_iters / 2;801VP8LHistogram** const histograms = image_histo->histograms;802// Priority queue of histogram pairs. Its size of "kCostHeapSizeSqrt"^2803// impacts the quality of the compression and the speed: the smaller the804// faster but the worse for the compression.805HistoQueue histo_queue;806const int kHistoQueueSizeSqrt = 3;807int ok = 0;808809if (!HistoQueueInit(&histo_queue, kHistoQueueSizeSqrt)) {810goto End;811}812// Collapse similar histograms in 'image_histo'.813++min_cluster_size;814for (iter = 0; iter < outer_iters && image_histo_size >= min_cluster_size &&815++tries_with_no_success < num_tries_no_success;816++iter) {817double best_cost =818(histo_queue.size == 0) ? 0. : histo_queue.queue[0].cost_diff;819int best_idx1 = -1, best_idx2 = 1;820int j;821const uint32_t rand_range = (image_histo_size - 1) * image_histo_size;822// image_histo_size / 2 was chosen empirically. Less means faster but worse823// compression.824const int num_tries = image_histo_size / 2;825826for (j = 0; j < num_tries; ++j) {827double curr_cost;828// Choose two different histograms at random and try to combine them.829const uint32_t tmp = MyRand(&seed) % rand_range;830const uint32_t idx1 = tmp / (image_histo_size - 1);831uint32_t idx2 = tmp % (image_histo_size - 1);832if (idx2 >= idx1) ++idx2;833834// Calculate cost reduction on combination.835curr_cost =836HistoQueuePush(&histo_queue, histograms, idx1, idx2, best_cost);837if (curr_cost < 0) { // found a better pair?838best_cost = curr_cost;839// Empty the queue if we reached full capacity.840if (histo_queue.size == histo_queue.max_size) break;841}842}843if (histo_queue.size == 0) continue;844845// Merge the two best histograms.846best_idx1 = histo_queue.queue[0].idx1;847best_idx2 = histo_queue.queue[0].idx2;848assert(best_idx1 < best_idx2);849HistogramAddEval(histograms[best_idx1], histograms[best_idx2],850histograms[best_idx1], 0);851// Swap the best_idx2 histogram with the last one (which is now unused).852--image_histo_size;853if (best_idx2 != image_histo_size) {854HistogramSwap(&histograms[image_histo_size], &histograms[best_idx2]);855}856histograms[image_histo_size] = NULL;857// Parse the queue and update each pair that deals with best_idx1,858// best_idx2 or image_histo_size.859for (j = 0; j < histo_queue.size;) {860HistogramPair* const p = histo_queue.queue + j;861const int is_idx1_best = p->idx1 == best_idx1 || p->idx1 == best_idx2;862const int is_idx2_best = p->idx2 == best_idx1 || p->idx2 == best_idx2;863int do_eval = 0;864// The front pair could have been duplicated by a random pick so865// check for it all the time nevertheless.866if (is_idx1_best && is_idx2_best) {867HistoQueuePopPair(&histo_queue, p);868continue;869}870// Any pair containing one of the two best indices should only refer to871// best_idx1. Its cost should also be updated.872if (is_idx1_best) {873p->idx1 = best_idx1;874do_eval = 1;875} else if (is_idx2_best) {876p->idx2 = best_idx1;877do_eval = 1;878}879if (p->idx2 == image_histo_size) {880// No need to re-evaluate here as it does not involve a pair881// containing best_idx1 or best_idx2.882p->idx2 = best_idx2;883}884assert(p->idx2 < image_histo_size);885// Make sure the index order is respected.886if (p->idx1 > p->idx2) {887const int tmp = p->idx2;888p->idx2 = p->idx1;889p->idx1 = tmp;890}891if (do_eval) {892// Re-evaluate the cost of an updated pair.893GetCombinedHistogramEntropy(histograms[p->idx1], histograms[p->idx2], 0,894&p->cost_diff);895if (p->cost_diff >= 0.) {896HistoQueuePopPair(&histo_queue, p);897continue;898}899}900HistoQueueUpdateHead(&histo_queue, p);901++j;902}903904tries_with_no_success = 0;905}906image_histo->size = image_histo_size;907*do_greedy = (image_histo->size <= min_cluster_size);908ok = 1;909910End:911HistoQueueClear(&histo_queue);912return ok;913}914915// -----------------------------------------------------------------------------916// Histogram refinement917918// Find the best 'out' histogram for each of the 'in' histograms.919// Note: we assume that out[]->bit_cost_ is already up-to-date.920static void HistogramRemap(const VP8LHistogramSet* const in,921const VP8LHistogramSet* const out,922uint16_t* const symbols) {923int i;924VP8LHistogram** const in_histo = in->histograms;925VP8LHistogram** const out_histo = out->histograms;926const int in_size = in->size;927const int out_size = out->size;928if (out_size > 1) {929for (i = 0; i < in_size; ++i) {930int best_out = 0;931double best_bits = MAX_COST;932int k;933for (k = 0; k < out_size; ++k) {934const double cur_bits =935HistogramAddThresh(out_histo[k], in_histo[i], best_bits);936if (k == 0 || cur_bits < best_bits) {937best_bits = cur_bits;938best_out = k;939}940}941symbols[i] = best_out;942}943} else {944assert(out_size == 1);945for (i = 0; i < in_size; ++i) {946symbols[i] = 0;947}948}949950// Recompute each out based on raw and symbols.951for (i = 0; i < out_size; ++i) {952HistogramClear(out_histo[i]);953}954955for (i = 0; i < in_size; ++i) {956const int idx = symbols[i];957HistogramAdd(in_histo[i], out_histo[idx], out_histo[idx]);958}959}960961static double GetCombineCostFactor(int histo_size, int quality) {962double combine_cost_factor = 0.16;963if (quality < 90) {964if (histo_size > 256) combine_cost_factor /= 2.;965if (histo_size > 512) combine_cost_factor /= 2.;966if (histo_size > 1024) combine_cost_factor /= 2.;967if (quality <= 50) combine_cost_factor /= 2.;968}969return combine_cost_factor;970}971972int VP8LGetHistoImageSymbols(int xsize, int ysize,973const VP8LBackwardRefs* const refs,974int quality, int low_effort,975int histo_bits, int cache_bits,976VP8LHistogramSet* const image_histo,977VP8LHistogram* const tmp_histo,978uint16_t* const histogram_symbols) {979int ok = 0;980const int histo_xsize = histo_bits ? VP8LSubSampleSize(xsize, histo_bits) : 1;981const int histo_ysize = histo_bits ? VP8LSubSampleSize(ysize, histo_bits) : 1;982const int image_histo_raw_size = histo_xsize * histo_ysize;983VP8LHistogramSet* const orig_histo =984VP8LAllocateHistogramSet(image_histo_raw_size, cache_bits);985// Don't attempt linear bin-partition heuristic for986// histograms of small sizes (as bin_map will be very sparse) and987// maximum quality q==100 (to preserve the compression gains at that level).988const int entropy_combine_num_bins = low_effort ? NUM_PARTITIONS : BIN_SIZE;989const int entropy_combine =990(orig_histo->size > entropy_combine_num_bins * 2) && (quality < 100);991992if (orig_histo == NULL) goto Error;993994// Construct the histograms from backward references.995HistogramBuild(xsize, histo_bits, refs, orig_histo);996// Copies the histograms and computes its bit_cost.997HistogramCopyAndAnalyze(orig_histo, image_histo);998999if (entropy_combine) {1000const int bin_map_size = orig_histo->size;1001// Reuse histogram_symbols storage. By definition, it's guaranteed to be ok.1002uint16_t* const bin_map = histogram_symbols;1003const double combine_cost_factor =1004GetCombineCostFactor(image_histo_raw_size, quality);10051006HistogramAnalyzeEntropyBin(orig_histo, bin_map, low_effort);1007// Collapse histograms with similar entropy.1008HistogramCombineEntropyBin(image_histo, tmp_histo, bin_map, bin_map_size,1009entropy_combine_num_bins, combine_cost_factor,1010low_effort);1011}10121013// Don't combine the histograms using stochastic and greedy heuristics for1014// low-effort compression mode.1015if (!low_effort || !entropy_combine) {1016const float x = quality / 100.f;1017// cubic ramp between 1 and MAX_HISTO_GREEDY:1018const int threshold_size = (int)(1 + (x * x * x) * (MAX_HISTO_GREEDY - 1));1019int do_greedy;1020if (!HistogramCombineStochastic(image_histo, threshold_size, &do_greedy)) {1021goto Error;1022}1023if (do_greedy && !HistogramCombineGreedy(image_histo)) {1024goto Error;1025}1026}10271028// TODO(vrabaud): Optimize HistogramRemap for low-effort compression mode.1029// Find the optimal map from original histograms to the final ones.1030HistogramRemap(orig_histo, image_histo, histogram_symbols);10311032ok = 1;10331034Error:1035VP8LFreeHistogramSet(orig_histo);1036return ok;1037}103810391040