Path: blob/master/thirdparty/libwebp/src/enc/predictor_enc.c
9913 views
// Copyright 2016 Google Inc. All Rights Reserved.1//2// Use of this source code is governed by a BSD-style license3// that can be found in the COPYING file in the root of the source4// tree. An additional intellectual property rights grant can be found5// in the file PATENTS. All contributing project authors may6// be found in the AUTHORS file in the root of the source tree.7// -----------------------------------------------------------------------------8//9// Image transform methods for lossless encoder.10//11// Authors: Vikas Arora ([email protected])12// Jyrki Alakuijala ([email protected])13// Urvang Joshi ([email protected])14// Vincent Rabaud ([email protected])1516#include <assert.h>17#include <stdlib.h>18#include <string.h>1920#include "src/dsp/lossless.h"21#include "src/dsp/lossless_common.h"22#include "src/enc/vp8i_enc.h"23#include "src/enc/vp8li_enc.h"24#include "src/utils/utils.h"25#include "src/webp/encode.h"26#include "src/webp/format_constants.h"27#include "src/webp/types.h"2829#define HISTO_SIZE (4 * 256)30static const int64_t kSpatialPredictorBias = 15ll << LOG_2_PRECISION_BITS;31static const int kPredLowEffort = 11;32static const uint32_t kMaskAlpha = 0xff000000;33static const int kNumPredModes = 14;3435// Mostly used to reduce code size + readability36static WEBP_INLINE int GetMin(int a, int b) { return (a > b) ? b : a; }37static WEBP_INLINE int GetMax(int a, int b) { return (a < b) ? b : a; }3839//------------------------------------------------------------------------------40// Methods to calculate Entropy (Shannon).4142// Compute a bias for prediction entropy using a global heuristic to favor43// values closer to 0. Hence the final negative sign.44// 'exp_val' has a scaling factor of 1/100.45static int64_t PredictionCostBias(const uint32_t counts[256], uint64_t weight_0,46uint64_t exp_val) {47const int significant_symbols = 256 >> 4;48const uint64_t exp_decay_factor = 6; // has a scaling factor of 1/1049uint64_t bits = (weight_0 * counts[0]) << LOG_2_PRECISION_BITS;50int i;51exp_val <<= LOG_2_PRECISION_BITS;52for (i = 1; i < significant_symbols; ++i) {53bits += DivRound(exp_val * (counts[i] + counts[256 - i]), 100);54exp_val = DivRound(exp_decay_factor * exp_val, 10);55}56return -DivRound((int64_t)bits, 10);57}5859static int64_t PredictionCostSpatialHistogram(60const uint32_t accumulated[HISTO_SIZE], const uint32_t tile[HISTO_SIZE],61int mode, int left_mode, int above_mode) {62int i;63int64_t retval = 0;64for (i = 0; i < 4; ++i) {65const uint64_t kExpValue = 94;66retval += PredictionCostBias(&tile[i * 256], 1, kExpValue);67// Compute the new cost if 'tile' is added to 'accumulate' but also add the68// cost of the current histogram to guide the spatial predictor selection.69// Basically, favor low entropy, locally and globally.70retval += (int64_t)VP8LCombinedShannonEntropy(&tile[i * 256],71&accumulated[i * 256]);72}73// Favor keeping the areas locally similar.74if (mode == left_mode) retval -= kSpatialPredictorBias;75if (mode == above_mode) retval -= kSpatialPredictorBias;76return retval;77}7879static WEBP_INLINE void UpdateHisto(uint32_t histo_argb[HISTO_SIZE],80uint32_t argb) {81++histo_argb[0 * 256 + (argb >> 24)];82++histo_argb[1 * 256 + ((argb >> 16) & 0xff)];83++histo_argb[2 * 256 + ((argb >> 8) & 0xff)];84++histo_argb[3 * 256 + (argb & 0xff)];85}8687//------------------------------------------------------------------------------88// Spatial transform functions.8990static WEBP_INLINE void PredictBatch(int mode, int x_start, int y,91int num_pixels, const uint32_t* current,92const uint32_t* upper, uint32_t* out) {93if (x_start == 0) {94if (y == 0) {95// ARGB_BLACK.96VP8LPredictorsSub[0](current, NULL, 1, out);97} else {98// Top one.99VP8LPredictorsSub[2](current, upper, 1, out);100}101++x_start;102++out;103--num_pixels;104}105if (y == 0) {106// Left one.107VP8LPredictorsSub[1](current + x_start, NULL, num_pixels, out);108} else {109VP8LPredictorsSub[mode](current + x_start, upper + x_start, num_pixels,110out);111}112}113114#if (WEBP_NEAR_LOSSLESS == 1)115static int MaxDiffBetweenPixels(uint32_t p1, uint32_t p2) {116const int diff_a = abs((int)(p1 >> 24) - (int)(p2 >> 24));117const int diff_r = abs((int)((p1 >> 16) & 0xff) - (int)((p2 >> 16) & 0xff));118const int diff_g = abs((int)((p1 >> 8) & 0xff) - (int)((p2 >> 8) & 0xff));119const int diff_b = abs((int)(p1 & 0xff) - (int)(p2 & 0xff));120return GetMax(GetMax(diff_a, diff_r), GetMax(diff_g, diff_b));121}122123static int MaxDiffAroundPixel(uint32_t current, uint32_t up, uint32_t down,124uint32_t left, uint32_t right) {125const int diff_up = MaxDiffBetweenPixels(current, up);126const int diff_down = MaxDiffBetweenPixels(current, down);127const int diff_left = MaxDiffBetweenPixels(current, left);128const int diff_right = MaxDiffBetweenPixels(current, right);129return GetMax(GetMax(diff_up, diff_down), GetMax(diff_left, diff_right));130}131132static uint32_t AddGreenToBlueAndRed(uint32_t argb) {133const uint32_t green = (argb >> 8) & 0xff;134uint32_t red_blue = argb & 0x00ff00ffu;135red_blue += (green << 16) | green;136red_blue &= 0x00ff00ffu;137return (argb & 0xff00ff00u) | red_blue;138}139140static void MaxDiffsForRow(int width, int stride, const uint32_t* const argb,141uint8_t* const max_diffs, int used_subtract_green) {142uint32_t current, up, down, left, right;143int x;144if (width <= 2) return;145current = argb[0];146right = argb[1];147if (used_subtract_green) {148current = AddGreenToBlueAndRed(current);149right = AddGreenToBlueAndRed(right);150}151// max_diffs[0] and max_diffs[width - 1] are never used.152for (x = 1; x < width - 1; ++x) {153up = argb[-stride + x];154down = argb[stride + x];155left = current;156current = right;157right = argb[x + 1];158if (used_subtract_green) {159up = AddGreenToBlueAndRed(up);160down = AddGreenToBlueAndRed(down);161right = AddGreenToBlueAndRed(right);162}163max_diffs[x] = MaxDiffAroundPixel(current, up, down, left, right);164}165}166167// Quantize the difference between the actual component value and its prediction168// to a multiple of quantization, working modulo 256, taking care not to cross169// a boundary (inclusive upper limit).170static uint8_t NearLosslessComponent(uint8_t value, uint8_t predict,171uint8_t boundary, int quantization) {172const int residual = (value - predict) & 0xff;173const int boundary_residual = (boundary - predict) & 0xff;174const int lower = residual & ~(quantization - 1);175const int upper = lower + quantization;176// Resolve ties towards a value closer to the prediction (i.e. towards lower177// if value comes after prediction and towards upper otherwise).178const int bias = ((boundary - value) & 0xff) < boundary_residual;179if (residual - lower < upper - residual + bias) {180// lower is closer to residual than upper.181if (residual > boundary_residual && lower <= boundary_residual) {182// Halve quantization step to avoid crossing boundary. This midpoint is183// on the same side of boundary as residual because midpoint >= residual184// (since lower is closer than upper) and residual is above the boundary.185return lower + (quantization >> 1);186}187return lower;188} else {189// upper is closer to residual than lower.190if (residual <= boundary_residual && upper > boundary_residual) {191// Halve quantization step to avoid crossing boundary. This midpoint is192// on the same side of boundary as residual because midpoint <= residual193// (since upper is closer than lower) and residual is below the boundary.194return lower + (quantization >> 1);195}196return upper & 0xff;197}198}199200static WEBP_INLINE uint8_t NearLosslessDiff(uint8_t a, uint8_t b) {201return (uint8_t)((((int)(a) - (int)(b))) & 0xff);202}203204// Quantize every component of the difference between the actual pixel value and205// its prediction to a multiple of a quantization (a power of 2, not larger than206// max_quantization which is a power of 2, smaller than max_diff). Take care if207// value and predict have undergone subtract green, which means that red and208// blue are represented as offsets from green.209static uint32_t NearLossless(uint32_t value, uint32_t predict,210int max_quantization, int max_diff,211int used_subtract_green) {212int quantization;213uint8_t new_green = 0;214uint8_t green_diff = 0;215uint8_t a, r, g, b;216if (max_diff <= 2) {217return VP8LSubPixels(value, predict);218}219quantization = max_quantization;220while (quantization >= max_diff) {221quantization >>= 1;222}223if ((value >> 24) == 0 || (value >> 24) == 0xff) {224// Preserve transparency of fully transparent or fully opaque pixels.225a = NearLosslessDiff((value >> 24) & 0xff, (predict >> 24) & 0xff);226} else {227a = NearLosslessComponent(value >> 24, predict >> 24, 0xff, quantization);228}229g = NearLosslessComponent((value >> 8) & 0xff, (predict >> 8) & 0xff, 0xff,230quantization);231if (used_subtract_green) {232// The green offset will be added to red and blue components during decoding233// to obtain the actual red and blue values.234new_green = ((predict >> 8) + g) & 0xff;235// The amount by which green has been adjusted during quantization. It is236// subtracted from red and blue for compensation, to avoid accumulating two237// quantization errors in them.238green_diff = NearLosslessDiff(new_green, (value >> 8) & 0xff);239}240r = NearLosslessComponent(NearLosslessDiff((value >> 16) & 0xff, green_diff),241(predict >> 16) & 0xff, 0xff - new_green,242quantization);243b = NearLosslessComponent(NearLosslessDiff(value & 0xff, green_diff),244predict & 0xff, 0xff - new_green, quantization);245return ((uint32_t)a << 24) | ((uint32_t)r << 16) | ((uint32_t)g << 8) | b;246}247#endif // (WEBP_NEAR_LOSSLESS == 1)248249// Stores the difference between the pixel and its prediction in "out".250// In case of a lossy encoding, updates the source image to avoid propagating251// the deviation further to pixels which depend on the current pixel for their252// predictions.253static WEBP_INLINE void GetResidual(254int width, int height, uint32_t* const upper_row,255uint32_t* const current_row, const uint8_t* const max_diffs, int mode,256int x_start, int x_end, int y, int max_quantization, int exact,257int used_subtract_green, uint32_t* const out) {258if (exact) {259PredictBatch(mode, x_start, y, x_end - x_start, current_row, upper_row,260out);261} else {262const VP8LPredictorFunc pred_func = VP8LPredictors[mode];263int x;264for (x = x_start; x < x_end; ++x) {265uint32_t predict;266uint32_t residual;267if (y == 0) {268predict = (x == 0) ? ARGB_BLACK : current_row[x - 1]; // Left.269} else if (x == 0) {270predict = upper_row[x]; // Top.271} else {272predict = pred_func(¤t_row[x - 1], upper_row + x);273}274#if (WEBP_NEAR_LOSSLESS == 1)275if (max_quantization == 1 || mode == 0 || y == 0 || y == height - 1 ||276x == 0 || x == width - 1) {277residual = VP8LSubPixels(current_row[x], predict);278} else {279residual = NearLossless(current_row[x], predict, max_quantization,280max_diffs[x], used_subtract_green);281// Update the source image.282current_row[x] = VP8LAddPixels(predict, residual);283// x is never 0 here so we do not need to update upper_row like below.284}285#else286(void)max_diffs;287(void)height;288(void)max_quantization;289(void)used_subtract_green;290residual = VP8LSubPixels(current_row[x], predict);291#endif292if ((current_row[x] & kMaskAlpha) == 0) {293// If alpha is 0, cleanup RGB. We can choose the RGB values of the294// residual for best compression. The prediction of alpha itself can be295// non-zero and must be kept though. We choose RGB of the residual to be296// 0.297residual &= kMaskAlpha;298// Update the source image.299current_row[x] = predict & ~kMaskAlpha;300// The prediction for the rightmost pixel in a row uses the leftmost301// pixel302// in that row as its top-right context pixel. Hence if we change the303// leftmost pixel of current_row, the corresponding change must be304// applied305// to upper_row as well where top-right context is being read from.306if (x == 0 && y != 0) upper_row[width] = current_row[0];307}308out[x - x_start] = residual;309}310}311}312313// Accessors to residual histograms.314static WEBP_INLINE uint32_t* GetHistoArgb(uint32_t* const all_histos,315int subsampling_index, int mode) {316return &all_histos[(subsampling_index * kNumPredModes + mode) * HISTO_SIZE];317}318319static WEBP_INLINE const uint32_t* GetHistoArgbConst(320const uint32_t* const all_histos, int subsampling_index, int mode) {321return &all_histos[subsampling_index * kNumPredModes * HISTO_SIZE +322mode * HISTO_SIZE];323}324325// Accessors to accumulated residual histogram.326static WEBP_INLINE uint32_t* GetAccumulatedHisto(uint32_t* all_accumulated,327int subsampling_index) {328return &all_accumulated[subsampling_index * HISTO_SIZE];329}330331// Find and store the best predictor for a tile at subsampling332// 'subsampling_index'.333static void GetBestPredictorForTile(const uint32_t* const all_argb,334int subsampling_index, int tile_x,335int tile_y, int tiles_per_row,336uint32_t* all_accumulated_argb,337uint32_t** const all_modes,338uint32_t* const all_pred_histos) {339uint32_t* const accumulated_argb =340GetAccumulatedHisto(all_accumulated_argb, subsampling_index);341uint32_t* const modes = all_modes[subsampling_index];342uint32_t* const pred_histos =343&all_pred_histos[subsampling_index * kNumPredModes];344// Prediction modes of the left and above neighbor tiles.345const int left_mode =346(tile_x > 0) ? (modes[tile_y * tiles_per_row + tile_x - 1] >> 8) & 0xff347: 0xff;348const int above_mode =349(tile_y > 0) ? (modes[(tile_y - 1) * tiles_per_row + tile_x] >> 8) & 0xff350: 0xff;351int mode;352int64_t best_diff = WEBP_INT64_MAX;353uint32_t best_mode = 0;354const uint32_t* best_histo =355GetHistoArgbConst(all_argb, /*subsampling_index=*/0, best_mode);356for (mode = 0; mode < kNumPredModes; ++mode) {357const uint32_t* const histo_argb =358GetHistoArgbConst(all_argb, subsampling_index, mode);359const int64_t cur_diff = PredictionCostSpatialHistogram(360accumulated_argb, histo_argb, mode, left_mode, above_mode);361362if (cur_diff < best_diff) {363best_histo = histo_argb;364best_diff = cur_diff;365best_mode = mode;366}367}368// Update the accumulated histogram.369VP8LAddVectorEq(best_histo, accumulated_argb, HISTO_SIZE);370modes[tile_y * tiles_per_row + tile_x] = ARGB_BLACK | (best_mode << 8);371++pred_histos[best_mode];372}373374// Computes the residuals for the different predictors.375// If max_quantization > 1, assumes that near lossless processing will be376// applied, quantizing residuals to multiples of quantization levels up to377// max_quantization (the actual quantization level depends on smoothness near378// the given pixel).379static void ComputeResidualsForTile(380int width, int height, int tile_x, int tile_y, int min_bits,381uint32_t update_up_to_index, uint32_t* const all_argb,382uint32_t* const argb_scratch, const uint32_t* const argb,383int max_quantization, int exact, int used_subtract_green) {384const int start_x = tile_x << min_bits;385const int start_y = tile_y << min_bits;386const int tile_size = 1 << min_bits;387const int max_y = GetMin(tile_size, height - start_y);388const int max_x = GetMin(tile_size, width - start_x);389// Whether there exist columns just outside the tile.390const int have_left = (start_x > 0);391// Position and size of the strip covering the tile and adjacent columns if392// they exist.393const int context_start_x = start_x - have_left;394#if (WEBP_NEAR_LOSSLESS == 1)395const int context_width = max_x + have_left + (max_x < width - start_x);396#endif397// The width of upper_row and current_row is one pixel larger than image width398// to allow the top right pixel to point to the leftmost pixel of the next row399// when at the right edge.400uint32_t* upper_row = argb_scratch;401uint32_t* current_row = upper_row + width + 1;402uint8_t* const max_diffs = (uint8_t*)(current_row + width + 1);403int mode;404// Need pointers to be able to swap arrays.405uint32_t residuals[1 << MAX_TRANSFORM_BITS];406assert(max_x <= (1 << MAX_TRANSFORM_BITS));407for (mode = 0; mode < kNumPredModes; ++mode) {408int relative_y;409uint32_t* const histo_argb =410GetHistoArgb(all_argb, /*subsampling_index=*/0, mode);411if (start_y > 0) {412// Read the row above the tile which will become the first upper_row.413// Include a pixel to the left if it exists; include a pixel to the right414// in all cases (wrapping to the leftmost pixel of the next row if it does415// not exist).416memcpy(current_row + context_start_x,417argb + (start_y - 1) * width + context_start_x,418sizeof(*argb) * (max_x + have_left + 1));419}420for (relative_y = 0; relative_y < max_y; ++relative_y) {421const int y = start_y + relative_y;422int relative_x;423uint32_t* tmp = upper_row;424upper_row = current_row;425current_row = tmp;426// Read current_row. Include a pixel to the left if it exists; include a427// pixel to the right in all cases except at the bottom right corner of428// the image (wrapping to the leftmost pixel of the next row if it does429// not exist in the current row).430memcpy(current_row + context_start_x,431argb + y * width + context_start_x,432sizeof(*argb) * (max_x + have_left + (y + 1 < height)));433#if (WEBP_NEAR_LOSSLESS == 1)434if (max_quantization > 1 && y >= 1 && y + 1 < height) {435MaxDiffsForRow(context_width, width, argb + y * width + context_start_x,436max_diffs + context_start_x, used_subtract_green);437}438#endif439440GetResidual(width, height, upper_row, current_row, max_diffs, mode,441start_x, start_x + max_x, y, max_quantization, exact,442used_subtract_green, residuals);443for (relative_x = 0; relative_x < max_x; ++relative_x) {444UpdateHisto(histo_argb, residuals[relative_x]);445}446if (update_up_to_index > 0) {447uint32_t subsampling_index;448for (subsampling_index = 1; subsampling_index <= update_up_to_index;449++subsampling_index) {450uint32_t* const super_histo =451GetHistoArgb(all_argb, subsampling_index, mode);452for (relative_x = 0; relative_x < max_x; ++relative_x) {453UpdateHisto(super_histo, residuals[relative_x]);454}455}456}457}458}459}460461// Converts pixels of the image to residuals with respect to predictions.462// If max_quantization > 1, applies near lossless processing, quantizing463// residuals to multiples of quantization levels up to max_quantization464// (the actual quantization level depends on smoothness near the given pixel).465static void CopyImageWithPrediction(int width, int height, int bits,466const uint32_t* const modes,467uint32_t* const argb_scratch,468uint32_t* const argb, int low_effort,469int max_quantization, int exact,470int used_subtract_green) {471const int tiles_per_row = VP8LSubSampleSize(width, bits);472// The width of upper_row and current_row is one pixel larger than image width473// to allow the top right pixel to point to the leftmost pixel of the next row474// when at the right edge.475uint32_t* upper_row = argb_scratch;476uint32_t* current_row = upper_row + width + 1;477uint8_t* current_max_diffs = (uint8_t*)(current_row + width + 1);478#if (WEBP_NEAR_LOSSLESS == 1)479uint8_t* lower_max_diffs = current_max_diffs + width;480#endif481int y;482483for (y = 0; y < height; ++y) {484int x;485uint32_t* const tmp32 = upper_row;486upper_row = current_row;487current_row = tmp32;488memcpy(current_row, argb + y * width,489sizeof(*argb) * (width + (y + 1 < height)));490491if (low_effort) {492PredictBatch(kPredLowEffort, 0, y, width, current_row, upper_row,493argb + y * width);494} else {495#if (WEBP_NEAR_LOSSLESS == 1)496if (max_quantization > 1) {497// Compute max_diffs for the lower row now, because that needs the498// contents of argb for the current row, which we will overwrite with499// residuals before proceeding with the next row.500uint8_t* const tmp8 = current_max_diffs;501current_max_diffs = lower_max_diffs;502lower_max_diffs = tmp8;503if (y + 2 < height) {504MaxDiffsForRow(width, width, argb + (y + 1) * width, lower_max_diffs,505used_subtract_green);506}507}508#endif509for (x = 0; x < width;) {510const int mode =511(modes[(y >> bits) * tiles_per_row + (x >> bits)] >> 8) & 0xff;512int x_end = x + (1 << bits);513if (x_end > width) x_end = width;514GetResidual(width, height, upper_row, current_row, current_max_diffs,515mode, x, x_end, y, max_quantization, exact,516used_subtract_green, argb + y * width + x);517x = x_end;518}519}520}521}522523// Checks whether 'image' can be subsampled by finding the biggest power of 2524// squares (defined by 'best_bits') of uniform value it is made out of.525void VP8LOptimizeSampling(uint32_t* const image, int full_width,526int full_height, int bits, int max_bits,527int* best_bits_out) {528int width = VP8LSubSampleSize(full_width, bits);529int height = VP8LSubSampleSize(full_height, bits);530int old_width, x, y, square_size;531int best_bits = bits;532*best_bits_out = bits;533// Check rows first.534while (best_bits < max_bits) {535const int new_square_size = 1 << (best_bits + 1 - bits);536int is_good = 1;537square_size = 1 << (best_bits - bits);538for (y = 0; y + square_size < height; y += new_square_size) {539// Check the first lines of consecutive line groups.540if (memcmp(&image[y * width], &image[(y + square_size) * width],541width * sizeof(*image)) != 0) {542is_good = 0;543break;544}545}546if (is_good) {547++best_bits;548} else {549break;550}551}552if (best_bits == bits) return;553554// Check columns.555while (best_bits > bits) {556int is_good = 1;557square_size = 1 << (best_bits - bits);558for (y = 0; is_good && y < height; ++y) {559for (x = 0; is_good && x < width; x += square_size) {560int i;561for (i = x + 1; i < GetMin(x + square_size, width); ++i) {562if (image[y * width + i] != image[y * width + x]) {563is_good = 0;564break;565}566}567}568}569if (is_good) {570break;571}572--best_bits;573}574if (best_bits == bits) return;575576// Subsample the image.577old_width = width;578square_size = 1 << (best_bits - bits);579width = VP8LSubSampleSize(full_width, best_bits);580height = VP8LSubSampleSize(full_height, best_bits);581for (y = 0; y < height; ++y) {582for (x = 0; x < width; ++x) {583image[y * width + x] = image[square_size * (y * old_width + x)];584}585}586*best_bits_out = best_bits;587}588589// Computes the best predictor image.590// Finds the best predictors per tile. Once done, finds the best predictor image591// sampling.592// best_bits is set to 0 in case of error.593// The following requires some glossary:594// - a tile is a square of side 2^min_bits pixels.595// - a super-tile of a tile is a square of side 2^bits pixels with bits in596// [min_bits+1, max_bits].597// - the max-tile of a tile is the square of 2^max_bits pixels containing it.598// If this max-tile crosses the border of an image, it is cropped.599// - tile, super-tiles and max_tile are aligned on powers of 2 in the original600// image.601// - coordinates for tile, super-tile, max-tile are respectively named602// tile_x, super_tile_x, max_tile_x at their bit scale.603// - in the max-tile, a tile has local coordinates (local_tile_x, local_tile_y).604// The tiles are processed in the following zigzag order to complete the605// super-tiles as soon as possible:606// 1 2| 5 6607// 3 4| 7 8608// --------------609// 9 10| 13 14610// 11 12| 15 16611// When computing the residuals for a tile, the histogram of the above612// super-tile is updated. If this super-tile is finished, its histogram is used613// to update the histogram of the next super-tile and so on up to the max-tile.614static void GetBestPredictorsAndSubSampling(615int width, int height, const int min_bits, const int max_bits,616uint32_t* const argb_scratch, const uint32_t* const argb,617int max_quantization, int exact, int used_subtract_green,618const WebPPicture* const pic, int percent_range, int* const percent,619uint32_t** const all_modes, int* best_bits, uint32_t** best_mode) {620const uint32_t tiles_per_row = VP8LSubSampleSize(width, min_bits);621const uint32_t tiles_per_col = VP8LSubSampleSize(height, min_bits);622int64_t best_cost;623uint32_t subsampling_index;624const uint32_t max_subsampling_index = max_bits - min_bits;625// Compute the needed memory size for residual histograms, accumulated626// residual histograms and predictor histograms.627const int num_argb = (max_subsampling_index + 1) * kNumPredModes * HISTO_SIZE;628const int num_accumulated_rgb = (max_subsampling_index + 1) * HISTO_SIZE;629const int num_predictors = (max_subsampling_index + 1) * kNumPredModes;630uint32_t* const raw_data = (uint32_t*)WebPSafeCalloc(631num_argb + num_accumulated_rgb + num_predictors, sizeof(uint32_t));632uint32_t* const all_argb = raw_data;633uint32_t* const all_accumulated_argb = all_argb + num_argb;634uint32_t* const all_pred_histos = all_accumulated_argb + num_accumulated_rgb;635const int max_tile_size = 1 << max_subsampling_index; // in tile size636int percent_start = *percent;637// When using the residuals of a tile for its super-tiles, you can either:638// - use each residual to update the histogram of the super-tile, with a cost639// of 4 * (1<<n)^2 increment operations (4 for the number of channels, and640// (1<<n)^2 for the number of pixels in the tile)641// - use the histogram of the tile to update the histogram of the super-tile,642// with a cost of HISTO_SIZE (1024)643// The first method is therefore faster until n==4. 'update_up_to_index'644// defines the maximum subsampling_index for which the residuals should be645// individually added to the super-tile histogram.646const uint32_t update_up_to_index =647GetMax(GetMin(4, max_bits), min_bits) - min_bits;648// Coordinates in the max-tile in tile units.649uint32_t local_tile_x = 0, local_tile_y = 0;650uint32_t max_tile_x = 0, max_tile_y = 0;651uint32_t tile_x = 0, tile_y = 0;652653*best_bits = 0;654*best_mode = NULL;655if (raw_data == NULL) return;656657while (tile_y < tiles_per_col) {658ComputeResidualsForTile(width, height, tile_x, tile_y, min_bits,659update_up_to_index, all_argb, argb_scratch, argb,660max_quantization, exact, used_subtract_green);661662// Update all the super-tiles that are complete.663subsampling_index = 0;664while (1) {665const uint32_t super_tile_x = tile_x >> subsampling_index;666const uint32_t super_tile_y = tile_y >> subsampling_index;667const uint32_t super_tiles_per_row =668VP8LSubSampleSize(width, min_bits + subsampling_index);669GetBestPredictorForTile(all_argb, subsampling_index, super_tile_x,670super_tile_y, super_tiles_per_row,671all_accumulated_argb, all_modes, all_pred_histos);672if (subsampling_index == max_subsampling_index) break;673674// Update the following super-tile histogram if it has not been updated675// yet.676++subsampling_index;677if (subsampling_index > update_up_to_index &&678subsampling_index <= max_subsampling_index) {679VP8LAddVectorEq(680GetHistoArgbConst(all_argb, subsampling_index - 1, /*mode=*/0),681GetHistoArgb(all_argb, subsampling_index, /*mode=*/0),682HISTO_SIZE * kNumPredModes);683}684// Check whether the super-tile is not complete (if the smallest tile685// is not at the end of a line/column or at the beginning of a super-tile686// of size (1 << subsampling_index)).687if (!((tile_x == (tiles_per_row - 1) ||688(local_tile_x + 1) % (1 << subsampling_index) == 0) &&689(tile_y == (tiles_per_col - 1) ||690(local_tile_y + 1) % (1 << subsampling_index) == 0))) {691--subsampling_index;692// subsampling_index now is the index of the last finished super-tile.693break;694}695}696// Reset all the histograms belonging to finished tiles.697memset(all_argb, 0,698HISTO_SIZE * kNumPredModes * (subsampling_index + 1) *699sizeof(*all_argb));700701if (subsampling_index == max_subsampling_index) {702// If a new max-tile is started.703if (tile_x == (tiles_per_row - 1)) {704max_tile_x = 0;705++max_tile_y;706} else {707++max_tile_x;708}709local_tile_x = 0;710local_tile_y = 0;711} else {712// Proceed with the Z traversal.713uint32_t coord_x = local_tile_x >> subsampling_index;714uint32_t coord_y = local_tile_y >> subsampling_index;715if (tile_x == (tiles_per_row - 1) && coord_x % 2 == 0) {716++coord_y;717} else {718if (coord_x % 2 == 0) {719++coord_x;720} else {721// Z traversal.722++coord_y;723--coord_x;724}725}726local_tile_x = coord_x << subsampling_index;727local_tile_y = coord_y << subsampling_index;728}729tile_x = max_tile_x * max_tile_size + local_tile_x;730tile_y = max_tile_y * max_tile_size + local_tile_y;731732if (tile_x == 0 &&733!WebPReportProgress(734pic, percent_start + percent_range * tile_y / tiles_per_col,735percent)) {736WebPSafeFree(raw_data);737return;738}739}740741// Figure out the best sampling.742best_cost = WEBP_INT64_MAX;743for (subsampling_index = 0; subsampling_index <= max_subsampling_index;744++subsampling_index) {745int plane;746const uint32_t* const accumulated =747GetAccumulatedHisto(all_accumulated_argb, subsampling_index);748int64_t cost = VP8LShannonEntropy(749&all_pred_histos[subsampling_index * kNumPredModes], kNumPredModes);750for (plane = 0; plane < 4; ++plane) {751cost += VP8LShannonEntropy(&accumulated[plane * 256], 256);752}753if (cost < best_cost) {754best_cost = cost;755*best_bits = min_bits + subsampling_index;756*best_mode = all_modes[subsampling_index];757}758}759760WebPSafeFree(raw_data);761762VP8LOptimizeSampling(*best_mode, width, height, *best_bits,763MAX_TRANSFORM_BITS, best_bits);764}765766// Finds the best predictor for each tile, and converts the image to residuals767// with respect to predictions. If near_lossless_quality < 100, applies768// near lossless processing, shaving off more bits of residuals for lower769// qualities.770int VP8LResidualImage(int width, int height, int min_bits, int max_bits,771int low_effort, uint32_t* const argb,772uint32_t* const argb_scratch, uint32_t* const image,773int near_lossless_quality, int exact,774int used_subtract_green, const WebPPicture* const pic,775int percent_range, int* const percent,776int* const best_bits) {777int percent_start = *percent;778const int max_quantization = 1 << VP8LNearLosslessBits(near_lossless_quality);779if (low_effort) {780const int tiles_per_row = VP8LSubSampleSize(width, max_bits);781const int tiles_per_col = VP8LSubSampleSize(height, max_bits);782int i;783for (i = 0; i < tiles_per_row * tiles_per_col; ++i) {784image[i] = ARGB_BLACK | (kPredLowEffort << 8);785}786*best_bits = max_bits;787} else {788// Allocate data to try all samplings from min_bits to max_bits.789int bits;790uint32_t sum_num_pixels = 0u;791uint32_t *modes_raw, *best_mode;792uint32_t* modes[MAX_TRANSFORM_BITS + 1];793uint32_t num_pixels[MAX_TRANSFORM_BITS + 1];794for (bits = min_bits; bits <= max_bits; ++bits) {795const int tiles_per_row = VP8LSubSampleSize(width, bits);796const int tiles_per_col = VP8LSubSampleSize(height, bits);797num_pixels[bits] = tiles_per_row * tiles_per_col;798sum_num_pixels += num_pixels[bits];799}800modes_raw = (uint32_t*)WebPSafeMalloc(sum_num_pixels, sizeof(*modes_raw));801if (modes_raw == NULL) return 0;802// Have modes point to the right global memory modes_raw.803modes[min_bits] = modes_raw;804for (bits = min_bits + 1; bits <= max_bits; ++bits) {805modes[bits] = modes[bits - 1] + num_pixels[bits - 1];806}807// Find the best sampling.808GetBestPredictorsAndSubSampling(809width, height, min_bits, max_bits, argb_scratch, argb, max_quantization,810exact, used_subtract_green, pic, percent_range, percent,811&modes[min_bits], best_bits, &best_mode);812if (*best_bits == 0) {813WebPSafeFree(modes_raw);814return 0;815}816// Keep the best predictor image.817memcpy(image, best_mode,818VP8LSubSampleSize(width, *best_bits) *819VP8LSubSampleSize(height, *best_bits) * sizeof(*image));820WebPSafeFree(modes_raw);821}822823CopyImageWithPrediction(width, height, *best_bits, image, argb_scratch, argb,824low_effort, max_quantization, exact,825used_subtract_green);826return WebPReportProgress(pic, percent_start + percent_range, percent);827}828829//------------------------------------------------------------------------------830// Color transform functions.831832static WEBP_INLINE void MultipliersClear(VP8LMultipliers* const m) {833m->green_to_red_ = 0;834m->green_to_blue_ = 0;835m->red_to_blue_ = 0;836}837838static WEBP_INLINE void ColorCodeToMultipliers(uint32_t color_code,839VP8LMultipliers* const m) {840m->green_to_red_ = (color_code >> 0) & 0xff;841m->green_to_blue_ = (color_code >> 8) & 0xff;842m->red_to_blue_ = (color_code >> 16) & 0xff;843}844845static WEBP_INLINE uint32_t MultipliersToColorCode(846const VP8LMultipliers* const m) {847return 0xff000000u |848((uint32_t)(m->red_to_blue_) << 16) |849((uint32_t)(m->green_to_blue_) << 8) |850m->green_to_red_;851}852853static int64_t PredictionCostCrossColor(const uint32_t accumulated[256],854const uint32_t counts[256]) {855// Favor low entropy, locally and globally.856// Favor small absolute values for PredictionCostSpatial857static const uint64_t kExpValue = 240;858return (int64_t)VP8LCombinedShannonEntropy(counts, accumulated) +859PredictionCostBias(counts, 3, kExpValue);860}861862static int64_t GetPredictionCostCrossColorRed(863const uint32_t* argb, int stride, int tile_width, int tile_height,864VP8LMultipliers prev_x, VP8LMultipliers prev_y, int green_to_red,865const uint32_t accumulated_red_histo[256]) {866uint32_t histo[256] = { 0 };867int64_t cur_diff;868869VP8LCollectColorRedTransforms(argb, stride, tile_width, tile_height,870green_to_red, histo);871872cur_diff = PredictionCostCrossColor(accumulated_red_histo, histo);873if ((uint8_t)green_to_red == prev_x.green_to_red_) {874// favor keeping the areas locally similar875cur_diff -= 3ll << LOG_2_PRECISION_BITS;876}877if ((uint8_t)green_to_red == prev_y.green_to_red_) {878// favor keeping the areas locally similar879cur_diff -= 3ll << LOG_2_PRECISION_BITS;880}881if (green_to_red == 0) {882cur_diff -= 3ll << LOG_2_PRECISION_BITS;883}884return cur_diff;885}886887static void GetBestGreenToRed(const uint32_t* argb, int stride, int tile_width,888int tile_height, VP8LMultipliers prev_x,889VP8LMultipliers prev_y, int quality,890const uint32_t accumulated_red_histo[256],891VP8LMultipliers* const best_tx) {892const int kMaxIters = 4 + ((7 * quality) >> 8); // in range [4..6]893int green_to_red_best = 0;894int iter, offset;895int64_t best_diff = GetPredictionCostCrossColorRed(896argb, stride, tile_width, tile_height, prev_x, prev_y, green_to_red_best,897accumulated_red_histo);898for (iter = 0; iter < kMaxIters; ++iter) {899// ColorTransformDelta is a 3.5 bit fixed point, so 32 is equal to900// one in color computation. Having initial delta here as 1 is sufficient901// to explore the range of (-2, 2).902const int delta = 32 >> iter;903// Try a negative and a positive delta from the best known value.904for (offset = -delta; offset <= delta; offset += 2 * delta) {905const int green_to_red_cur = offset + green_to_red_best;906const int64_t cur_diff = GetPredictionCostCrossColorRed(907argb, stride, tile_width, tile_height, prev_x, prev_y,908green_to_red_cur, accumulated_red_histo);909if (cur_diff < best_diff) {910best_diff = cur_diff;911green_to_red_best = green_to_red_cur;912}913}914}915best_tx->green_to_red_ = (green_to_red_best & 0xff);916}917918static int64_t GetPredictionCostCrossColorBlue(919const uint32_t* argb, int stride, int tile_width, int tile_height,920VP8LMultipliers prev_x, VP8LMultipliers prev_y, int green_to_blue,921int red_to_blue, const uint32_t accumulated_blue_histo[256]) {922uint32_t histo[256] = { 0 };923int64_t cur_diff;924925VP8LCollectColorBlueTransforms(argb, stride, tile_width, tile_height,926green_to_blue, red_to_blue, histo);927928cur_diff = PredictionCostCrossColor(accumulated_blue_histo, histo);929if ((uint8_t)green_to_blue == prev_x.green_to_blue_) {930// favor keeping the areas locally similar931cur_diff -= 3ll << LOG_2_PRECISION_BITS;932}933if ((uint8_t)green_to_blue == prev_y.green_to_blue_) {934// favor keeping the areas locally similar935cur_diff -= 3ll << LOG_2_PRECISION_BITS;936}937if ((uint8_t)red_to_blue == prev_x.red_to_blue_) {938// favor keeping the areas locally similar939cur_diff -= 3ll << LOG_2_PRECISION_BITS;940}941if ((uint8_t)red_to_blue == prev_y.red_to_blue_) {942// favor keeping the areas locally similar943cur_diff -= 3ll << LOG_2_PRECISION_BITS;944}945if (green_to_blue == 0) {946cur_diff -= 3ll << LOG_2_PRECISION_BITS;947}948if (red_to_blue == 0) {949cur_diff -= 3ll << LOG_2_PRECISION_BITS;950}951return cur_diff;952}953954#define kGreenRedToBlueNumAxis 8955#define kGreenRedToBlueMaxIters 7956static void GetBestGreenRedToBlue(const uint32_t* argb, int stride,957int tile_width, int tile_height,958VP8LMultipliers prev_x,959VP8LMultipliers prev_y, int quality,960const uint32_t accumulated_blue_histo[256],961VP8LMultipliers* const best_tx) {962const int8_t offset[kGreenRedToBlueNumAxis][2] =963{{0, -1}, {0, 1}, {-1, 0}, {1, 0}, {-1, -1}, {-1, 1}, {1, -1}, {1, 1}};964const int8_t delta_lut[kGreenRedToBlueMaxIters] = { 16, 16, 8, 4, 2, 2, 2 };965const int iters =966(quality < 25) ? 1 : (quality > 50) ? kGreenRedToBlueMaxIters : 4;967int green_to_blue_best = 0;968int red_to_blue_best = 0;969int iter;970// Initial value at origin:971int64_t best_diff = GetPredictionCostCrossColorBlue(972argb, stride, tile_width, tile_height, prev_x, prev_y, green_to_blue_best,973red_to_blue_best, accumulated_blue_histo);974for (iter = 0; iter < iters; ++iter) {975const int delta = delta_lut[iter];976int axis;977for (axis = 0; axis < kGreenRedToBlueNumAxis; ++axis) {978const int green_to_blue_cur =979offset[axis][0] * delta + green_to_blue_best;980const int red_to_blue_cur = offset[axis][1] * delta + red_to_blue_best;981const int64_t cur_diff = GetPredictionCostCrossColorBlue(982argb, stride, tile_width, tile_height, prev_x, prev_y,983green_to_blue_cur, red_to_blue_cur, accumulated_blue_histo);984if (cur_diff < best_diff) {985best_diff = cur_diff;986green_to_blue_best = green_to_blue_cur;987red_to_blue_best = red_to_blue_cur;988}989if (quality < 25 && iter == 4) {990// Only axis aligned diffs for lower quality.991break; // next iter.992}993}994if (delta == 2 && green_to_blue_best == 0 && red_to_blue_best == 0) {995// Further iterations would not help.996break; // out of iter-loop.997}998}999best_tx->green_to_blue_ = green_to_blue_best & 0xff;1000best_tx->red_to_blue_ = red_to_blue_best & 0xff;1001}1002#undef kGreenRedToBlueMaxIters1003#undef kGreenRedToBlueNumAxis10041005static VP8LMultipliers GetBestColorTransformForTile(1006int tile_x, int tile_y, int bits, VP8LMultipliers prev_x,1007VP8LMultipliers prev_y, int quality, int xsize, int ysize,1008const uint32_t accumulated_red_histo[256],1009const uint32_t accumulated_blue_histo[256], const uint32_t* const argb) {1010const int max_tile_size = 1 << bits;1011const int tile_y_offset = tile_y * max_tile_size;1012const int tile_x_offset = tile_x * max_tile_size;1013const int all_x_max = GetMin(tile_x_offset + max_tile_size, xsize);1014const int all_y_max = GetMin(tile_y_offset + max_tile_size, ysize);1015const int tile_width = all_x_max - tile_x_offset;1016const int tile_height = all_y_max - tile_y_offset;1017const uint32_t* const tile_argb = argb + tile_y_offset * xsize1018+ tile_x_offset;1019VP8LMultipliers best_tx;1020MultipliersClear(&best_tx);10211022GetBestGreenToRed(tile_argb, xsize, tile_width, tile_height,1023prev_x, prev_y, quality, accumulated_red_histo, &best_tx);1024GetBestGreenRedToBlue(tile_argb, xsize, tile_width, tile_height,1025prev_x, prev_y, quality, accumulated_blue_histo,1026&best_tx);1027return best_tx;1028}10291030static void CopyTileWithColorTransform(int xsize, int ysize,1031int tile_x, int tile_y,1032int max_tile_size,1033VP8LMultipliers color_transform,1034uint32_t* argb) {1035const int xscan = GetMin(max_tile_size, xsize - tile_x);1036int yscan = GetMin(max_tile_size, ysize - tile_y);1037argb += tile_y * xsize + tile_x;1038while (yscan-- > 0) {1039VP8LTransformColor(&color_transform, argb, xscan);1040argb += xsize;1041}1042}10431044int VP8LColorSpaceTransform(int width, int height, int bits, int quality,1045uint32_t* const argb, uint32_t* image,1046const WebPPicture* const pic, int percent_range,1047int* const percent, int* const best_bits) {1048const int max_tile_size = 1 << bits;1049const int tile_xsize = VP8LSubSampleSize(width, bits);1050const int tile_ysize = VP8LSubSampleSize(height, bits);1051int percent_start = *percent;1052uint32_t accumulated_red_histo[256] = { 0 };1053uint32_t accumulated_blue_histo[256] = { 0 };1054int tile_x, tile_y;1055VP8LMultipliers prev_x, prev_y;1056MultipliersClear(&prev_y);1057MultipliersClear(&prev_x);1058for (tile_y = 0; tile_y < tile_ysize; ++tile_y) {1059for (tile_x = 0; tile_x < tile_xsize; ++tile_x) {1060int y;1061const int tile_x_offset = tile_x * max_tile_size;1062const int tile_y_offset = tile_y * max_tile_size;1063const int all_x_max = GetMin(tile_x_offset + max_tile_size, width);1064const int all_y_max = GetMin(tile_y_offset + max_tile_size, height);1065const int offset = tile_y * tile_xsize + tile_x;1066if (tile_y != 0) {1067ColorCodeToMultipliers(image[offset - tile_xsize], &prev_y);1068}1069prev_x = GetBestColorTransformForTile(tile_x, tile_y, bits,1070prev_x, prev_y,1071quality, width, height,1072accumulated_red_histo,1073accumulated_blue_histo,1074argb);1075image[offset] = MultipliersToColorCode(&prev_x);1076CopyTileWithColorTransform(width, height, tile_x_offset, tile_y_offset,1077max_tile_size, prev_x, argb);10781079// Gather accumulated histogram data.1080for (y = tile_y_offset; y < all_y_max; ++y) {1081int ix = y * width + tile_x_offset;1082const int ix_end = ix + all_x_max - tile_x_offset;1083for (; ix < ix_end; ++ix) {1084const uint32_t pix = argb[ix];1085if (ix >= 2 &&1086pix == argb[ix - 2] &&1087pix == argb[ix - 1]) {1088continue; // repeated pixels are handled by backward references1089}1090if (ix >= width + 2 &&1091argb[ix - 2] == argb[ix - width - 2] &&1092argb[ix - 1] == argb[ix - width - 1] &&1093pix == argb[ix - width]) {1094continue; // repeated pixels are handled by backward references1095}1096++accumulated_red_histo[(pix >> 16) & 0xff];1097++accumulated_blue_histo[(pix >> 0) & 0xff];1098}1099}1100}1101if (!WebPReportProgress(1102pic, percent_start + percent_range * tile_y / tile_ysize,1103percent)) {1104return 0;1105}1106}1107VP8LOptimizeSampling(image, width, height, bits, MAX_TRANSFORM_BITS,1108best_bits);1109return 1;1110}111111121113