// Path: blob/master/thirdparty/libwebp/src/enc/vp8i_enc.h
// 21875 views
// Copyright 2011 Google Inc. All Rights Reserved.1//2// Use of this source code is governed by a BSD-style license3// that can be found in the COPYING file in the root of the source4// tree. An additional intellectual property rights grant can be found5// in the file PATENTS. All contributing project authors may6// be found in the AUTHORS file in the root of the source tree.7// -----------------------------------------------------------------------------8//9// WebP encoder: internal header.10//11// Author: Skal ([email protected])1213#ifndef WEBP_ENC_VP8I_ENC_H_14#define WEBP_ENC_VP8I_ENC_H_1516#include <string.h> // for memcpy()1718#include "src/dec/common_dec.h"19#include "src/dsp/cpu.h"20#include "src/dsp/dsp.h"21#include "src/utils/bit_writer_utils.h"22#include "src/utils/thread_utils.h"23#include "src/utils/utils.h"24#include "src/webp/encode.h"25#include "src/webp/types.h"2627#ifdef __cplusplus28extern "C" {29#endif3031//------------------------------------------------------------------------------32// Various defines and enums3334// version numbers35#define ENC_MAJ_VERSION 136#define ENC_MIN_VERSION 637#define ENC_REV_VERSION 03839enum { MAX_LF_LEVELS = 64, // Maximum loop filter level40MAX_VARIABLE_LEVEL = 67, // last (inclusive) level with variable cost41MAX_LEVEL = 2047 // max level (note: max codable is 2047 + 67)42};4344typedef enum { // Rate-distortion optimization levels45RD_OPT_NONE = 0, // no rd-opt46RD_OPT_BASIC = 1, // basic scoring (no trellis)47RD_OPT_TRELLIS = 2, // perform trellis-quant on the final decision only48RD_OPT_TRELLIS_ALL = 3 // trellis-quant for every scoring (much slower)49} VP8RDLevel;5051// YUV-cache parameters. 
// Cache is 32-bytes wide (= one cacheline).
// The original or reconstructed samples can be accessed using VP8Scan[].
// The predicted blocks can be accessed using offsets to 'yuv_p' and
// the arrays VP8*ModeOffsets[].
// * YUV Samples area ('yuv_in'/'yuv_out'/'yuv_out2')
//   (see VP8Scan[] for accessing the blocks, along with
//   Y_OFF_ENC/U_OFF_ENC/V_OFF_ENC):
//             +----+----+
//  Y_OFF_ENC  |YYYY|UUVV|
//  U_OFF_ENC  |YYYY|UUVV|
//  V_OFF_ENC  |YYYY|....| <- 25% wasted U/V area
//             |YYYY|....|
//             +----+----+
// * Prediction area ('yuv_p', size = PRED_SIZE_ENC)
//   Intra16 predictions (16x16 block each, two per row):
//         |I16DC16|I16TM16|
//         |I16VE16|I16HE16|
//   Chroma U/V predictions (16x8 block each, two per row):
//         |C8DC8|C8TM8|
//         |C8VE8|C8HE8|
//   Intra 4x4 predictions (4x4 block each)
//         |I4DC4 I4TM4 I4VE4 I4HE4|I4RD4 I4VR4 I4LD4 I4VL4|
//         |I4HD4 I4HU4 I4TMP .....|.......................| <- ~31% wasted
#define YUV_SIZE_ENC (BPS * 16)
#define PRED_SIZE_ENC (32 * BPS + 16 * BPS + 8 * BPS)  // I16+Chroma+I4 preds
#define Y_OFF_ENC (0)
#define U_OFF_ENC (16)
#define V_OFF_ENC (16 + 8)

extern const uint16_t VP8Scan[16];
extern const uint16_t VP8UVModeOffsets[4];
extern const uint16_t VP8I16ModeOffsets[4];

// Layout of prediction blocks
// intra 16x16
#define I16DC16 (0 * 16 * BPS)
#define I16TM16 (I16DC16 + 16)
#define I16VE16 (1 * 16 * BPS)
#define I16HE16 (I16VE16 + 16)
// chroma 8x8, two U/V blocks side by side (hence: 16x8 each)
#define C8DC8 (2 * 16 * BPS)
#define C8TM8 (C8DC8 + 1 * 16)
#define C8VE8 (2 * 16 * BPS + 8 * BPS)
#define C8HE8 (C8VE8 + 1 * 16)
// intra 4x4
#define I4DC4 (3 * 16 * BPS + 0)
#define I4TM4 (I4DC4 + 4)
#define I4VE4 (I4DC4 + 8)
#define I4HE4 (I4DC4 + 12)
#define I4RD4 (I4DC4 + 16)
#define I4VR4 (I4DC4 + 20)
#define I4LD4 (I4DC4 + 24)
#define I4VL4 (I4DC4 + 28)
#define I4HD4 (3 * 16 * BPS + 4 * BPS)
#define I4HU4 (I4HD4 + 4)
#define I4TMP (I4HD4 + 8)

typedef int64_t score_t;
type used for scores, rate, distortion109// Note that MAX_COST is not the maximum allowed by sizeof(score_t),110// in order to allow overflowing computations.111#define MAX_COST ((score_t)0x7fffffffffffffLL)112113#define QFIX 17114#define BIAS(b) ((b) << (QFIX - 8))115// Fun fact: this is the _only_ line where we're actually being lossy and116// discarding bits.117static WEBP_INLINE int QUANTDIV(uint32_t n, uint32_t iQ, uint32_t B) {118return (int)((n * iQ + B) >> QFIX);119}120121// Uncomment the following to remove token-buffer code:122// #define DISABLE_TOKEN_BUFFER123124// quality below which error-diffusion is enabled125#define ERROR_DIFFUSION_QUALITY 98126127//------------------------------------------------------------------------------128// Headers129130typedef uint32_t proba_t; // 16b + 16b131typedef uint8_t ProbaArray[NUM_CTX][NUM_PROBAS];132typedef proba_t StatsArray[NUM_CTX][NUM_PROBAS];133typedef uint16_t CostArray[NUM_CTX][MAX_VARIABLE_LEVEL + 1];134typedef const uint16_t* (*CostArrayPtr)[NUM_CTX]; // for easy casting135typedef const uint16_t* CostArrayMap[16][NUM_CTX];136typedef double LFStats[NUM_MB_SEGMENTS][MAX_LF_LEVELS]; // filter stats137138typedef struct VP8Encoder VP8Encoder;139140// segment features141typedef struct {142int num_segments; // Actual number of segments. 
1 segment only = unused.143int update_map; // whether to update the segment map or not.144// must be 0 if there's only 1 segment.145int size; // bit-cost for transmitting the segment map146} VP8EncSegmentHeader;147148// Struct collecting all frame-persistent probabilities.149typedef struct {150uint8_t segments[3]; // probabilities for segment tree151uint8_t skip_proba; // final probability of being skipped.152ProbaArray coeffs[NUM_TYPES][NUM_BANDS]; // 1056 bytes153StatsArray stats[NUM_TYPES][NUM_BANDS]; // 4224 bytes154CostArray level_cost[NUM_TYPES][NUM_BANDS]; // 13056 bytes155CostArrayMap remapped_costs[NUM_TYPES]; // 1536 bytes156int dirty; // if true, need to call VP8CalculateLevelCosts()157int use_skip_proba; // Note: we always use skip_proba for now.158int nb_skip; // number of skipped blocks159} VP8EncProba;160161// Filter parameters. Not actually used in the code (we don't perform162// the in-loop filtering), but filled from user's config163typedef struct {164int simple; // filtering type: 0=complex, 1=simple165int level; // base filter level [0..63]166int sharpness; // [0..7]167int i4x4_lf_delta; // delta filter level for i4x4 relative to i16x16168} VP8EncFilterHeader;169170//------------------------------------------------------------------------------171// Informations about the macroblocks.172173typedef struct {174// block type175unsigned int type:2; // 0=i4x4, 1=i16x16176unsigned int uv_mode:2;177unsigned int skip:1;178unsigned int segment:2;179uint8_t alpha; // quantization-susceptibility180} VP8MBInfo;181182typedef struct VP8Matrix {183uint16_t q[16]; // quantizer steps184uint16_t iq[16]; // reciprocals, fixed point.185uint32_t bias[16]; // rounding bias186uint32_t zthresh[16]; // value below which a coefficient is zeroed187uint16_t sharpen[16]; // frequency boosters for slight sharpening188} VP8Matrix;189190typedef struct {191VP8Matrix y1, y2, uv; // quantization matrices192int alpha; // quant-susceptibility, range [-127,127]. 
Zero is neutral.193// Lower values indicate a lower risk of blurriness.194int beta; // filter-susceptibility, range [0,255].195int quant; // final segment quantizer.196int fstrength; // final in-loop filtering strength197int max_edge; // max edge delta (for filtering strength)198int min_disto; // minimum distortion required to trigger filtering record199// reactivities200int lambda_i16, lambda_i4, lambda_uv;201int lambda_mode, lambda_trellis, tlambda;202int lambda_trellis_i16, lambda_trellis_i4, lambda_trellis_uv;203204// lambda values for distortion-based evaluation205score_t i4_penalty; // penalty for using Intra4206} VP8SegmentInfo;207208typedef int8_t DError[2 /* u/v */][2 /* top or left */];209210// Handy transient struct to accumulate score and info during RD-optimization211// and mode evaluation.212typedef struct {213score_t D, SD; // Distortion, spectral distortion214score_t H, R, score; // header bits, rate, score.215int16_t y_dc_levels[16]; // Quantized levels for luma-DC, luma-AC, chroma.216int16_t y_ac_levels[16][16];217int16_t uv_levels[4 + 4][16];218int mode_i16; // mode number for intra16 prediction219uint8_t modes_i4[16]; // mode numbers for intra4 predictions220int mode_uv; // mode number of chroma prediction221uint32_t nz; // non-zero blocks222int8_t derr[2][3]; // DC diffusion errors for U/V for blocks #1/2/3223} VP8ModeScore;224225// Iterator structure to iterate through macroblocks, pointing to the226// right neighbouring data (samples, predictions, contexts, ...)227typedef struct {228int x, y; // current macroblock229uint8_t* yuv_in; // input samples230uint8_t* yuv_out; // output samples231uint8_t* yuv_out2; // secondary buffer swapped with yuv_out.232uint8_t* yuv_p; // scratch buffer for prediction233VP8Encoder* enc; // back-pointer234VP8MBInfo* mb; // current macroblock235VP8BitWriter* bw; // current bit-writer236uint8_t* preds; // intra mode predictors (4x4 blocks)237uint32_t* nz; // non-zero pattern238#if WEBP_AARCH64 && BPS == 
32239uint8_t i4_boundary[40]; // 32+8 boundary samples needed by intra4x4240#else241uint8_t i4_boundary[37]; // 32+5 boundary samples needed by intra4x4242#endif243uint8_t* i4_top; // pointer to the current top boundary sample244int i4; // current intra4x4 mode being tested245int top_nz[9]; // top-non-zero context.246int left_nz[9]; // left-non-zero. left_nz[8] is independent.247uint64_t bit_count[4][3]; // bit counters for coded levels.248uint64_t luma_bits; // macroblock bit-cost for luma249uint64_t uv_bits; // macroblock bit-cost for chroma250LFStats* lf_stats; // filter stats (borrowed from enc)251int do_trellis; // if true, perform extra level optimisation252int count_down; // number of mb still to be processed253int count_down0; // starting counter value (for progress)254int percent0; // saved initial progress percent255256DError left_derr; // left error diffusion (u/v)257DError* top_derr; // top diffusion error - NULL if disabled258259uint8_t* y_left; // left luma samples (addressable from index -1 to 15).260uint8_t* u_left; // left u samples (addressable from index -1 to 7)261uint8_t* v_left; // left v samples (addressable from index -1 to 7)262263uint8_t* y_top; // top luma samples at position 'x'264uint8_t* uv_top; // top u/v samples at position 'x', packed as 16 bytes265266// memory for storing y/u/v_left267uint8_t yuv_left_mem[17 + 16 + 16 + 8 + WEBP_ALIGN_CST];268// memory for yuv*269uint8_t yuv_mem[3 * YUV_SIZE_ENC + PRED_SIZE_ENC + WEBP_ALIGN_CST];270} VP8EncIterator;271272// in iterator.c273// must be called first274void VP8IteratorInit(VP8Encoder* const enc, VP8EncIterator* const it);275// reset iterator position to row 'y'276void VP8IteratorSetRow(VP8EncIterator* const it, int y);277// set count down (=number of iterations to go)278void VP8IteratorSetCountDown(VP8EncIterator* const it, int count_down);279// return true if iteration is finished280int VP8IteratorIsDone(const VP8EncIterator* const it);281// Import uncompressed samples from 
source.282// If tmp_32 is not NULL, import boundary samples too.283// tmp_32 is a 32-bytes scratch buffer that must be aligned in memory.284void VP8IteratorImport(VP8EncIterator* const it, uint8_t* const tmp_32);285// export decimated samples286void VP8IteratorExport(const VP8EncIterator* const it);287// go to next macroblock. Returns false if not finished.288int VP8IteratorNext(VP8EncIterator* const it);289// save the 'yuv_out' boundary values to 'top'/'left' arrays for next290// iterations.291void VP8IteratorSaveBoundary(VP8EncIterator* const it);292// Report progression based on macroblock rows. Return 0 for user-abort request.293int VP8IteratorProgress(const VP8EncIterator* const it, int delta);294// Intra4x4 iterations295void VP8IteratorStartI4(VP8EncIterator* const it);296// returns true if not done.297int VP8IteratorRotateI4(VP8EncIterator* const it,298const uint8_t* const yuv_out);299300// Non-zero context setup/teardown301void VP8IteratorNzToBytes(VP8EncIterator* const it);302void VP8IteratorBytesToNz(VP8EncIterator* const it);303304// Helper functions to set mode properties305void VP8SetIntra16Mode(const VP8EncIterator* const it, int mode);306void VP8SetIntra4Mode(const VP8EncIterator* const it, const uint8_t* modes);307void VP8SetIntraUVMode(const VP8EncIterator* const it, int mode);308void VP8SetSkip(const VP8EncIterator* const it, int skip);309void VP8SetSegment(const VP8EncIterator* const it, int segment);310311//------------------------------------------------------------------------------312// Paginated token buffer313314typedef struct VP8Tokens VP8Tokens; // struct details in token.c315316typedef struct {317#if !defined(DISABLE_TOKEN_BUFFER)318VP8Tokens* pages; // first page319VP8Tokens** last_page; // last page320uint16_t* tokens; // set to (*last_page)->tokens321int left; // how many free tokens left before the page is full322int page_size; // number of tokens per page323#endif324int error; // true in case of malloc error325} VP8TBuffer;326327// 
initialize an empty buffer328void VP8TBufferInit(VP8TBuffer* const b, int page_size);329void VP8TBufferClear(VP8TBuffer* const b); // de-allocate pages memory330331#if !defined(DISABLE_TOKEN_BUFFER)332333// Finalizes bitstream when probabilities are known.334// Deletes the allocated token memory if final_pass is true.335int VP8EmitTokens(VP8TBuffer* const b, VP8BitWriter* const bw,336const uint8_t* const probas, int final_pass);337338// record the coding of coefficients without knowing the probabilities yet339int VP8RecordCoeffTokens(int ctx, const struct VP8Residual* const res,340VP8TBuffer* const tokens);341342// Estimate the final coded size given a set of 'probas'.343size_t VP8EstimateTokenSize(VP8TBuffer* const b, const uint8_t* const probas);344345#endif // !DISABLE_TOKEN_BUFFER346347//------------------------------------------------------------------------------348// VP8Encoder349350struct VP8Encoder {351const WebPConfig* config; // user configuration and parameters352WebPPicture* pic; // input / output picture353354// headers355VP8EncFilterHeader filter_hdr; // filtering information356VP8EncSegmentHeader segment_hdr; // segment information357358int profile; // VP8's profile, deduced from Config.359360// dimension, in macroblock units.361int mb_w, mb_h;362int preds_w; // stride of the *preds prediction plane (=4*mb_w + 1)363364// number of partitions (1, 2, 4 or 8 = MAX_NUM_PARTITIONS)365int num_parts;366367// per-partition boolean decoders.368VP8BitWriter bw; // part0369VP8BitWriter parts[MAX_NUM_PARTITIONS]; // token partitions370VP8TBuffer tokens; // token buffer371372int percent; // for progress373374// transparency blob375int has_alpha;376uint8_t* alpha_data; // non-NULL if transparency is present377uint32_t alpha_data_size;378WebPWorker alpha_worker;379380// quantization info (one set of DC/AC dequant factor per segment)381VP8SegmentInfo dqm[NUM_MB_SEGMENTS];382int base_quant; // nominal quantizer value. 
Only used383// for relative coding of segments' quant.384int alpha; // global susceptibility (<=> complexity)385int uv_alpha; // U/V quantization susceptibility386// global offset of quantizers, shared by all segments387int dq_y1_dc;388int dq_y2_dc, dq_y2_ac;389int dq_uv_dc, dq_uv_ac;390391// probabilities and statistics392VP8EncProba proba;393uint64_t sse[4]; // sum of Y/U/V/A squared errors for all macroblocks394uint64_t sse_count; // pixel count for the sse[] stats395int coded_size;396int residual_bytes[3][4];397int block_count[3];398399// quality/speed settings400int method; // 0=fastest, 6=best/slowest.401VP8RDLevel rd_opt_level; // Deduced from method.402int max_i4_header_bits; // partition #0 safeness factor403int mb_header_limit; // rough limit for header bits per MB404int thread_level; // derived from config->thread_level405int do_search; // derived from config->target_XXX406int use_tokens; // if true, use token buffer407408// Memory409VP8MBInfo* mb_info; // contextual macroblock infos (mb_w + 1)410uint8_t* preds; // predictions modes: (4*mb_w+1) * (4*mb_h+1)411uint32_t* nz; // non-zero bit context: mb_w+1412uint8_t* y_top; // top luma samples.413uint8_t* uv_top; // top u/v samples.414// U and V are packed into 16 bytes (8 U + 8 V)415LFStats* lf_stats; // autofilter stats (if NULL, autofilter is off)416DError* top_derr; // diffusion error (NULL if disabled)417};418419//------------------------------------------------------------------------------420// internal functions. 
Not public.421422// in tree.c423extern const uint8_t VP8CoeffsProba0[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS];424extern const uint8_t425VP8CoeffsUpdateProba[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS];426// Reset the token probabilities to their initial (default) values427void VP8DefaultProbas(VP8Encoder* const enc);428// Write the token probabilities429void VP8WriteProbas(VP8BitWriter* const bw, const VP8EncProba* const probas);430// Writes the partition #0 modes (that is: all intra modes)431void VP8CodeIntraModes(VP8Encoder* const enc);432433// in syntax.c434// Generates the final bitstream by coding the partition0 and headers,435// and appending an assembly of all the pre-coded token partitions.436// Return true if everything is ok.437int VP8EncWrite(VP8Encoder* const enc);438// Release memory allocated for bit-writing in VP8EncLoop & seq.439void VP8EncFreeBitWriters(VP8Encoder* const enc);440441// in frame.c442extern const uint8_t VP8Cat3[];443extern const uint8_t VP8Cat4[];444extern const uint8_t VP8Cat5[];445extern const uint8_t VP8Cat6[];446447// Form all the four Intra16x16 predictions in the 'yuv_p' cache448void VP8MakeLuma16Preds(const VP8EncIterator* const it);449// Form all the four Chroma8x8 predictions in the 'yuv_p' cache450void VP8MakeChroma8Preds(const VP8EncIterator* const it);451// Rate calculation452int VP8GetCostLuma16(VP8EncIterator* const it, const VP8ModeScore* const rd);453int VP8GetCostLuma4(VP8EncIterator* const it, const int16_t levels[16]);454int VP8GetCostUV(VP8EncIterator* const it, const VP8ModeScore* const rd);455// Main coding calls456int VP8EncLoop(VP8Encoder* const enc);457int VP8EncTokenLoop(VP8Encoder* const enc);458459// in webpenc.c460// Assign an error code to a picture. Return false for convenience.461int WebPEncodingSetError(const WebPPicture* const pic, WebPEncodingError error);462int WebPReportProgress(const WebPPicture* const pic,463int percent, int* const percent_store);464465// in analysis.c466// Main analysis loop. 
Decides the segmentations and complexity.467// Assigns a first guess for Intra16 and 'uvmode' prediction modes.468int VP8EncAnalyze(VP8Encoder* const enc);469470// in quant.c471// Sets up segment's quantization values, 'base_quant' and filter strengths.472void VP8SetSegmentParams(VP8Encoder* const enc, float quality);473// Pick best modes and fills the levels. Returns true if skipped.474int VP8Decimate(VP8EncIterator* WEBP_RESTRICT const it,475VP8ModeScore* WEBP_RESTRICT const rd,476VP8RDLevel rd_opt);477478// in alpha.c479void VP8EncInitAlpha(VP8Encoder* const enc); // initialize alpha compression480int VP8EncStartAlpha(VP8Encoder* const enc); // start alpha coding process481int VP8EncFinishAlpha(VP8Encoder* const enc); // finalize compressed data482int VP8EncDeleteAlpha(VP8Encoder* const enc); // delete compressed data483484// autofilter485void VP8InitFilter(VP8EncIterator* const it);486void VP8StoreFilterStats(VP8EncIterator* const it);487void VP8AdjustFilterStrength(VP8EncIterator* const it);488489// returns the approximate filtering strength needed to smooth a edge490// step of 'delta', given a sharpness parameter 'sharpness'.491int VP8FilterStrengthFromDelta(int sharpness, int delta);492493// misc utils for picture_*.c:494495// Returns true if 'picture' is non-NULL and dimensions/colorspace are within496// their valid ranges. If returning false, the 'error_code' in 'picture' is497// updated.498int WebPValidatePicture(const WebPPicture* const picture);499500// Remove reference to the ARGB/YUVA buffer (doesn't free anything).501void WebPPictureResetBuffers(WebPPicture* const picture);502503// Allocates ARGB buffer according to set width/height (previous one is504// always free'd). Preserves the YUV(A) buffer. Returns false in case of error505// (invalid param, out-of-memory).506int WebPPictureAllocARGB(WebPPicture* const picture);507508// Allocates YUVA buffer according to set width/height (previous one is always509// free'd). 
Uses picture->csp to determine whether an alpha buffer is needed.510// Preserves the ARGB buffer.511// Returns false in case of error (invalid param, out-of-memory).512int WebPPictureAllocYUVA(WebPPicture* const picture);513514// Replace samples that are fully transparent by 'color' to help compressibility515// (no guarantee, though). Assumes pic->use_argb is true.516void WebPReplaceTransparentPixels(WebPPicture* const pic, uint32_t color);517518//------------------------------------------------------------------------------519520#ifdef __cplusplus521} // extern "C"522#endif523524#endif // WEBP_ENC_VP8I_ENC_H_525526527