// Path: blob/master/thirdparty/libwebp/src/enc/vp8i_enc.h (9913 views)
// Copyright 2011 Google Inc. All Rights Reserved.1//2// Use of this source code is governed by a BSD-style license3// that can be found in the COPYING file in the root of the source4// tree. An additional intellectual property rights grant can be found5// in the file PATENTS. All contributing project authors may6// be found in the AUTHORS file in the root of the source tree.7// -----------------------------------------------------------------------------8//9// WebP encoder: internal header.10//11// Author: Skal ([email protected])1213#ifndef WEBP_ENC_VP8I_ENC_H_14#define WEBP_ENC_VP8I_ENC_H_1516#include <string.h> // for memcpy()17#include "src/dec/common_dec.h"18#include "src/dsp/cpu.h"19#include "src/dsp/dsp.h"20#include "src/utils/bit_writer_utils.h"21#include "src/utils/thread_utils.h"22#include "src/utils/utils.h"23#include "src/webp/encode.h"2425#ifdef __cplusplus26extern "C" {27#endif2829//------------------------------------------------------------------------------30// Various defines and enums3132// version numbers33#define ENC_MAJ_VERSION 134#define ENC_MIN_VERSION 535#define ENC_REV_VERSION 03637enum { MAX_LF_LEVELS = 64, // Maximum loop filter level38MAX_VARIABLE_LEVEL = 67, // last (inclusive) level with variable cost39MAX_LEVEL = 2047 // max level (note: max codable is 2047 + 67)40};4142typedef enum { // Rate-distortion optimization levels43RD_OPT_NONE = 0, // no rd-opt44RD_OPT_BASIC = 1, // basic scoring (no trellis)45RD_OPT_TRELLIS = 2, // perform trellis-quant on the final decision only46RD_OPT_TRELLIS_ALL = 3 // trellis-quant for every scoring (much slower)47} VP8RDLevel;4849// YUV-cache parameters. 
Cache is 32-bytes wide (= one cacheline).50// The original or reconstructed samples can be accessed using VP8Scan[].51// The predicted blocks can be accessed using offsets to yuv_p_ and52// the arrays VP8*ModeOffsets[].53// * YUV Samples area (yuv_in_/yuv_out_/yuv_out2_)54// (see VP8Scan[] for accessing the blocks, along with55// Y_OFF_ENC/U_OFF_ENC/V_OFF_ENC):56// +----+----+57// Y_OFF_ENC |YYYY|UUVV|58// U_OFF_ENC |YYYY|UUVV|59// V_OFF_ENC |YYYY|....| <- 25% wasted U/V area60// |YYYY|....|61// +----+----+62// * Prediction area ('yuv_p_', size = PRED_SIZE_ENC)63// Intra16 predictions (16x16 block each, two per row):64// |I16DC16|I16TM16|65// |I16VE16|I16HE16|66// Chroma U/V predictions (16x8 block each, two per row):67// |C8DC8|C8TM8|68// |C8VE8|C8HE8|69// Intra 4x4 predictions (4x4 block each)70// |I4DC4 I4TM4 I4VE4 I4HE4|I4RD4 I4VR4 I4LD4 I4VL4|71// |I4HD4 I4HU4 I4TMP .....|.......................| <- ~31% wasted72#define YUV_SIZE_ENC (BPS * 16)73#define PRED_SIZE_ENC (32 * BPS + 16 * BPS + 8 * BPS) // I16+Chroma+I4 preds74#define Y_OFF_ENC (0)75#define U_OFF_ENC (16)76#define V_OFF_ENC (16 + 8)7778extern const uint16_t VP8Scan[16];79extern const uint16_t VP8UVModeOffsets[4];80extern const uint16_t VP8I16ModeOffsets[4];8182// Layout of prediction blocks83// intra 16x1684#define I16DC16 (0 * 16 * BPS)85#define I16TM16 (I16DC16 + 16)86#define I16VE16 (1 * 16 * BPS)87#define I16HE16 (I16VE16 + 16)88// chroma 8x8, two U/V blocks side by side (hence: 16x8 each)89#define C8DC8 (2 * 16 * BPS)90#define C8TM8 (C8DC8 + 1 * 16)91#define C8VE8 (2 * 16 * BPS + 8 * BPS)92#define C8HE8 (C8VE8 + 1 * 16)93// intra 4x494#define I4DC4 (3 * 16 * BPS + 0)95#define I4TM4 (I4DC4 + 4)96#define I4VE4 (I4DC4 + 8)97#define I4HE4 (I4DC4 + 12)98#define I4RD4 (I4DC4 + 16)99#define I4VR4 (I4DC4 + 20)100#define I4LD4 (I4DC4 + 24)101#define I4VL4 (I4DC4 + 28)102#define I4HD4 (3 * 16 * BPS + 4 * BPS)103#define I4HU4 (I4HD4 + 4)104#define I4TMP (I4HD4 + 8)105106typedef int64_t score_t; // type 
used for scores, rate, distortion107// Note that MAX_COST is not the maximum allowed by sizeof(score_t),108// in order to allow overflowing computations.109#define MAX_COST ((score_t)0x7fffffffffffffLL)110111#define QFIX 17112#define BIAS(b) ((b) << (QFIX - 8))113// Fun fact: this is the _only_ line where we're actually being lossy and114// discarding bits.115static WEBP_INLINE int QUANTDIV(uint32_t n, uint32_t iQ, uint32_t B) {116return (int)((n * iQ + B) >> QFIX);117}118119// Uncomment the following to remove token-buffer code:120// #define DISABLE_TOKEN_BUFFER121122// quality below which error-diffusion is enabled123#define ERROR_DIFFUSION_QUALITY 98124125//------------------------------------------------------------------------------126// Headers127128typedef uint32_t proba_t; // 16b + 16b129typedef uint8_t ProbaArray[NUM_CTX][NUM_PROBAS];130typedef proba_t StatsArray[NUM_CTX][NUM_PROBAS];131typedef uint16_t CostArray[NUM_CTX][MAX_VARIABLE_LEVEL + 1];132typedef const uint16_t* (*CostArrayPtr)[NUM_CTX]; // for easy casting133typedef const uint16_t* CostArrayMap[16][NUM_CTX];134typedef double LFStats[NUM_MB_SEGMENTS][MAX_LF_LEVELS]; // filter stats135136typedef struct VP8Encoder VP8Encoder;137138// segment features139typedef struct {140int num_segments_; // Actual number of segments. 
1 segment only = unused.141int update_map_; // whether to update the segment map or not.142// must be 0 if there's only 1 segment.143int size_; // bit-cost for transmitting the segment map144} VP8EncSegmentHeader;145146// Struct collecting all frame-persistent probabilities.147typedef struct {148uint8_t segments_[3]; // probabilities for segment tree149uint8_t skip_proba_; // final probability of being skipped.150ProbaArray coeffs_[NUM_TYPES][NUM_BANDS]; // 1056 bytes151StatsArray stats_[NUM_TYPES][NUM_BANDS]; // 4224 bytes152CostArray level_cost_[NUM_TYPES][NUM_BANDS]; // 13056 bytes153CostArrayMap remapped_costs_[NUM_TYPES]; // 1536 bytes154int dirty_; // if true, need to call VP8CalculateLevelCosts()155int use_skip_proba_; // Note: we always use skip_proba for now.156int nb_skip_; // number of skipped blocks157} VP8EncProba;158159// Filter parameters. Not actually used in the code (we don't perform160// the in-loop filtering), but filled from user's config161typedef struct {162int simple_; // filtering type: 0=complex, 1=simple163int level_; // base filter level [0..63]164int sharpness_; // [0..7]165int i4x4_lf_delta_; // delta filter level for i4x4 relative to i16x16166} VP8EncFilterHeader;167168//------------------------------------------------------------------------------169// Informations about the macroblocks.170171typedef struct {172// block type173unsigned int type_:2; // 0=i4x4, 1=i16x16174unsigned int uv_mode_:2;175unsigned int skip_:1;176unsigned int segment_:2;177uint8_t alpha_; // quantization-susceptibility178} VP8MBInfo;179180typedef struct VP8Matrix {181uint16_t q_[16]; // quantizer steps182uint16_t iq_[16]; // reciprocals, fixed point.183uint32_t bias_[16]; // rounding bias184uint32_t zthresh_[16]; // value below which a coefficient is zeroed185uint16_t sharpen_[16]; // frequency boosters for slight sharpening186} VP8Matrix;187188typedef struct {189VP8Matrix y1_, y2_, uv_; // quantization matrices190int alpha_; // quant-susceptibility, range 
[-127,127]. Zero is neutral.191// Lower values indicate a lower risk of blurriness.192int beta_; // filter-susceptibility, range [0,255].193int quant_; // final segment quantizer.194int fstrength_; // final in-loop filtering strength195int max_edge_; // max edge delta (for filtering strength)196int min_disto_; // minimum distortion required to trigger filtering record197// reactivities198int lambda_i16_, lambda_i4_, lambda_uv_;199int lambda_mode_, lambda_trellis_, tlambda_;200int lambda_trellis_i16_, lambda_trellis_i4_, lambda_trellis_uv_;201202// lambda values for distortion-based evaluation203score_t i4_penalty_; // penalty for using Intra4204} VP8SegmentInfo;205206typedef int8_t DError[2 /* u/v */][2 /* top or left */];207208// Handy transient struct to accumulate score and info during RD-optimization209// and mode evaluation.210typedef struct {211score_t D, SD; // Distortion, spectral distortion212score_t H, R, score; // header bits, rate, score.213int16_t y_dc_levels[16]; // Quantized levels for luma-DC, luma-AC, chroma.214int16_t y_ac_levels[16][16];215int16_t uv_levels[4 + 4][16];216int mode_i16; // mode number for intra16 prediction217uint8_t modes_i4[16]; // mode numbers for intra4 predictions218int mode_uv; // mode number of chroma prediction219uint32_t nz; // non-zero blocks220int8_t derr[2][3]; // DC diffusion errors for U/V for blocks #1/2/3221} VP8ModeScore;222223// Iterator structure to iterate through macroblocks, pointing to the224// right neighbouring data (samples, predictions, contexts, ...)225typedef struct {226int x_, y_; // current macroblock227uint8_t* yuv_in_; // input samples228uint8_t* yuv_out_; // output samples229uint8_t* yuv_out2_; // secondary buffer swapped with yuv_out_.230uint8_t* yuv_p_; // scratch buffer for prediction231VP8Encoder* enc_; // back-pointer232VP8MBInfo* mb_; // current macroblock233VP8BitWriter* bw_; // current bit-writer234uint8_t* preds_; // intra mode predictors (4x4 blocks)235uint32_t* nz_; // non-zero 
pattern236#if WEBP_AARCH64 && BPS == 32237uint8_t i4_boundary_[40]; // 32+8 boundary samples needed by intra4x4238#else239uint8_t i4_boundary_[37]; // 32+5 boundary samples needed by intra4x4240#endif241uint8_t* i4_top_; // pointer to the current top boundary sample242int i4_; // current intra4x4 mode being tested243int top_nz_[9]; // top-non-zero context.244int left_nz_[9]; // left-non-zero. left_nz[8] is independent.245uint64_t bit_count_[4][3]; // bit counters for coded levels.246uint64_t luma_bits_; // macroblock bit-cost for luma247uint64_t uv_bits_; // macroblock bit-cost for chroma248LFStats* lf_stats_; // filter stats (borrowed from enc_)249int do_trellis_; // if true, perform extra level optimisation250int count_down_; // number of mb still to be processed251int count_down0_; // starting counter value (for progress)252int percent0_; // saved initial progress percent253254DError left_derr_; // left error diffusion (u/v)255DError* top_derr_; // top diffusion error - NULL if disabled256257uint8_t* y_left_; // left luma samples (addressable from index -1 to 15).258uint8_t* u_left_; // left u samples (addressable from index -1 to 7)259uint8_t* v_left_; // left v samples (addressable from index -1 to 7)260261uint8_t* y_top_; // top luma samples at position 'x_'262uint8_t* uv_top_; // top u/v samples at position 'x_', packed as 16 bytes263264// memory for storing y/u/v_left_265uint8_t yuv_left_mem_[17 + 16 + 16 + 8 + WEBP_ALIGN_CST];266// memory for yuv_*267uint8_t yuv_mem_[3 * YUV_SIZE_ENC + PRED_SIZE_ENC + WEBP_ALIGN_CST];268} VP8EncIterator;269270// in iterator.c271// must be called first272void VP8IteratorInit(VP8Encoder* const enc, VP8EncIterator* const it);273// reset iterator position to row 'y'274void VP8IteratorSetRow(VP8EncIterator* const it, int y);275// set count down (=number of iterations to go)276void VP8IteratorSetCountDown(VP8EncIterator* const it, int count_down);277// return true if iteration is finished278int VP8IteratorIsDone(const 
VP8EncIterator* const it);279// Import uncompressed samples from source.280// If tmp_32 is not NULL, import boundary samples too.281// tmp_32 is a 32-bytes scratch buffer that must be aligned in memory.282void VP8IteratorImport(VP8EncIterator* const it, uint8_t* const tmp_32);283// export decimated samples284void VP8IteratorExport(const VP8EncIterator* const it);285// go to next macroblock. Returns false if not finished.286int VP8IteratorNext(VP8EncIterator* const it);287// save the yuv_out_ boundary values to top_/left_ arrays for next iterations.288void VP8IteratorSaveBoundary(VP8EncIterator* const it);289// Report progression based on macroblock rows. Return 0 for user-abort request.290int VP8IteratorProgress(const VP8EncIterator* const it, int delta);291// Intra4x4 iterations292void VP8IteratorStartI4(VP8EncIterator* const it);293// returns true if not done.294int VP8IteratorRotateI4(VP8EncIterator* const it,295const uint8_t* const yuv_out);296297// Non-zero context setup/teardown298void VP8IteratorNzToBytes(VP8EncIterator* const it);299void VP8IteratorBytesToNz(VP8EncIterator* const it);300301// Helper functions to set mode properties302void VP8SetIntra16Mode(const VP8EncIterator* const it, int mode);303void VP8SetIntra4Mode(const VP8EncIterator* const it, const uint8_t* modes);304void VP8SetIntraUVMode(const VP8EncIterator* const it, int mode);305void VP8SetSkip(const VP8EncIterator* const it, int skip);306void VP8SetSegment(const VP8EncIterator* const it, int segment);307308//------------------------------------------------------------------------------309// Paginated token buffer310311typedef struct VP8Tokens VP8Tokens; // struct details in token.c312313typedef struct {314#if !defined(DISABLE_TOKEN_BUFFER)315VP8Tokens* pages_; // first page316VP8Tokens** last_page_; // last page317uint16_t* tokens_; // set to (*last_page_)->tokens_318int left_; // how many free tokens left before the page is full319int page_size_; // number of tokens per page320#endif321int 
error_; // true in case of malloc error322} VP8TBuffer;323324// initialize an empty buffer325void VP8TBufferInit(VP8TBuffer* const b, int page_size);326void VP8TBufferClear(VP8TBuffer* const b); // de-allocate pages memory327328#if !defined(DISABLE_TOKEN_BUFFER)329330// Finalizes bitstream when probabilities are known.331// Deletes the allocated token memory if final_pass is true.332int VP8EmitTokens(VP8TBuffer* const b, VP8BitWriter* const bw,333const uint8_t* const probas, int final_pass);334335// record the coding of coefficients without knowing the probabilities yet336int VP8RecordCoeffTokens(int ctx, const struct VP8Residual* const res,337VP8TBuffer* const tokens);338339// Estimate the final coded size given a set of 'probas'.340size_t VP8EstimateTokenSize(VP8TBuffer* const b, const uint8_t* const probas);341342#endif // !DISABLE_TOKEN_BUFFER343344//------------------------------------------------------------------------------345// VP8Encoder346347struct VP8Encoder {348const WebPConfig* config_; // user configuration and parameters349WebPPicture* pic_; // input / output picture350351// headers352VP8EncFilterHeader filter_hdr_; // filtering information353VP8EncSegmentHeader segment_hdr_; // segment information354355int profile_; // VP8's profile, deduced from Config.356357// dimension, in macroblock units.358int mb_w_, mb_h_;359int preds_w_; // stride of the *preds_ prediction plane (=4*mb_w + 1)360361// number of partitions (1, 2, 4 or 8 = MAX_NUM_PARTITIONS)362int num_parts_;363364// per-partition boolean decoders.365VP8BitWriter bw_; // part0366VP8BitWriter parts_[MAX_NUM_PARTITIONS]; // token partitions367VP8TBuffer tokens_; // token buffer368369int percent_; // for progress370371// transparency blob372int has_alpha_;373uint8_t* alpha_data_; // non-NULL if transparency is present374uint32_t alpha_data_size_;375WebPWorker alpha_worker_;376377// quantization info (one set of DC/AC dequant factor per segment)378VP8SegmentInfo dqm_[NUM_MB_SEGMENTS];379int 
base_quant_; // nominal quantizer value. Only used380// for relative coding of segments' quant.381int alpha_; // global susceptibility (<=> complexity)382int uv_alpha_; // U/V quantization susceptibility383// global offset of quantizers, shared by all segments384int dq_y1_dc_;385int dq_y2_dc_, dq_y2_ac_;386int dq_uv_dc_, dq_uv_ac_;387388// probabilities and statistics389VP8EncProba proba_;390uint64_t sse_[4]; // sum of Y/U/V/A squared errors for all macroblocks391uint64_t sse_count_; // pixel count for the sse_[] stats392int coded_size_;393int residual_bytes_[3][4];394int block_count_[3];395396// quality/speed settings397int method_; // 0=fastest, 6=best/slowest.398VP8RDLevel rd_opt_level_; // Deduced from method_.399int max_i4_header_bits_; // partition #0 safeness factor400int mb_header_limit_; // rough limit for header bits per MB401int thread_level_; // derived from config->thread_level402int do_search_; // derived from config->target_XXX403int use_tokens_; // if true, use token buffer404405// Memory406VP8MBInfo* mb_info_; // contextual macroblock infos (mb_w_ + 1)407uint8_t* preds_; // predictions modes: (4*mb_w+1) * (4*mb_h+1)408uint32_t* nz_; // non-zero bit context: mb_w+1409uint8_t* y_top_; // top luma samples.410uint8_t* uv_top_; // top u/v samples.411// U and V are packed into 16 bytes (8 U + 8 V)412LFStats* lf_stats_; // autofilter stats (if NULL, autofilter is off)413DError* top_derr_; // diffusion error (NULL if disabled)414};415416//------------------------------------------------------------------------------417// internal functions. 
Not public.418419// in tree.c420extern const uint8_t VP8CoeffsProba0[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS];421extern const uint8_t422VP8CoeffsUpdateProba[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS];423// Reset the token probabilities to their initial (default) values424void VP8DefaultProbas(VP8Encoder* const enc);425// Write the token probabilities426void VP8WriteProbas(VP8BitWriter* const bw, const VP8EncProba* const probas);427// Writes the partition #0 modes (that is: all intra modes)428void VP8CodeIntraModes(VP8Encoder* const enc);429430// in syntax.c431// Generates the final bitstream by coding the partition0 and headers,432// and appending an assembly of all the pre-coded token partitions.433// Return true if everything is ok.434int VP8EncWrite(VP8Encoder* const enc);435// Release memory allocated for bit-writing in VP8EncLoop & seq.436void VP8EncFreeBitWriters(VP8Encoder* const enc);437438// in frame.c439extern const uint8_t VP8Cat3[];440extern const uint8_t VP8Cat4[];441extern const uint8_t VP8Cat5[];442extern const uint8_t VP8Cat6[];443444// Form all the four Intra16x16 predictions in the yuv_p_ cache445void VP8MakeLuma16Preds(const VP8EncIterator* const it);446// Form all the four Chroma8x8 predictions in the yuv_p_ cache447void VP8MakeChroma8Preds(const VP8EncIterator* const it);448// Rate calculation449int VP8GetCostLuma16(VP8EncIterator* const it, const VP8ModeScore* const rd);450int VP8GetCostLuma4(VP8EncIterator* const it, const int16_t levels[16]);451int VP8GetCostUV(VP8EncIterator* const it, const VP8ModeScore* const rd);452// Main coding calls453int VP8EncLoop(VP8Encoder* const enc);454int VP8EncTokenLoop(VP8Encoder* const enc);455456// in webpenc.c457// Assign an error code to a picture. Return false for convenience.458int WebPEncodingSetError(const WebPPicture* const pic, WebPEncodingError error);459int WebPReportProgress(const WebPPicture* const pic,460int percent, int* const percent_store);461462// in analysis.c463// Main analysis loop. 
Decides the segmentations and complexity.464// Assigns a first guess for Intra16 and uvmode_ prediction modes.465int VP8EncAnalyze(VP8Encoder* const enc);466467// in quant.c468// Sets up segment's quantization values, base_quant_ and filter strengths.469void VP8SetSegmentParams(VP8Encoder* const enc, float quality);470// Pick best modes and fills the levels. Returns true if skipped.471int VP8Decimate(VP8EncIterator* WEBP_RESTRICT const it,472VP8ModeScore* WEBP_RESTRICT const rd,473VP8RDLevel rd_opt);474475// in alpha.c476void VP8EncInitAlpha(VP8Encoder* const enc); // initialize alpha compression477int VP8EncStartAlpha(VP8Encoder* const enc); // start alpha coding process478int VP8EncFinishAlpha(VP8Encoder* const enc); // finalize compressed data479int VP8EncDeleteAlpha(VP8Encoder* const enc); // delete compressed data480481// autofilter482void VP8InitFilter(VP8EncIterator* const it);483void VP8StoreFilterStats(VP8EncIterator* const it);484void VP8AdjustFilterStrength(VP8EncIterator* const it);485486// returns the approximate filtering strength needed to smooth a edge487// step of 'delta', given a sharpness parameter 'sharpness'.488int VP8FilterStrengthFromDelta(int sharpness, int delta);489490// misc utils for picture_*.c:491492// Returns true if 'picture' is non-NULL and dimensions/colorspace are within493// their valid ranges. If returning false, the 'error_code' in 'picture' is494// updated.495int WebPValidatePicture(const WebPPicture* const picture);496497// Remove reference to the ARGB/YUVA buffer (doesn't free anything).498void WebPPictureResetBuffers(WebPPicture* const picture);499500// Allocates ARGB buffer according to set width/height (previous one is501// always free'd). Preserves the YUV(A) buffer. Returns false in case of error502// (invalid param, out-of-memory).503int WebPPictureAllocARGB(WebPPicture* const picture);504505// Allocates YUVA buffer according to set width/height (previous one is always506// free'd). 
Uses picture->csp to determine whether an alpha buffer is needed.507// Preserves the ARGB buffer.508// Returns false in case of error (invalid param, out-of-memory).509int WebPPictureAllocYUVA(WebPPicture* const picture);510511// Replace samples that are fully transparent by 'color' to help compressibility512// (no guarantee, though). Assumes pic->use_argb is true.513void WebPReplaceTransparentPixels(WebPPicture* const pic, uint32_t color);514515//------------------------------------------------------------------------------516517#ifdef __cplusplus518} // extern "C"519#endif520521#endif // WEBP_ENC_VP8I_ENC_H_522523524