Path: blob/master/thirdparty/libwebp/src/enc/iterator_enc.c
9913 views
// Copyright 2011 Google Inc. All Rights Reserved.1//2// Use of this source code is governed by a BSD-style license3// that can be found in the COPYING file in the root of the source4// tree. An additional intellectual property rights grant can be found5// in the file PATENTS. All contributing project authors may6// be found in the AUTHORS file in the root of the source tree.7// -----------------------------------------------------------------------------8//9// VP8Iterator: block iterator10//11// Author: Skal ([email protected])1213#include <string.h>1415#include "src/dsp/cpu.h"16#include "src/enc/vp8i_enc.h"1718//------------------------------------------------------------------------------19// VP8Iterator20//------------------------------------------------------------------------------2122static void InitLeft(VP8EncIterator* const it) {23it->y_left_[-1] = it->u_left_[-1] = it->v_left_[-1] =24(it->y_ > 0) ? 129 : 127;25memset(it->y_left_, 129, 16);26memset(it->u_left_, 129, 8);27memset(it->v_left_, 129, 8);28it->left_nz_[8] = 0;29if (it->top_derr_ != NULL) {30memset(&it->left_derr_, 0, sizeof(it->left_derr_));31}32}3334static void InitTop(VP8EncIterator* const it) {35const VP8Encoder* const enc = it->enc_;36const size_t top_size = enc->mb_w_ * 16;37memset(enc->y_top_, 127, 2 * top_size);38memset(enc->nz_, 0, enc->mb_w_ * sizeof(*enc->nz_));39if (enc->top_derr_ != NULL) {40memset(enc->top_derr_, 0, enc->mb_w_ * sizeof(*enc->top_derr_));41}42}4344void VP8IteratorSetRow(VP8EncIterator* const it, int y) {45VP8Encoder* const enc = it->enc_;46it->x_ = 0;47it->y_ = y;48it->bw_ = &enc->parts_[y & (enc->num_parts_ - 1)];49it->preds_ = enc->preds_ + y * 4 * enc->preds_w_;50it->nz_ = enc->nz_;51it->mb_ = enc->mb_info_ + y * enc->mb_w_;52it->y_top_ = enc->y_top_;53it->uv_top_ = enc->uv_top_;54InitLeft(it);55}5657// restart a scan58static void VP8IteratorReset(VP8EncIterator* const it) {59VP8Encoder* const enc = it->enc_;60VP8IteratorSetRow(it, 0);61VP8IteratorSetCountDown(it, enc->mb_w_ * enc->mb_h_); // default62InitTop(it);63memset(it->bit_count_, 0, sizeof(it->bit_count_));64it->do_trellis_ = 0;65}6667void VP8IteratorSetCountDown(VP8EncIterator* const it, int count_down) {68it->count_down_ = it->count_down0_ = count_down;69}7071int VP8IteratorIsDone(const VP8EncIterator* const it) {72return (it->count_down_ <= 0);73}7475void VP8IteratorInit(VP8Encoder* const enc, VP8EncIterator* const it) {76it->enc_ = enc;77it->yuv_in_ = (uint8_t*)WEBP_ALIGN(it->yuv_mem_);78it->yuv_out_ = it->yuv_in_ + YUV_SIZE_ENC;79it->yuv_out2_ = it->yuv_out_ + YUV_SIZE_ENC;80it->yuv_p_ = it->yuv_out2_ + YUV_SIZE_ENC;81it->lf_stats_ = enc->lf_stats_;82it->percent0_ = enc->percent_;83it->y_left_ = (uint8_t*)WEBP_ALIGN(it->yuv_left_mem_ + 1);84it->u_left_ = it->y_left_ + 16 + 16;85it->v_left_ = it->u_left_ + 16;86it->top_derr_ = enc->top_derr_;87VP8IteratorReset(it);88}8990int VP8IteratorProgress(const VP8EncIterator* const it, int delta) {91VP8Encoder* const enc = it->enc_;92if (delta && enc->pic_->progress_hook != NULL) {93const int done = it->count_down0_ - it->count_down_;94const int percent = (it->count_down0_ <= 0)95? it->percent0_96: it->percent0_ + delta * done / it->count_down0_;97return WebPReportProgress(enc->pic_, percent, &enc->percent_);98}99return 1;100}101102//------------------------------------------------------------------------------103// Import the source samples into the cache. Takes care of replicating104// boundary pixels if necessary.105106static WEBP_INLINE int MinSize(int a, int b) { return (a < b) ? a : b; }107108static void ImportBlock(const uint8_t* src, int src_stride,109uint8_t* dst, int w, int h, int size) {110int i;111for (i = 0; i < h; ++i) {112memcpy(dst, src, w);113if (w < size) {114memset(dst + w, dst[w - 1], size - w);115}116dst += BPS;117src += src_stride;118}119for (i = h; i < size; ++i) {120memcpy(dst, dst - BPS, size);121dst += BPS;122}123}124125static void ImportLine(const uint8_t* src, int src_stride,126uint8_t* dst, int len, int total_len) {127int i;128for (i = 0; i < len; ++i, src += src_stride) dst[i] = *src;129for (; i < total_len; ++i) dst[i] = dst[len - 1];130}131132void VP8IteratorImport(VP8EncIterator* const it, uint8_t* const tmp_32) {133const VP8Encoder* const enc = it->enc_;134const int x = it->x_, y = it->y_;135const WebPPicture* const pic = enc->pic_;136const uint8_t* const ysrc = pic->y + (y * pic->y_stride + x) * 16;137const uint8_t* const usrc = pic->u + (y * pic->uv_stride + x) * 8;138const uint8_t* const vsrc = pic->v + (y * pic->uv_stride + x) * 8;139const int w = MinSize(pic->width - x * 16, 16);140const int h = MinSize(pic->height - y * 16, 16);141const int uv_w = (w + 1) >> 1;142const int uv_h = (h + 1) >> 1;143144ImportBlock(ysrc, pic->y_stride, it->yuv_in_ + Y_OFF_ENC, w, h, 16);145ImportBlock(usrc, pic->uv_stride, it->yuv_in_ + U_OFF_ENC, uv_w, uv_h, 8);146ImportBlock(vsrc, pic->uv_stride, it->yuv_in_ + V_OFF_ENC, uv_w, uv_h, 8);147148if (tmp_32 == NULL) return;149150// Import source (uncompressed) samples into boundary.151if (x == 0) {152InitLeft(it);153} else {154if (y == 0) {155it->y_left_[-1] = it->u_left_[-1] = it->v_left_[-1] = 127;156} else {157it->y_left_[-1] = ysrc[- 1 - pic->y_stride];158it->u_left_[-1] = usrc[- 1 - pic->uv_stride];159it->v_left_[-1] = vsrc[- 1 - pic->uv_stride];160}161ImportLine(ysrc - 1, pic->y_stride, it->y_left_, h, 16);162ImportLine(usrc - 1, pic->uv_stride, it->u_left_, uv_h, 8);163ImportLine(vsrc - 1, pic->uv_stride, it->v_left_, uv_h, 8);164}165166it->y_top_ = tmp_32 + 0;167it->uv_top_ = tmp_32 + 16;168if (y == 0) {169memset(tmp_32, 127, 32 * sizeof(*tmp_32));170} else {171ImportLine(ysrc - pic->y_stride, 1, tmp_32, w, 16);172ImportLine(usrc - pic->uv_stride, 1, tmp_32 + 16, uv_w, 8);173ImportLine(vsrc - pic->uv_stride, 1, tmp_32 + 16 + 8, uv_w, 8);174}175}176177//------------------------------------------------------------------------------178// Copy back the compressed samples into user space if requested.179180static void ExportBlock(const uint8_t* src, uint8_t* dst, int dst_stride,181int w, int h) {182while (h-- > 0) {183memcpy(dst, src, w);184dst += dst_stride;185src += BPS;186}187}188189void VP8IteratorExport(const VP8EncIterator* const it) {190const VP8Encoder* const enc = it->enc_;191if (enc->config_->show_compressed) {192const int x = it->x_, y = it->y_;193const uint8_t* const ysrc = it->yuv_out_ + Y_OFF_ENC;194const uint8_t* const usrc = it->yuv_out_ + U_OFF_ENC;195const uint8_t* const vsrc = it->yuv_out_ + V_OFF_ENC;196const WebPPicture* const pic = enc->pic_;197uint8_t* const ydst = pic->y + (y * pic->y_stride + x) * 16;198uint8_t* const udst = pic->u + (y * pic->uv_stride + x) * 8;199uint8_t* const vdst = pic->v + (y * pic->uv_stride + x) * 8;200int w = (pic->width - x * 16);201int h = (pic->height - y * 16);202203if (w > 16) w = 16;204if (h > 16) h = 16;205206// Luma plane207ExportBlock(ysrc, ydst, pic->y_stride, w, h);208209{ // U/V planes210const int uv_w = (w + 1) >> 1;211const int uv_h = (h + 1) >> 1;212ExportBlock(usrc, udst, pic->uv_stride, uv_w, uv_h);213ExportBlock(vsrc, vdst, pic->uv_stride, uv_w, uv_h);214}215}216}217218//------------------------------------------------------------------------------219// Non-zero contexts setup/teardown220221// Nz bits:222// 0 1 2 3 Y223// 4 5 6 7224// 8 9 10 11225// 12 13 14 15226// 16 17 U227// 18 19228// 20 21 V229// 22 23230// 24 DC-intra16231232// Convert packed context to byte array233#define BIT(nz, n) (!!((nz) & (1 << (n))))234235void VP8IteratorNzToBytes(VP8EncIterator* const it) {236const int tnz = it->nz_[0], lnz = it->nz_[-1];237int* const top_nz = it->top_nz_;238int* const left_nz = it->left_nz_;239240// Top-Y241top_nz[0] = BIT(tnz, 12);242top_nz[1] = BIT(tnz, 13);243top_nz[2] = BIT(tnz, 14);244top_nz[3] = BIT(tnz, 15);245// Top-U246top_nz[4] = BIT(tnz, 18);247top_nz[5] = BIT(tnz, 19);248// Top-V249top_nz[6] = BIT(tnz, 22);250top_nz[7] = BIT(tnz, 23);251// DC252top_nz[8] = BIT(tnz, 24);253254// left-Y255left_nz[0] = BIT(lnz, 3);256left_nz[1] = BIT(lnz, 7);257left_nz[2] = BIT(lnz, 11);258left_nz[3] = BIT(lnz, 15);259// left-U260left_nz[4] = BIT(lnz, 17);261left_nz[5] = BIT(lnz, 19);262// left-V263left_nz[6] = BIT(lnz, 21);264left_nz[7] = BIT(lnz, 23);265// left-DC is special, iterated separately266}267268void VP8IteratorBytesToNz(VP8EncIterator* const it) {269uint32_t nz = 0;270const int* const top_nz = it->top_nz_;271const int* const left_nz = it->left_nz_;272// top273nz |= (top_nz[0] << 12) | (top_nz[1] << 13);274nz |= (top_nz[2] << 14) | (top_nz[3] << 15);275nz |= (top_nz[4] << 18) | (top_nz[5] << 19);276nz |= (top_nz[6] << 22) | (top_nz[7] << 23);277nz |= (top_nz[8] << 24); // we propagate the _top_ bit, esp. for intra4278// left279nz |= (left_nz[0] << 3) | (left_nz[1] << 7);280nz |= (left_nz[2] << 11);281nz |= (left_nz[4] << 17) | (left_nz[6] << 21);282283*it->nz_ = nz;284}285286#undef BIT287288//------------------------------------------------------------------------------289// Advance to the next position, doing the bookkeeping.290291void VP8IteratorSaveBoundary(VP8EncIterator* const it) {292VP8Encoder* const enc = it->enc_;293const int x = it->x_, y = it->y_;294const uint8_t* const ysrc = it->yuv_out_ + Y_OFF_ENC;295const uint8_t* const uvsrc = it->yuv_out_ + U_OFF_ENC;296if (x < enc->mb_w_ - 1) { // left297int i;298for (i = 0; i < 16; ++i) {299it->y_left_[i] = ysrc[15 + i * BPS];300}301for (i = 0; i < 8; ++i) {302it->u_left_[i] = uvsrc[7 + i * BPS];303it->v_left_[i] = uvsrc[15 + i * BPS];304}305// top-left (before 'top'!)306it->y_left_[-1] = it->y_top_[15];307it->u_left_[-1] = it->uv_top_[0 + 7];308it->v_left_[-1] = it->uv_top_[8 + 7];309}310if (y < enc->mb_h_ - 1) { // top311memcpy(it->y_top_, ysrc + 15 * BPS, 16);312memcpy(it->uv_top_, uvsrc + 7 * BPS, 8 + 8);313}314}315316int VP8IteratorNext(VP8EncIterator* const it) {317if (++it->x_ == it->enc_->mb_w_) {318VP8IteratorSetRow(it, ++it->y_);319} else {320it->preds_ += 4;321it->mb_ += 1;322it->nz_ += 1;323it->y_top_ += 16;324it->uv_top_ += 16;325}326return (0 < --it->count_down_);327}328329//------------------------------------------------------------------------------330// Helper function to set mode properties331332void VP8SetIntra16Mode(const VP8EncIterator* const it, int mode) {333uint8_t* preds = it->preds_;334int y;335for (y = 0; y < 4; ++y) {336memset(preds, mode, 4);337preds += it->enc_->preds_w_;338}339it->mb_->type_ = 1;340}341342void VP8SetIntra4Mode(const VP8EncIterator* const it, const uint8_t* modes) {343uint8_t* preds = it->preds_;344int y;345for (y = 4; y > 0; --y) {346memcpy(preds, modes, 4 * sizeof(*modes));347preds += it->enc_->preds_w_;348modes += 4;349}350it->mb_->type_ = 0;351}352353void VP8SetIntraUVMode(const VP8EncIterator* const it, int mode) {354it->mb_->uv_mode_ = mode;355}356357void VP8SetSkip(const VP8EncIterator* const it, int skip) {358it->mb_->skip_ = skip;359}360361void VP8SetSegment(const VP8EncIterator* const it, int segment) {362it->mb_->segment_ = segment;363}364365//------------------------------------------------------------------------------366// Intra4x4 sub-blocks iteration367//368// We store and update the boundary samples into an array of 37 pixels. They369// are updated as we iterate and reconstructs each intra4x4 blocks in turn.370// The position of the samples has the following snake pattern:371//372// 16|17 18 19 20|21 22 23 24|25 26 27 28|29 30 31 32|33 34 35 36 <- Top-right373// --+-----------+-----------+-----------+-----------+374// 15| 19| 23| 27| 31|375// 14| 18| 22| 26| 30|376// 13| 17| 21| 25| 29|377// 12|13 14 15 16|17 18 19 20|21 22 23 24|25 26 27 28|378// --+-----------+-----------+-----------+-----------+379// 11| 15| 19| 23| 27|380// 10| 14| 18| 22| 26|381// 9| 13| 17| 21| 25|382// 8| 9 10 11 12|13 14 15 16|17 18 19 20|21 22 23 24|383// --+-----------+-----------+-----------+-----------+384// 7| 11| 15| 19| 23|385// 6| 10| 14| 18| 22|386// 5| 9| 13| 17| 21|387// 4| 5 6 7 8| 9 10 11 12|13 14 15 16|17 18 19 20|388// --+-----------+-----------+-----------+-----------+389// 3| 7| 11| 15| 19|390// 2| 6| 10| 14| 18|391// 1| 5| 9| 13| 17|392// 0| 1 2 3 4| 5 6 7 8| 9 10 11 12|13 14 15 16|393// --+-----------+-----------+-----------+-----------+394395// Array to record the position of the top sample to pass to the prediction396// functions in dsp.c.397static const uint8_t VP8TopLeftI4[16] = {39817, 21, 25, 29,39913, 17, 21, 25,4009, 13, 17, 21,4015, 9, 13, 17402};403404void VP8IteratorStartI4(VP8EncIterator* const it) {405const VP8Encoder* const enc = it->enc_;406int i;407408it->i4_ = 0; // first 4x4 sub-block409it->i4_top_ = it->i4_boundary_ + VP8TopLeftI4[0];410411// Import the boundary samples412for (i = 0; i < 17; ++i) { // left413it->i4_boundary_[i] = it->y_left_[15 - i];414}415for (i = 0; i < 16; ++i) { // top416it->i4_boundary_[17 + i] = it->y_top_[i];417}418// top-right samples have a special case on the far right of the picture419if (it->x_ < enc->mb_w_ - 1) {420for (i = 16; i < 16 + 4; ++i) {421it->i4_boundary_[17 + i] = it->y_top_[i];422}423} else { // else, replicate the last valid pixel four times424for (i = 16; i < 16 + 4; ++i) {425it->i4_boundary_[17 + i] = it->i4_boundary_[17 + 15];426}427}428#if WEBP_AARCH64 && BPS == 32 && defined(WEBP_MSAN)429// Intra4Preds_NEON() reads 3 uninitialized bytes from i4_boundary_ when top430// is positioned at offset 29 (VP8TopLeftI4[3]). The values are not used431// meaningfully, but due to limitations in MemorySanitizer related to432// modeling of tbl instructions, a warning will be issued. This can be433// removed if MSan is updated to support the instructions. See434// https://issues.webmproject.org/372109644.435memset(it->i4_boundary_ + sizeof(it->i4_boundary_) - 3, 0xaa, 3);436#endif437VP8IteratorNzToBytes(it); // import the non-zero context438}439440int VP8IteratorRotateI4(VP8EncIterator* const it,441const uint8_t* const yuv_out) {442const uint8_t* const blk = yuv_out + VP8Scan[it->i4_];443uint8_t* const top = it->i4_top_;444int i;445446// Update the cache with 7 fresh samples447for (i = 0; i <= 3; ++i) {448top[-4 + i] = blk[i + 3 * BPS]; // store future top samples449}450if ((it->i4_ & 3) != 3) { // if not on the right sub-blocks #3, #7, #11, #15451for (i = 0; i <= 2; ++i) { // store future left samples452top[i] = blk[3 + (2 - i) * BPS];453}454} else { // else replicate top-right samples, as says the specs.455for (i = 0; i <= 3; ++i) {456top[i] = top[i + 4];457}458}459// move pointers to next sub-block460++it->i4_;461if (it->i4_ == 16) { // we're done462return 0;463}464465it->i4_top_ = it->i4_boundary_ + VP8TopLeftI4[it->i4_];466return 1;467}468469//------------------------------------------------------------------------------470471472