// Path: blob/master/thirdparty/libwebp/src/dec/frame_dec.c
// (web-viewer header: 21585 views)
// Copyright 2010 Google Inc. All Rights Reserved.1//2// Use of this source code is governed by a BSD-style license3// that can be found in the COPYING file in the root of the source4// tree. An additional intellectual property rights grant can be found5// in the file PATENTS. All contributing project authors may6// be found in the AUTHORS file in the root of the source tree.7// -----------------------------------------------------------------------------8//9// Frame-reconstruction function. Memory allocation.10//11// Author: Skal ([email protected])1213#include <assert.h>14#include <stdlib.h>15#include <string.h>1617#include "src/dec/common_dec.h"18#include "src/dec/vp8_dec.h"19#include "src/dec/vp8i_dec.h"20#include "src/dec/webpi_dec.h"21#include "src/dsp/dsp.h"22#include "src/utils/random_utils.h"23#include "src/utils/thread_utils.h"24#include "src/utils/utils.h"25#include "src/webp/decode.h"26#include "src/webp/types.h"2728//------------------------------------------------------------------------------29// Main reconstruction function.3031static const uint16_t kScan[16] = {320 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS,330 + 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS,340 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS,350 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS36};3738static int CheckMode(int mb_x, int mb_y, int mode) {39if (mode == B_DC_PRED) {40if (mb_x == 0) {41return (mb_y == 0) ? B_DC_PRED_NOTOPLEFT : B_DC_PRED_NOLEFT;42} else {43return (mb_y == 0) ? 
B_DC_PRED_NOTOP : B_DC_PRED;44}45}46return mode;47}4849static void Copy32b(uint8_t* const dst, const uint8_t* const src) {50memcpy(dst, src, 4);51}5253static WEBP_INLINE void DoTransform(uint32_t bits, const int16_t* const src,54uint8_t* const dst) {55switch (bits >> 30) {56case 3:57VP8Transform(src, dst, 0);58break;59case 2:60VP8TransformAC3(src, dst);61break;62case 1:63VP8TransformDC(src, dst);64break;65default:66break;67}68}6970static void DoUVTransform(uint32_t bits, const int16_t* const src,71uint8_t* const dst) {72if (bits & 0xff) { // any non-zero coeff at all?73if (bits & 0xaa) { // any non-zero AC coefficient?74VP8TransformUV(src, dst); // note we don't use the AC3 variant for U/V75} else {76VP8TransformDCUV(src, dst);77}78}79}8081static void ReconstructRow(const VP8Decoder* const dec,82const VP8ThreadContext* ctx) {83int j;84int mb_x;85const int mb_y = ctx->mb_y;86const int cache_id = ctx->id;87uint8_t* const y_dst = dec->yuv_b + Y_OFF;88uint8_t* const u_dst = dec->yuv_b + U_OFF;89uint8_t* const v_dst = dec->yuv_b + V_OFF;9091// Initialize left-most block.92for (j = 0; j < 16; ++j) {93y_dst[j * BPS - 1] = 129;94}95for (j = 0; j < 8; ++j) {96u_dst[j * BPS - 1] = 129;97v_dst[j * BPS - 1] = 129;98}99100// Init top-left sample on left column too.101if (mb_y > 0) {102y_dst[-1 - BPS] = u_dst[-1 - BPS] = v_dst[-1 - BPS] = 129;103} else {104// we only need to do this init once at block (0,0).105// Afterward, it remains valid for the whole topmost row.106memset(y_dst - BPS - 1, 127, 16 + 4 + 1);107memset(u_dst - BPS - 1, 127, 8 + 1);108memset(v_dst - BPS - 1, 127, 8 + 1);109}110111// Reconstruct one row.112for (mb_x = 0; mb_x < dec->mb_w; ++mb_x) {113const VP8MBData* const block = ctx->mb_data + mb_x;114115// Rotate in the left samples from previously decoded block. 
We move four116// pixels at a time for alignment reason, and because of in-loop filter.117if (mb_x > 0) {118for (j = -1; j < 16; ++j) {119Copy32b(&y_dst[j * BPS - 4], &y_dst[j * BPS + 12]);120}121for (j = -1; j < 8; ++j) {122Copy32b(&u_dst[j * BPS - 4], &u_dst[j * BPS + 4]);123Copy32b(&v_dst[j * BPS - 4], &v_dst[j * BPS + 4]);124}125}126{127// bring top samples into the cache128VP8TopSamples* const top_yuv = dec->yuv_t + mb_x;129const int16_t* const coeffs = block->coeffs;130uint32_t bits = block->non_zero_y;131int n;132133if (mb_y > 0) {134memcpy(y_dst - BPS, top_yuv[0].y, 16);135memcpy(u_dst - BPS, top_yuv[0].u, 8);136memcpy(v_dst - BPS, top_yuv[0].v, 8);137}138139// predict and add residuals140if (block->is_i4x4) { // 4x4141uint32_t* const top_right = (uint32_t*)(y_dst - BPS + 16);142143if (mb_y > 0) {144if (mb_x >= dec->mb_w - 1) { // on rightmost border145memset(top_right, top_yuv[0].y[15], sizeof(*top_right));146} else {147memcpy(top_right, top_yuv[1].y, sizeof(*top_right));148}149}150// replicate the top-right pixels below151top_right[BPS] = top_right[2 * BPS] = top_right[3 * BPS] = top_right[0];152153// predict and add residuals for all 4x4 blocks in turn.154for (n = 0; n < 16; ++n, bits <<= 2) {155uint8_t* const dst = y_dst + kScan[n];156VP8PredLuma4[block->imodes[n]](dst);157DoTransform(bits, coeffs + n * 16, dst);158}159} else { // 16x16160const int pred_func = CheckMode(mb_x, mb_y, block->imodes[0]);161VP8PredLuma16[pred_func](y_dst);162if (bits != 0) {163for (n = 0; n < 16; ++n, bits <<= 2) {164DoTransform(bits, coeffs + n * 16, y_dst + kScan[n]);165}166}167}168{169// Chroma170const uint32_t bits_uv = block->non_zero_uv;171const int pred_func = CheckMode(mb_x, mb_y, block->uvmode);172VP8PredChroma8[pred_func](u_dst);173VP8PredChroma8[pred_func](v_dst);174DoUVTransform(bits_uv >> 0, coeffs + 16 * 16, u_dst);175DoUVTransform(bits_uv >> 8, coeffs + 20 * 16, v_dst);176}177178// stash away top samples for next block179if (mb_y < dec->mb_h - 1) 
{180memcpy(top_yuv[0].y, y_dst + 15 * BPS, 16);181memcpy(top_yuv[0].u, u_dst + 7 * BPS, 8);182memcpy(top_yuv[0].v, v_dst + 7 * BPS, 8);183}184}185// Transfer reconstructed samples from yuv_b cache to final destination.186{187const int y_offset = cache_id * 16 * dec->cache_y_stride;188const int uv_offset = cache_id * 8 * dec->cache_uv_stride;189uint8_t* const y_out = dec->cache_y + mb_x * 16 + y_offset;190uint8_t* const u_out = dec->cache_u + mb_x * 8 + uv_offset;191uint8_t* const v_out = dec->cache_v + mb_x * 8 + uv_offset;192for (j = 0; j < 16; ++j) {193memcpy(y_out + j * dec->cache_y_stride, y_dst + j * BPS, 16);194}195for (j = 0; j < 8; ++j) {196memcpy(u_out + j * dec->cache_uv_stride, u_dst + j * BPS, 8);197memcpy(v_out + j * dec->cache_uv_stride, v_dst + j * BPS, 8);198}199}200}201}202203//------------------------------------------------------------------------------204// Filtering205206// kFilterExtraRows[] = How many extra lines are needed on the MB boundary207// for caching, given a filtering level.208// Simple filter: up to 2 luma samples are read and 1 is written.209// Complex filter: up to 4 luma samples are read and 3 are written. 
Same for210// U/V, so it's 8 samples total (because of the 2x upsampling).211static const uint8_t kFilterExtraRows[3] = { 0, 2, 8 };212213static void DoFilter(const VP8Decoder* const dec, int mb_x, int mb_y) {214const VP8ThreadContext* const ctx = &dec->thread_ctx;215const int cache_id = ctx->id;216const int y_bps = dec->cache_y_stride;217const VP8FInfo* const f_info = ctx->f_info + mb_x;218uint8_t* const y_dst = dec->cache_y + cache_id * 16 * y_bps + mb_x * 16;219const int ilevel = f_info->f_ilevel;220const int limit = f_info->f_limit;221if (limit == 0) {222return;223}224assert(limit >= 3);225if (dec->filter_type == 1) { // simple226if (mb_x > 0) {227VP8SimpleHFilter16(y_dst, y_bps, limit + 4);228}229if (f_info->f_inner) {230VP8SimpleHFilter16i(y_dst, y_bps, limit);231}232if (mb_y > 0) {233VP8SimpleVFilter16(y_dst, y_bps, limit + 4);234}235if (f_info->f_inner) {236VP8SimpleVFilter16i(y_dst, y_bps, limit);237}238} else { // complex239const int uv_bps = dec->cache_uv_stride;240uint8_t* const u_dst = dec->cache_u + cache_id * 8 * uv_bps + mb_x * 8;241uint8_t* const v_dst = dec->cache_v + cache_id * 8 * uv_bps + mb_x * 8;242const int hev_thresh = f_info->hev_thresh;243if (mb_x > 0) {244VP8HFilter16(y_dst, y_bps, limit + 4, ilevel, hev_thresh);245VP8HFilter8(u_dst, v_dst, uv_bps, limit + 4, ilevel, hev_thresh);246}247if (f_info->f_inner) {248VP8HFilter16i(y_dst, y_bps, limit, ilevel, hev_thresh);249VP8HFilter8i(u_dst, v_dst, uv_bps, limit, ilevel, hev_thresh);250}251if (mb_y > 0) {252VP8VFilter16(y_dst, y_bps, limit + 4, ilevel, hev_thresh);253VP8VFilter8(u_dst, v_dst, uv_bps, limit + 4, ilevel, hev_thresh);254}255if (f_info->f_inner) {256VP8VFilter16i(y_dst, y_bps, limit, ilevel, hev_thresh);257VP8VFilter8i(u_dst, v_dst, uv_bps, limit, ilevel, hev_thresh);258}259}260}261262// Filter the decoded macroblock row (if needed)263static void FilterRow(const VP8Decoder* const dec) {264int mb_x;265const int mb_y = 
dec->thread_ctx.mb_y;266assert(dec->thread_ctx.filter_row);267for (mb_x = dec->tl_mb_x; mb_x < dec->br_mb_x; ++mb_x) {268DoFilter(dec, mb_x, mb_y);269}270}271272//------------------------------------------------------------------------------273// Precompute the filtering strength for each segment and each i4x4/i16x16 mode.274275static void PrecomputeFilterStrengths(VP8Decoder* const dec) {276if (dec->filter_type > 0) {277int s;278const VP8FilterHeader* const hdr = &dec->filter_hdr;279for (s = 0; s < NUM_MB_SEGMENTS; ++s) {280int i4x4;281// First, compute the initial level282int base_level;283if (dec->segment_hdr.use_segment) {284base_level = dec->segment_hdr.filter_strength[s];285if (!dec->segment_hdr.absolute_delta) {286base_level += hdr->level;287}288} else {289base_level = hdr->level;290}291for (i4x4 = 0; i4x4 <= 1; ++i4x4) {292VP8FInfo* const info = &dec->fstrengths[s][i4x4];293int level = base_level;294if (hdr->use_lf_delta) {295level += hdr->ref_lf_delta[0];296if (i4x4) {297level += hdr->mode_lf_delta[0];298}299}300level = (level < 0) ? 0 : (level > 63) ? 63 : level;301if (level > 0) {302int ilevel = level;303if (hdr->sharpness > 0) {304if (hdr->sharpness > 4) {305ilevel >>= 2;306} else {307ilevel >>= 1;308}309if (ilevel > 9 - hdr->sharpness) {310ilevel = 9 - hdr->sharpness;311}312}313if (ilevel < 1) ilevel = 1;314info->f_ilevel = ilevel;315info->f_limit = 2 * level + ilevel;316info->hev_thresh = (level >= 40) ? 2 : (level >= 15) ? 
1 : 0;317} else {318info->f_limit = 0; // no filtering319}320info->f_inner = i4x4;321}322}323}324}325326//------------------------------------------------------------------------------327// Dithering328329// minimal amp that will provide a non-zero dithering effect330#define MIN_DITHER_AMP 4331332#define DITHER_AMP_TAB_SIZE 12333static const uint8_t kQuantToDitherAmp[DITHER_AMP_TAB_SIZE] = {334// roughly, it's dqm->uv_mat[1]3358, 7, 6, 4, 4, 2, 2, 2, 1, 1, 1, 1336};337338void VP8InitDithering(const WebPDecoderOptions* const options,339VP8Decoder* const dec) {340assert(dec != NULL);341if (options != NULL) {342const int d = options->dithering_strength;343const int max_amp = (1 << VP8_RANDOM_DITHER_FIX) - 1;344const int f = (d < 0) ? 0 : (d > 100) ? max_amp : (d * max_amp / 100);345if (f > 0) {346int s;347int all_amp = 0;348for (s = 0; s < NUM_MB_SEGMENTS; ++s) {349VP8QuantMatrix* const dqm = &dec->dqm[s];350if (dqm->uv_quant < DITHER_AMP_TAB_SIZE) {351const int idx = (dqm->uv_quant < 0) ? 
0 : dqm->uv_quant;352dqm->dither = (f * kQuantToDitherAmp[idx]) >> 3;353}354all_amp |= dqm->dither;355}356if (all_amp != 0) {357VP8InitRandom(&dec->dithering_rg, 1.0f);358dec->dither = 1;359}360}361// potentially allow alpha dithering362dec->alpha_dithering = options->alpha_dithering_strength;363if (dec->alpha_dithering > 100) {364dec->alpha_dithering = 100;365} else if (dec->alpha_dithering < 0) {366dec->alpha_dithering = 0;367}368}369}370371// Convert to range: [-2,2] for dither=50, [-4,4] for dither=100372static void Dither8x8(VP8Random* const rg, uint8_t* dst, int bps, int amp) {373uint8_t dither[64];374int i;375for (i = 0; i < 8 * 8; ++i) {376dither[i] = VP8RandomBits2(rg, VP8_DITHER_AMP_BITS + 1, amp);377}378VP8DitherCombine8x8(dither, dst, bps);379}380381static void DitherRow(VP8Decoder* const dec) {382int mb_x;383assert(dec->dither);384for (mb_x = dec->tl_mb_x; mb_x < dec->br_mb_x; ++mb_x) {385const VP8ThreadContext* const ctx = &dec->thread_ctx;386const VP8MBData* const data = ctx->mb_data + mb_x;387const int cache_id = ctx->id;388const int uv_bps = dec->cache_uv_stride;389if (data->dither >= MIN_DITHER_AMP) {390uint8_t* const u_dst = dec->cache_u + cache_id * 8 * uv_bps + mb_x * 8;391uint8_t* const v_dst = dec->cache_v + cache_id * 8 * uv_bps + mb_x * 8;392Dither8x8(&dec->dithering_rg, u_dst, uv_bps, data->dither);393Dither8x8(&dec->dithering_rg, v_dst, uv_bps, data->dither);394}395}396}397398//------------------------------------------------------------------------------399// This function is called after a row of macroblocks is finished decoding.400// It also takes into account the following restrictions:401// * In case of in-loop filtering, we must hold off sending some of the bottom402// pixels as they are yet unfiltered. They will be when the next macroblock403// row is decoded. Meanwhile, we must preserve them by rotating them in the404// cache area. 
This doesn't hold for the very bottom row of the uncropped405// picture of course.406// * we must clip the remaining pixels against the cropping area. The VP8Io407// struct must have the following fields set correctly before calling put():408409#define MACROBLOCK_VPOS(mb_y) ((mb_y) * 16) // vertical position of a MB410411// Finalize and transmit a complete row. Return false in case of user-abort.412static int FinishRow(void* arg1, void* arg2) {413VP8Decoder* const dec = (VP8Decoder*)arg1;414VP8Io* const io = (VP8Io*)arg2;415int ok = 1;416const VP8ThreadContext* const ctx = &dec->thread_ctx;417const int cache_id = ctx->id;418const int extra_y_rows = kFilterExtraRows[dec->filter_type];419const int ysize = extra_y_rows * dec->cache_y_stride;420const int uvsize = (extra_y_rows / 2) * dec->cache_uv_stride;421const int y_offset = cache_id * 16 * dec->cache_y_stride;422const int uv_offset = cache_id * 8 * dec->cache_uv_stride;423uint8_t* const ydst = dec->cache_y - ysize + y_offset;424uint8_t* const udst = dec->cache_u - uvsize + uv_offset;425uint8_t* const vdst = dec->cache_v - uvsize + uv_offset;426const int mb_y = ctx->mb_y;427const int is_first_row = (mb_y == 0);428const int is_last_row = (mb_y >= dec->br_mb_y - 1);429430if (dec->mt_method == 2) {431ReconstructRow(dec, ctx);432}433434if (ctx->filter_row) {435FilterRow(dec);436}437438if (dec->dither) {439DitherRow(dec);440}441442if (io->put != NULL) {443int y_start = MACROBLOCK_VPOS(mb_y);444int y_end = MACROBLOCK_VPOS(mb_y + 1);445if (!is_first_row) {446y_start -= extra_y_rows;447io->y = ydst;448io->u = udst;449io->v = vdst;450} else {451io->y = dec->cache_y + y_offset;452io->u = dec->cache_u + uv_offset;453io->v = dec->cache_v + uv_offset;454}455456if (!is_last_row) {457y_end -= extra_y_rows;458}459if (y_end > io->crop_bottom) {460y_end = io->crop_bottom; // make sure we don't overflow on last row.461}462// If dec->alpha_data is not NULL, we have some alpha plane present.463io->a = NULL;464if (dec->alpha_data != NULL 
&& y_start < y_end) {465io->a = VP8DecompressAlphaRows(dec, io, y_start, y_end - y_start);466if (io->a == NULL) {467return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR,468"Could not decode alpha data.");469}470}471if (y_start < io->crop_top) {472const int delta_y = io->crop_top - y_start;473y_start = io->crop_top;474assert(!(delta_y & 1));475io->y += dec->cache_y_stride * delta_y;476io->u += dec->cache_uv_stride * (delta_y >> 1);477io->v += dec->cache_uv_stride * (delta_y >> 1);478if (io->a != NULL) {479io->a += io->width * delta_y;480}481}482if (y_start < y_end) {483io->y += io->crop_left;484io->u += io->crop_left >> 1;485io->v += io->crop_left >> 1;486if (io->a != NULL) {487io->a += io->crop_left;488}489io->mb_y = y_start - io->crop_top;490io->mb_w = io->crop_right - io->crop_left;491io->mb_h = y_end - y_start;492ok = io->put(io);493}494}495// rotate top samples if needed496if (cache_id + 1 == dec->num_caches) {497if (!is_last_row) {498memcpy(dec->cache_y - ysize, ydst + 16 * dec->cache_y_stride, ysize);499memcpy(dec->cache_u - uvsize, udst + 8 * dec->cache_uv_stride, uvsize);500memcpy(dec->cache_v - uvsize, vdst + 8 * dec->cache_uv_stride, uvsize);501}502}503504return ok;505}506507#undef MACROBLOCK_VPOS508509//------------------------------------------------------------------------------510511int VP8ProcessRow(VP8Decoder* const dec, VP8Io* const io) {512int ok = 1;513VP8ThreadContext* const ctx = &dec->thread_ctx;514const int filter_row =515(dec->filter_type > 0) &&516(dec->mb_y >= dec->tl_mb_y) && (dec->mb_y <= dec->br_mb_y);517if (dec->mt_method == 0) {518// ctx->id and ctx->f_info are already set519ctx->mb_y = dec->mb_y;520ctx->filter_row = filter_row;521ReconstructRow(dec, ctx);522ok = FinishRow(dec, io);523} else {524WebPWorker* const worker = &dec->worker;525// Finish previous job *before* updating context526ok &= WebPGetWorkerInterface()->Sync(worker);527assert(worker->status == OK);528if (ok) { // spawn a new deblocking/output job529ctx->io = 
*io;530ctx->id = dec->cache_id;531ctx->mb_y = dec->mb_y;532ctx->filter_row = filter_row;533if (dec->mt_method == 2) { // swap macroblock data534VP8MBData* const tmp = ctx->mb_data;535ctx->mb_data = dec->mb_data;536dec->mb_data = tmp;537} else {538// perform reconstruction directly in main thread539ReconstructRow(dec, ctx);540}541if (filter_row) { // swap filter info542VP8FInfo* const tmp = ctx->f_info;543ctx->f_info = dec->f_info;544dec->f_info = tmp;545}546// (reconstruct)+filter in parallel547WebPGetWorkerInterface()->Launch(worker);548if (++dec->cache_id == dec->num_caches) {549dec->cache_id = 0;550}551}552}553return ok;554}555556//------------------------------------------------------------------------------557// Finish setting up the decoding parameter once user's setup() is called.558559VP8StatusCode VP8EnterCritical(VP8Decoder* const dec, VP8Io* const io) {560// Call setup() first. This may trigger additional decoding features on 'io'.561// Note: Afterward, we must call teardown() no matter what.562if (io->setup != NULL && !io->setup(io)) {563VP8SetError(dec, VP8_STATUS_USER_ABORT, "Frame setup failed");564return dec->status;565}566567// Disable filtering per user request568if (io->bypass_filtering) {569dec->filter_type = 0;570}571572// Define the area where we can skip in-loop filtering, in case of cropping.573//574// 'Simple' filter reads two luma samples outside of the macroblock575// and filters one. It doesn't filter the chroma samples. Hence, we can576// avoid doing the in-loop filtering before crop_top/crop_left position.577// For the 'Complex' filter, 3 samples are read and up to 3 are filtered.578// Means: there's a dependency chain that goes all the way up to the579// top-left corner of the picture (MB #0). 
We must filter all the previous580// macroblocks.581{582const int extra_pixels = kFilterExtraRows[dec->filter_type];583if (dec->filter_type == 2) {584// For complex filter, we need to preserve the dependency chain.585dec->tl_mb_x = 0;586dec->tl_mb_y = 0;587} else {588// For simple filter, we can filter only the cropped region.589// We include 'extra_pixels' on the other side of the boundary, since590// vertical or horizontal filtering of the previous macroblock can591// modify some abutting pixels.592dec->tl_mb_x = (io->crop_left - extra_pixels) >> 4;593dec->tl_mb_y = (io->crop_top - extra_pixels) >> 4;594if (dec->tl_mb_x < 0) dec->tl_mb_x = 0;595if (dec->tl_mb_y < 0) dec->tl_mb_y = 0;596}597// We need some 'extra' pixels on the right/bottom.598dec->br_mb_y = (io->crop_bottom + 15 + extra_pixels) >> 4;599dec->br_mb_x = (io->crop_right + 15 + extra_pixels) >> 4;600if (dec->br_mb_x > dec->mb_w) {601dec->br_mb_x = dec->mb_w;602}603if (dec->br_mb_y > dec->mb_h) {604dec->br_mb_y = dec->mb_h;605}606}607PrecomputeFilterStrengths(dec);608return VP8_STATUS_OK;609}610611int VP8ExitCritical(VP8Decoder* const dec, VP8Io* const io) {612int ok = 1;613if (dec->mt_method > 0) {614ok = WebPGetWorkerInterface()->Sync(&dec->worker);615}616617if (io->teardown != NULL) {618io->teardown(io);619}620return ok;621}622623//------------------------------------------------------------------------------624// For multi-threaded decoding we need to use 3 rows of 16 pixels as delay line.625//626// Reason is: the deblocking filter cannot deblock the bottom horizontal edges627// immediately, and needs to wait for first few rows of the next macroblock to628// be decoded. 
Hence, deblocking is lagging behind by 4 or 8 pixels (depending629// on strength).630// With two threads, the vertical positions of the rows being decoded are:631// Decode: [ 0..15][16..31][32..47][48..63][64..79][...632// Deblock: [ 0..11][12..27][28..43][44..59][...633// If we use two threads and two caches of 16 pixels, the sequence would be:634// Decode: [ 0..15][16..31][ 0..15!!][16..31][ 0..15][...635// Deblock: [ 0..11][12..27!!][-4..11][12..27][...636// The problem occurs during row [12..15!!] that both the decoding and637// deblocking threads are writing simultaneously.638// With 3 cache lines, one get a safe write pattern:639// Decode: [ 0..15][16..31][32..47][ 0..15][16..31][32..47][0..640// Deblock: [ 0..11][12..27][28..43][-4..11][12..27][28...641// Note that multi-threaded output _without_ deblocking can make use of two642// cache lines of 16 pixels only, since there's no lagging behind. The decoding643// and output process have non-concurrent writing:644// Decode: [ 0..15][16..31][ 0..15][16..31][...645// io->put: [ 0..15][16..31][ 0..15][...646647#define MT_CACHE_LINES 3648#define ST_CACHE_LINES 1 // 1 cache row only for single-threaded case649650// Initialize multi/single-thread worker651static int InitThreadContext(VP8Decoder* const dec) {652dec->cache_id = 0;653if (dec->mt_method > 0) {654WebPWorker* const worker = &dec->worker;655if (!WebPGetWorkerInterface()->Reset(worker)) {656return VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY,657"thread initialization failed.");658}659worker->data1 = dec;660worker->data2 = (void*)&dec->thread_ctx.io;661worker->hook = FinishRow;662dec->num_caches =663(dec->filter_type > 0) ? 
MT_CACHE_LINES : MT_CACHE_LINES - 1;664} else {665dec->num_caches = ST_CACHE_LINES;666}667return 1;668}669670int VP8GetThreadMethod(const WebPDecoderOptions* const options,671const WebPHeaderStructure* const headers,672int width, int height) {673if (options == NULL || options->use_threads == 0) {674return 0;675}676(void)headers;677(void)width;678(void)height;679assert(headers == NULL || !headers->is_lossless);680#if defined(WEBP_USE_THREAD)681if (width >= MIN_WIDTH_FOR_THREADS) return 2;682#endif683return 0;684}685686#undef MT_CACHE_LINES687#undef ST_CACHE_LINES688689//------------------------------------------------------------------------------690// Memory setup691692static int AllocateMemory(VP8Decoder* const dec) {693const int num_caches = dec->num_caches;694const int mb_w = dec->mb_w;695// Note: we use 'size_t' when there's no overflow risk, uint64_t otherwise.696const size_t intra_pred_mode_size = 4 * mb_w * sizeof(uint8_t);697const size_t top_size = sizeof(VP8TopSamples) * mb_w;698const size_t mb_info_size = (mb_w + 1) * sizeof(VP8MB);699const size_t f_info_size =700(dec->filter_type > 0) ?701mb_w * (dec->mt_method > 0 ? 2 : 1) * sizeof(VP8FInfo)702: 0;703const size_t yuv_size = YUV_SIZE * sizeof(*dec->yuv_b);704const size_t mb_data_size =705(dec->mt_method == 2 ? 
2 : 1) * mb_w * sizeof(*dec->mb_data);706const size_t cache_height = (16 * num_caches707+ kFilterExtraRows[dec->filter_type]) * 3 / 2;708const size_t cache_size = top_size * cache_height;709// alpha_size is the only one that scales as width x height.710const uint64_t alpha_size = (dec->alpha_data != NULL) ?711(uint64_t)dec->pic_hdr.width * dec->pic_hdr.height : 0ULL;712const uint64_t needed = (uint64_t)intra_pred_mode_size713+ top_size + mb_info_size + f_info_size714+ yuv_size + mb_data_size715+ cache_size + alpha_size + WEBP_ALIGN_CST;716uint8_t* mem;717718if (!CheckSizeOverflow(needed)) return 0; // check for overflow719if (needed > dec->mem_size) {720WebPSafeFree(dec->mem);721dec->mem_size = 0;722dec->mem = WebPSafeMalloc(needed, sizeof(uint8_t));723if (dec->mem == NULL) {724return VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY,725"no memory during frame initialization.");726}727// down-cast is ok, thanks to WebPSafeMalloc() above.728dec->mem_size = (size_t)needed;729}730731mem = (uint8_t*)dec->mem;732dec->intra_t = mem;733mem += intra_pred_mode_size;734735dec->yuv_t = (VP8TopSamples*)mem;736mem += top_size;737738dec->mb_info = ((VP8MB*)mem) + 1;739mem += mb_info_size;740741dec->f_info = f_info_size ? (VP8FInfo*)mem : NULL;742mem += f_info_size;743dec->thread_ctx.id = 0;744dec->thread_ctx.f_info = dec->f_info;745if (dec->filter_type > 0 && dec->mt_method > 0) {746// secondary cache line. The deblocking process need to make use of the747// filtering strength from previous macroblock row, while the new ones748// are being decoded in parallel. 
We'll just swap the pointers.749dec->thread_ctx.f_info += mb_w;750}751752mem = (uint8_t*)WEBP_ALIGN(mem);753assert((yuv_size & WEBP_ALIGN_CST) == 0);754dec->yuv_b = mem;755mem += yuv_size;756757dec->mb_data = (VP8MBData*)mem;758dec->thread_ctx.mb_data = (VP8MBData*)mem;759if (dec->mt_method == 2) {760dec->thread_ctx.mb_data += mb_w;761}762mem += mb_data_size;763764dec->cache_y_stride = 16 * mb_w;765dec->cache_uv_stride = 8 * mb_w;766{767const int extra_rows = kFilterExtraRows[dec->filter_type];768const int extra_y = extra_rows * dec->cache_y_stride;769const int extra_uv = (extra_rows / 2) * dec->cache_uv_stride;770dec->cache_y = mem + extra_y;771dec->cache_u = dec->cache_y772+ 16 * num_caches * dec->cache_y_stride + extra_uv;773dec->cache_v = dec->cache_u774+ 8 * num_caches * dec->cache_uv_stride + extra_uv;775dec->cache_id = 0;776}777mem += cache_size;778779// alpha plane780dec->alpha_plane = alpha_size ? mem : NULL;781mem += alpha_size;782assert(mem <= (uint8_t*)dec->mem + dec->mem_size);783784// note: left/top-info is initialized once for all.785memset(dec->mb_info - 1, 0, mb_info_size);786VP8InitScanline(dec); // initialize left too.787788// initialize top789memset(dec->intra_t, B_DC_PRED, intra_pred_mode_size);790791return 1;792}793794static void InitIo(VP8Decoder* const dec, VP8Io* io) {795// prepare 'io'796io->mb_y = 0;797io->y = dec->cache_y;798io->u = dec->cache_u;799io->v = dec->cache_v;800io->y_stride = dec->cache_y_stride;801io->uv_stride = dec->cache_uv_stride;802io->a = NULL;803}804805int VP8InitFrame(VP8Decoder* const dec, VP8Io* const io) {806if (!InitThreadContext(dec)) return 0; // call first. Sets dec->num_caches.807if (!AllocateMemory(dec)) return 0;808InitIo(dec, io);809VP8DspInit(); // Init critical function pointers and look-up tables.810return 1;811}812813//------------------------------------------------------------------------------814815816