Path: blob/master/thirdparty/libwebp/src/enc/analysis_enc.c
// Copyright 2011 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Macroblock analysis
//
// Author: Skal ([email protected])

#include <assert.h>
#include <stdlib.h>
#include <string.h>

#include "src/dec/common_dec.h"
#include "src/dsp/dsp.h"
#include "src/enc/vp8i_enc.h"
#include "src/utils/thread_utils.h"
#include "src/utils/utils.h"
#include "src/webp/encode.h"
#include "src/webp/types.h"

#define MAX_ITERS_K_MEANS 6

//------------------------------------------------------------------------------
// Smooth the segment map by replacing isolated blocks by the majority of their
// neighbours.

static void SmoothSegmentMap(VP8Encoder* const enc) {
  int n, x, y;
  const int w = enc->mb_w;
  const int h = enc->mb_h;
  const int majority_cnt_3_x_3_grid = 5;
  uint8_t* const tmp = (uint8_t*)WebPSafeMalloc(w * h, sizeof(*tmp));
  assert((uint64_t)(w * h) == (uint64_t)w * h);   // no overflow, as per spec

  if (tmp == NULL) return;
  for (y = 1; y < h - 1; ++y) {
    for (x = 1; x < w - 1; ++x) {
      int cnt[NUM_MB_SEGMENTS] = { 0 };
      const VP8MBInfo* const mb = &enc->mb_info[x + w * y];
      int majority_seg = mb->segment;
      // Check the 8 neighbouring segment values.
      cnt[mb[-w - 1].segment]++;  // top-left
      cnt[mb[-w + 0].segment]++;  // top
      cnt[mb[-w + 1].segment]++;  // top-right
      cnt[mb[   - 1].segment]++;  // left
      cnt[mb[   + 1].segment]++;  // right
      cnt[mb[ w - 1].segment]++;  // bottom-left
      cnt[mb[ w + 0].segment]++;  // bottom
      cnt[mb[ w + 1].segment]++;  // bottom-right
      for (n = 0; n < NUM_MB_SEGMENTS; ++n) {
        if (cnt[n] >= majority_cnt_3_x_3_grid) {
          majority_seg = n;
          break;
        }
      }
      tmp[x + y * w] = majority_seg;
    }
  }
  for (y = 1; y < h - 1; ++y) {
    for (x = 1; x < w - 1; ++x) {
      VP8MBInfo* const mb = &enc->mb_info[x + w * y];
      mb->segment = tmp[x + y * w];
    }
  }
  WebPSafeFree(tmp);
}

//------------------------------------------------------------------------------
// set segment susceptibility 'alpha' / 'beta'

static WEBP_INLINE int clip(int v, int m, int M) {
  return (v < m) ? m : (v > M) ? M : v;
}

static void SetSegmentAlphas(VP8Encoder* const enc,
                             const int centers[NUM_MB_SEGMENTS],
                             int mid) {
  const int nb = enc->segment_hdr.num_segments;
  int min = centers[0], max = centers[0];
  int n;

  if (nb > 1) {
    for (n = 0; n < nb; ++n) {
      if (min > centers[n]) min = centers[n];
      if (max < centers[n]) max = centers[n];
    }
  }
  if (max == min) max = min + 1;
  assert(mid <= max && mid >= min);
  for (n = 0; n < nb; ++n) {
    const int alpha = 255 * (centers[n] - mid) / (max - min);
    const int beta = 255 * (centers[n] - min) / (max - min);
    enc->dqm[n].alpha = clip(alpha, -127, 127);
    enc->dqm[n].beta = clip(beta, 0, 255);
  }
}

//------------------------------------------------------------------------------
// Compute susceptibility based on DCT-coeff histograms:
// the higher, the "easier" the macroblock is to compress.

#define MAX_ALPHA 255                // 8b of precision for susceptibilities.
#define ALPHA_SCALE (2 * MAX_ALPHA)  // scaling factor for alpha.
#define DEFAULT_ALPHA (-1)
#define IS_BETTER_ALPHA(alpha, best_alpha) ((alpha) > (best_alpha))

static int FinalAlphaValue(int alpha) {
  alpha = MAX_ALPHA - alpha;
  return clip(alpha, 0, MAX_ALPHA);
}

static int GetAlpha(const VP8Histogram* const histo) {
  // 'alpha' will later be clipped to [0..MAX_ALPHA] range, clamping outer
  // values which happen to be mostly noise. This leaves the maximum precision
  // for handling the useful small values which contribute most.
  const int max_value = histo->max_value;
  const int last_non_zero = histo->last_non_zero;
  const int alpha =
      (max_value > 1) ? ALPHA_SCALE * last_non_zero / max_value : 0;
  return alpha;
}

static void InitHistogram(VP8Histogram* const histo) {
  histo->max_value = 0;
  histo->last_non_zero = 1;
}
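// Note on the scale used above (a worked example, for reference only):
// GetAlpha() returns ALPHA_SCALE * last_non_zero / max_value, so a histogram
// with max_value = 200 and last_non_zero = 20 gives 510 * 20 / 200 = 51,
// and FinalAlphaValue() then maps this to MAX_ALPHA - 51 = 204. The flatter
// the residuals, the higher the final susceptibility value.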
//------------------------------------------------------------------------------
// Simplified k-Means, to assign Nb segments based on alpha-histogram

static void AssignSegments(VP8Encoder* const enc,
                           const int alphas[MAX_ALPHA + 1]) {
  // 'num_segments' is previously validated and <= NUM_MB_SEGMENTS, but an
  // explicit check is needed to avoid spurious warning about 'n + 1' exceeding
  // array bounds of 'centers' with some compilers (noticed with gcc-4.9).
  const int nb = (enc->segment_hdr.num_segments < NUM_MB_SEGMENTS) ?
                 enc->segment_hdr.num_segments : NUM_MB_SEGMENTS;
  int centers[NUM_MB_SEGMENTS];
  int weighted_average = 0;
  int map[MAX_ALPHA + 1];
  int a, n, k;
  int min_a = 0, max_a = MAX_ALPHA, range_a;
  // 'int' type is ok for histo, and won't overflow
  int accum[NUM_MB_SEGMENTS], dist_accum[NUM_MB_SEGMENTS];

  assert(nb >= 1);
  assert(nb <= NUM_MB_SEGMENTS);

  // bracket the input
  for (n = 0; n <= MAX_ALPHA && alphas[n] == 0; ++n) {}
  min_a = n;
  for (n = MAX_ALPHA; n > min_a && alphas[n] == 0; --n) {}
  max_a = n;
  range_a = max_a - min_a;

  // Spread initial centers evenly
  for (k = 0, n = 1; k < nb; ++k, n += 2) {
    assert(n < 2 * nb);
    centers[k] = min_a + (n * range_a) / (2 * nb);
  }

  for (k = 0; k < MAX_ITERS_K_MEANS; ++k) {     // few iters are enough
    int total_weight;
    int displaced;
    // Reset stats
    for (n = 0; n < nb; ++n) {
      accum[n] = 0;
      dist_accum[n] = 0;
    }
    // Assign nearest center for each 'a'
    n = 0;    // track the nearest center for current 'a'
    for (a = min_a; a <= max_a; ++a) {
      if (alphas[a]) {
        while (n + 1 < nb && abs(a - centers[n + 1]) < abs(a - centers[n])) {
          n++;
        }
        map[a] = n;
        // accumulate contribution into best centroid
        dist_accum[n] += a * alphas[a];
        accum[n] += alphas[a];
      }
    }
    // All points are classified. Move the centroids to the
    // center of their respective cloud.
    displaced = 0;
    weighted_average = 0;
    total_weight = 0;
    for (n = 0; n < nb; ++n) {
      if (accum[n]) {
        const int new_center = (dist_accum[n] + accum[n] / 2) / accum[n];
        displaced += abs(centers[n] - new_center);
        centers[n] = new_center;
        weighted_average += new_center * accum[n];
        total_weight += accum[n];
      }
    }
    weighted_average = (weighted_average + total_weight / 2) / total_weight;
    if (displaced < 5) break;   // no need to keep on looping...
  }

  // Map each original value to the closest centroid
  for (n = 0; n < enc->mb_w * enc->mb_h; ++n) {
    VP8MBInfo* const mb = &enc->mb_info[n];
    const int alpha = mb->alpha;
    mb->segment = map[alpha];
    mb->alpha = centers[map[alpha]];  // for the record.
  }

  if (nb > 1) {
    const int smooth = (enc->config->preprocessing & 1);
    if (smooth) SmoothSegmentMap(enc);
  }

  SetSegmentAlphas(enc, centers, weighted_average);  // pick some alphas.
}
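// Note on AssignSegments() above (illustrative numbers only): the initial
// centers sit at the odd multiples of range_a / (2 * nb) above min_a, e.g.
// nb = 4, min_a = 0 and max_a = 240 start the centers at 30, 90, 150 and 210.
// The loop then stops after MAX_ITERS_K_MEANS passes, or earlier as soon as
// the total center displacement drops below 5.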
//------------------------------------------------------------------------------
// Macroblock analysis: collect histogram for each mode, deduce the maximal
// susceptibility and set best modes for this macroblock.
// Segment assignment is done later.

// Number of modes to inspect for 'alpha' evaluation. We don't need to test all
// the possible modes during the analysis phase: we risk falling into a local
// optimum, or being subject to boundary effects.
#define MAX_INTRA16_MODE 2
#define MAX_INTRA4_MODE 2
#define MAX_UV_MODE 2

static int MBAnalyzeBestIntra16Mode(VP8EncIterator* const it) {
  const int max_mode = MAX_INTRA16_MODE;
  int mode;
  int best_alpha = DEFAULT_ALPHA;
  int best_mode = 0;

  VP8MakeLuma16Preds(it);
  for (mode = 0; mode < max_mode; ++mode) {
    VP8Histogram histo;
    int alpha;

    InitHistogram(&histo);
    VP8CollectHistogram(it->yuv_in + Y_OFF_ENC,
                        it->yuv_p + VP8I16ModeOffsets[mode],
                        0, 16, &histo);
    alpha = GetAlpha(&histo);
    if (IS_BETTER_ALPHA(alpha, best_alpha)) {
      best_alpha = alpha;
      best_mode = mode;
    }
  }
  VP8SetIntra16Mode(it, best_mode);
  return best_alpha;
}

static int FastMBAnalyze(VP8EncIterator* const it) {
  // Empirical cut-off value, should be around 16 (~=block size). We use the
  // [8-17] range and favor intra4 at high quality, intra16 for low quality.
  const int q = (int)it->enc->config->quality;
  const uint32_t kThreshold = 8 + (17 - 8) * q / 100;
  int k;
  uint32_t dc[16], m, m2;
  for (k = 0; k < 16; k += 4) {
    VP8Mean16x4(it->yuv_in + Y_OFF_ENC + k * BPS, &dc[k]);
  }
  for (m = 0, m2 = 0, k = 0; k < 16; ++k) {
    m += dc[k];
    m2 += dc[k] * dc[k];
  }
  if (kThreshold * m2 < m * m) {
    VP8SetIntra16Mode(it, 0);   // DC16
  } else {
    const uint8_t modes[16] = { 0 };  // DC4
    VP8SetIntra4Mode(it, modes);
  }
  return 0;
}
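// Note on the test in FastMBAnalyze() above: with m the sum of the sixteen
// local DC averages and m2 the sum of their squares, m * m / m2 lies in
// [1, 16] and reaches 16 only when all averages are equal (a flat macroblock).
// So 'kThreshold * m2 < m * m' picks DC16 for sufficiently flat blocks and
// falls back to intra4 (DC4) otherwise.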
static int MBAnalyzeBestUVMode(VP8EncIterator* const it) {
  int best_alpha = DEFAULT_ALPHA;
  int smallest_alpha = 0;
  int best_mode = 0;
  const int max_mode = MAX_UV_MODE;
  int mode;

  VP8MakeChroma8Preds(it);
  for (mode = 0; mode < max_mode; ++mode) {
    VP8Histogram histo;
    int alpha;
    InitHistogram(&histo);
    VP8CollectHistogram(it->yuv_in + U_OFF_ENC,
                        it->yuv_p + VP8UVModeOffsets[mode],
                        16, 16 + 4 + 4, &histo);
    alpha = GetAlpha(&histo);
    if (IS_BETTER_ALPHA(alpha, best_alpha)) {
      best_alpha = alpha;
    }
    // The best prediction mode tends to be the one with the smallest alpha.
    if (mode == 0 || alpha < smallest_alpha) {
      smallest_alpha = alpha;
      best_mode = mode;
    }
  }
  VP8SetIntraUVMode(it, best_mode);
  return best_alpha;
}

static void MBAnalyze(VP8EncIterator* const it,
                      int alphas[MAX_ALPHA + 1],
                      int* const alpha, int* const uv_alpha) {
  const VP8Encoder* const enc = it->enc;
  int best_alpha, best_uv_alpha;

  VP8SetIntra16Mode(it, 0);  // default: Intra16, DC_PRED
  VP8SetSkip(it, 0);         // not skipped
  VP8SetSegment(it, 0);      // default segment, spec-wise.

  if (enc->method <= 1) {
    best_alpha = FastMBAnalyze(it);
  } else {
    best_alpha = MBAnalyzeBestIntra16Mode(it);
  }
  best_uv_alpha = MBAnalyzeBestUVMode(it);

  // Final susceptibility mix
  best_alpha = (3 * best_alpha + best_uv_alpha + 2) >> 2;
  best_alpha = FinalAlphaValue(best_alpha);
  alphas[best_alpha]++;
  it->mb->alpha = best_alpha;   // for later remapping.

  // Accumulate for later complexity analysis.
  *alpha += best_alpha;      // mixed susceptibility (not just luma)
  *uv_alpha += best_uv_alpha;
}
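// Note on the mix in MBAnalyze() above: (3 * a + uv + 2) >> 2 is a rounded
// 3:1 weighted average of the luma and chroma susceptibilities; e.g.
// best_alpha = 100 and best_uv_alpha = 60 give (300 + 60 + 2) >> 2 = 90.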
static void DefaultMBInfo(VP8MBInfo* const mb) {
  mb->type = 1;     // I16x16
  mb->uv_mode = 0;
  mb->skip = 0;     // not skipped
  mb->segment = 0;  // default segment
  mb->alpha = 0;
}

//------------------------------------------------------------------------------
// Main analysis loop:
// Collect all susceptibilities for each macroblock and record their
// distribution in alphas[]. Segments are assigned a-posteriori, based on
// this histogram.
// We also pick an intra16 prediction mode, which shouldn't be considered
// final except for fast-encode settings. We can also pick some intra4 modes
// and decide intra4/intra16, but that's almost always a bad choice at
// this stage.

static void ResetAllMBInfo(VP8Encoder* const enc) {
  int n;
  for (n = 0; n < enc->mb_w * enc->mb_h; ++n) {
    DefaultMBInfo(&enc->mb_info[n]);
  }
  // Default susceptibilities.
  enc->dqm[0].alpha = 0;
  enc->dqm[0].beta = 0;
  // Note: we can't compute this 'alpha' / 'uv_alpha' -> set to default value.
  enc->alpha = 0;
  enc->uv_alpha = 0;
  WebPReportProgress(enc->pic, enc->percent + 20, &enc->percent);
}

// struct used to collect job results
typedef struct {
  WebPWorker worker;
  int alphas[MAX_ALPHA + 1];
  int alpha, uv_alpha;
  VP8EncIterator it;
  int delta_progress;
} SegmentJob;

// main work call
static int DoSegmentsJob(void* arg1, void* arg2) {
  SegmentJob* const job = (SegmentJob*)arg1;
  VP8EncIterator* const it = (VP8EncIterator*)arg2;
  int ok = 1;
  if (!VP8IteratorIsDone(it)) {
    uint8_t tmp[32 + WEBP_ALIGN_CST];
    uint8_t* const scratch = (uint8_t*)WEBP_ALIGN(tmp);
    do {
      // Let's pretend we have perfect lossless reconstruction.
      VP8IteratorImport(it, scratch);
      MBAnalyze(it, job->alphas, &job->alpha, &job->uv_alpha);
      ok = VP8IteratorProgress(it, job->delta_progress);
    } while (ok && VP8IteratorNext(it));
  }
  return ok;
}

#ifdef WEBP_USE_THREAD
static void MergeJobs(const SegmentJob* const src, SegmentJob* const dst) {
  int i;
  for (i = 0; i <= MAX_ALPHA; ++i) dst->alphas[i] += src->alphas[i];
  dst->alpha += src->alpha;
  dst->uv_alpha += src->uv_alpha;
}
#endif

// initialize the job struct with some tasks to perform
static void InitSegmentJob(VP8Encoder* const enc, SegmentJob* const job,
                           int start_row, int end_row) {
  WebPGetWorkerInterface()->Init(&job->worker);
  job->worker.data1 = job;
  job->worker.data2 = &job->it;
  job->worker.hook = DoSegmentsJob;
  VP8IteratorInit(enc, &job->it);
  VP8IteratorSetRow(&job->it, start_row);
  VP8IteratorSetCountDown(&job->it, (end_row - start_row) * enc->mb_w);
  memset(job->alphas, 0, sizeof(job->alphas));
  job->alpha = 0;
  job->uv_alpha = 0;
  // Only one of the two jobs can record the progress, since we don't
  // expect the user's hook to be multi-thread safe.
  job->delta_progress = (start_row == 0) ? 20 : 0;
}
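// Note on InitSegmentJob() above: the iterator's countdown is the number of
// macroblocks in the job's row range, and only the job starting at row 0 gets
// a non-zero delta_progress, so the user's progress hook is only ever called
// from one of the two jobs.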
// main entry point
int VP8EncAnalyze(VP8Encoder* const enc) {
  int ok = 1;
  const int do_segments =
      enc->config->emulate_jpeg_size ||   // We need the complexity evaluation.
      (enc->segment_hdr.num_segments > 1) ||
      (enc->method <= 1);  // for method 0 - 1, we need preds[] to be filled.
  if (do_segments) {
    const int last_row = enc->mb_h;
    const int total_mb = last_row * enc->mb_w;
#ifdef WEBP_USE_THREAD
    // We give a little more than half of the work to the main thread.
    const int split_row = (9 * last_row + 15) >> 4;
    const int kMinSplitRow = 2;  // minimal rows needed for mt to be worth it
    const int do_mt = (enc->thread_level > 0) && (split_row >= kMinSplitRow);
#else
    const int do_mt = 0;
#endif
    const WebPWorkerInterface* const worker_interface =
        WebPGetWorkerInterface();
    SegmentJob main_job;
    if (do_mt) {
#ifdef WEBP_USE_THREAD
      SegmentJob side_job;
      // Note the use of '&' instead of '&&' because we must call the functions
      // no matter what.
      InitSegmentJob(enc, &main_job, 0, split_row);
      InitSegmentJob(enc, &side_job, split_row, last_row);
      // we don't need to call Reset() on main_job.worker, since we're calling
      // WebPWorkerExecute() on it
      ok &= worker_interface->Reset(&side_job.worker);
      // launch the two jobs in parallel
      if (ok) {
        worker_interface->Launch(&side_job.worker);
        worker_interface->Execute(&main_job.worker);
        ok &= worker_interface->Sync(&side_job.worker);
        ok &= worker_interface->Sync(&main_job.worker);
      }
      worker_interface->End(&side_job.worker);
      if (ok) MergeJobs(&side_job, &main_job);  // merge results together
#endif  // WEBP_USE_THREAD
    } else {
      // Even for the single-thread case, we use the generic Worker tools.
      InitSegmentJob(enc, &main_job, 0, last_row);
      worker_interface->Execute(&main_job.worker);
      ok &= worker_interface->Sync(&main_job.worker);
    }
    worker_interface->End(&main_job.worker);
    if (ok) {
      enc->alpha = main_job.alpha / total_mb;
      enc->uv_alpha = main_job.uv_alpha / total_mb;
      AssignSegments(enc, main_job.alphas);
    }
  } else {   // Use only one default segment.
    ResetAllMBInfo(enc);
  }
  if (!ok) {
    return WebPEncodingSetError(enc->pic,
                                VP8_ENC_ERROR_OUT_OF_MEMORY);  // imprecise
  }
  return ok;
}
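// Note on the work split in VP8EncAnalyze() above: split_row is
// (9 * last_row + 15) >> 4, i.e. roughly 9/16 of the macroblock rows go to the
// main thread; e.g. last_row = 100 gives split_row = (900 + 15) >> 4 = 57,
// leaving 43 rows to the side job.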