Path: blob/master/thirdparty/libwebp/src/dsp/cost_sse2.c
9913 views
// Copyright 2015 Google Inc. All Rights Reserved.1//2// Use of this source code is governed by a BSD-style license3// that can be found in the COPYING file in the root of the source4// tree. An additional intellectual property rights grant can be found5// in the file PATENTS. All contributing project authors may6// be found in the AUTHORS file in the root of the source tree.7// -----------------------------------------------------------------------------8//9// SSE2 version of cost functions10//11// Author: Skal ([email protected])1213#include "src/dsp/dsp.h"1415#if defined(WEBP_USE_SSE2)16#include <emmintrin.h>1718#include "src/enc/cost_enc.h"19#include "src/enc/vp8i_enc.h"20#include "src/utils/utils.h"2122//------------------------------------------------------------------------------2324static void SetResidualCoeffs_SSE2(const int16_t* WEBP_RESTRICT const coeffs,25VP8Residual* WEBP_RESTRICT const res) {26const __m128i c0 = _mm_loadu_si128((const __m128i*)(coeffs + 0));27const __m128i c1 = _mm_loadu_si128((const __m128i*)(coeffs + 8));28// Use SSE2 to compare 16 values with a single instruction.29const __m128i zero = _mm_setzero_si128();30const __m128i m0 = _mm_packs_epi16(c0, c1);31const __m128i m1 = _mm_cmpeq_epi8(m0, zero);32// Get the comparison results as a bitmask into 16bits. Negate the mask to get33// the position of entries that are not equal to zero. We don't need to mask34// out least significant bits according to res->first, since coeffs[0] is 035// if res->first > 0.36const uint32_t mask = 0x0000ffffu ^ (uint32_t)_mm_movemask_epi8(m1);37// The position of the most significant non-zero bit indicates the position of38// the last non-zero value.39assert(res->first == 0 || coeffs[0] == 0);40res->last = mask ? BitsLog2Floor(mask) : -1;41res->coeffs = coeffs;42}4344static int GetResidualCost_SSE2(int ctx0, const VP8Residual* const res) {45uint8_t levels[16], ctxs[16];46uint16_t abs_levels[16];47int n = res->first;48// should be prob[VP8EncBands[n]], but it's equivalent for n=0 or 149const int p0 = res->prob[n][ctx0][0];50CostArrayPtr const costs = res->costs;51const uint16_t* t = costs[n][ctx0];52// bit_cost(1, p0) is already incorporated in t[] tables, but only if ctx != 053// (as required by the syntax). For ctx0 == 0, we need to add it here or it'll54// be missing during the loop.55int cost = (ctx0 == 0) ? VP8BitCost(1, p0) : 0;5657if (res->last < 0) {58return VP8BitCost(0, p0);59}6061{ // precompute clamped levels and contexts, packed to 8b.62const __m128i zero = _mm_setzero_si128();63const __m128i kCst2 = _mm_set1_epi8(2);64const __m128i kCst67 = _mm_set1_epi8(MAX_VARIABLE_LEVEL);65const __m128i c0 = _mm_loadu_si128((const __m128i*)&res->coeffs[0]);66const __m128i c1 = _mm_loadu_si128((const __m128i*)&res->coeffs[8]);67const __m128i D0 = _mm_sub_epi16(zero, c0);68const __m128i D1 = _mm_sub_epi16(zero, c1);69const __m128i E0 = _mm_max_epi16(c0, D0); // abs(v), 16b70const __m128i E1 = _mm_max_epi16(c1, D1);71const __m128i F = _mm_packs_epi16(E0, E1);72const __m128i G = _mm_min_epu8(F, kCst2); // context = 0,1,273const __m128i H = _mm_min_epu8(F, kCst67); // clamp_level in [0..67]7475_mm_storeu_si128((__m128i*)&ctxs[0], G);76_mm_storeu_si128((__m128i*)&levels[0], H);7778_mm_storeu_si128((__m128i*)&abs_levels[0], E0);79_mm_storeu_si128((__m128i*)&abs_levels[8], E1);80}81for (; n < res->last; ++n) {82const int ctx = ctxs[n];83const int level = levels[n];84const int flevel = abs_levels[n]; // full level85cost += VP8LevelFixedCosts[flevel] + t[level]; // simplified VP8LevelCost()86t = costs[n + 1][ctx];87}88// Last coefficient is always non-zero89{90const int level = levels[n];91const int flevel = abs_levels[n];92assert(flevel != 0);93cost += VP8LevelFixedCosts[flevel] + t[level];94if (n < 15) {95const int b = VP8EncBands[n + 1];96const int ctx = ctxs[n];97const int last_p0 = res->prob[b][ctx][0];98cost += VP8BitCost(0, last_p0);99}100}101return cost;102}103104//------------------------------------------------------------------------------105// Entry point106107extern void VP8EncDspCostInitSSE2(void);108109WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspCostInitSSE2(void) {110VP8SetResidualCoeffs = SetResidualCoeffs_SSE2;111VP8GetResidualCost = GetResidualCost_SSE2;112}113114#else // !WEBP_USE_SSE2115116WEBP_DSP_INIT_STUB(VP8EncDspCostInitSSE2)117118#endif // WEBP_USE_SSE2119120121