Path: blob/master/thirdparty/libwebp/src/dsp/cost_sse2.c
21643 views
// Copyright 2015 Google Inc. All Rights Reserved.1//2// Use of this source code is governed by a BSD-style license3// that can be found in the COPYING file in the root of the source4// tree. An additional intellectual property rights grant can be found5// in the file PATENTS. All contributing project authors may6// be found in the AUTHORS file in the root of the source tree.7// -----------------------------------------------------------------------------8//9// SSE2 version of cost functions10//11// Author: Skal ([email protected])1213#include "src/dsp/dsp.h"1415#if defined(WEBP_USE_SSE2)16#include <emmintrin.h>1718#include <assert.h>1920#include "src/webp/types.h"21#include "src/dsp/cpu.h"22#include "src/enc/cost_enc.h"23#include "src/enc/vp8i_enc.h"24#include "src/utils/utils.h"2526//------------------------------------------------------------------------------2728static void SetResidualCoeffs_SSE2(const int16_t* WEBP_RESTRICT const coeffs,29VP8Residual* WEBP_RESTRICT const res) {30const __m128i c0 = _mm_loadu_si128((const __m128i*)(coeffs + 0));31const __m128i c1 = _mm_loadu_si128((const __m128i*)(coeffs + 8));32// Use SSE2 to compare 16 values with a single instruction.33const __m128i zero = _mm_setzero_si128();34const __m128i m0 = _mm_packs_epi16(c0, c1);35const __m128i m1 = _mm_cmpeq_epi8(m0, zero);36// Get the comparison results as a bitmask into 16bits. Negate the mask to get37// the position of entries that are not equal to zero. We don't need to mask38// out least significant bits according to res->first, since coeffs[0] is 039// if res->first > 0.40const uint32_t mask = 0x0000ffffu ^ (uint32_t)_mm_movemask_epi8(m1);41// The position of the most significant non-zero bit indicates the position of42// the last non-zero value.43assert(res->first == 0 || coeffs[0] == 0);44res->last = mask ? BitsLog2Floor(mask) : -1;45res->coeffs = coeffs;46}4748static int GetResidualCost_SSE2(int ctx0, const VP8Residual* const res) {49uint8_t levels[16], ctxs[16];50uint16_t abs_levels[16];51int n = res->first;52// should be prob[VP8EncBands[n]], but it's equivalent for n=0 or 153const int p0 = res->prob[n][ctx0][0];54CostArrayPtr const costs = res->costs;55const uint16_t* t = costs[n][ctx0];56// bit_cost(1, p0) is already incorporated in t[] tables, but only if ctx != 057// (as required by the syntax). For ctx0 == 0, we need to add it here or it'll58// be missing during the loop.59int cost = (ctx0 == 0) ? VP8BitCost(1, p0) : 0;6061if (res->last < 0) {62return VP8BitCost(0, p0);63}6465{ // precompute clamped levels and contexts, packed to 8b.66const __m128i zero = _mm_setzero_si128();67const __m128i kCst2 = _mm_set1_epi8(2);68const __m128i kCst67 = _mm_set1_epi8(MAX_VARIABLE_LEVEL);69const __m128i c0 = _mm_loadu_si128((const __m128i*)&res->coeffs[0]);70const __m128i c1 = _mm_loadu_si128((const __m128i*)&res->coeffs[8]);71const __m128i D0 = _mm_sub_epi16(zero, c0);72const __m128i D1 = _mm_sub_epi16(zero, c1);73const __m128i E0 = _mm_max_epi16(c0, D0); // abs(v), 16b74const __m128i E1 = _mm_max_epi16(c1, D1);75const __m128i F = _mm_packs_epi16(E0, E1);76const __m128i G = _mm_min_epu8(F, kCst2); // context = 0,1,277const __m128i H = _mm_min_epu8(F, kCst67); // clamp_level in [0..67]7879_mm_storeu_si128((__m128i*)&ctxs[0], G);80_mm_storeu_si128((__m128i*)&levels[0], H);8182_mm_storeu_si128((__m128i*)&abs_levels[0], E0);83_mm_storeu_si128((__m128i*)&abs_levels[8], E1);84}85for (; n < res->last; ++n) {86const int ctx = ctxs[n];87const int level = levels[n];88const int flevel = abs_levels[n]; // full level89cost += VP8LevelFixedCosts[flevel] + t[level]; // simplified VP8LevelCost()90t = costs[n + 1][ctx];91}92// Last coefficient is always non-zero93{94const int level = levels[n];95const int flevel = abs_levels[n];96assert(flevel != 0);97cost += VP8LevelFixedCosts[flevel] + t[level];98if (n < 15) {99const int b = VP8EncBands[n + 1];100const int ctx = ctxs[n];101const int last_p0 = res->prob[b][ctx][0];102cost += VP8BitCost(0, last_p0);103}104}105return cost;106}107108//------------------------------------------------------------------------------109// Entry point110111extern void VP8EncDspCostInitSSE2(void);112113WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspCostInitSSE2(void) {114VP8SetResidualCoeffs = SetResidualCoeffs_SSE2;115VP8GetResidualCost = GetResidualCost_SSE2;116}117118#else // !WEBP_USE_SSE2119120WEBP_DSP_INIT_STUB(VP8EncDspCostInitSSE2)121122#endif // WEBP_USE_SSE2123124125