// Path: blob/master/thirdparty/libwebp/src/dsp/lossless_sse41.c
// (Hosting-page metadata: 9914 views)
// Copyright 2021 Google Inc. All Rights Reserved.1//2// Use of this source code is governed by a BSD-style license3// that can be found in the COPYING file in the root of the source4// tree. An additional intellectual property rights grant can be found5// in the file PATENTS. All contributing project authors may6// be found in the AUTHORS file in the root of the source tree.7// -----------------------------------------------------------------------------8//9// SSE41 variant of methods for lossless decoder1011#include "src/dsp/dsp.h"1213#if defined(WEBP_USE_SSE41)1415#include "src/dsp/common_sse41.h"16#include "src/dsp/lossless.h"17#include "src/dsp/lossless_common.h"1819//------------------------------------------------------------------------------20// Color-space conversion functions2122static void TransformColorInverse_SSE41(const VP8LMultipliers* const m,23const uint32_t* const src,24int num_pixels, uint32_t* dst) {25// sign-extended multiplying constants, pre-shifted by 5.26#define CST(X) (((int16_t)(m->X << 8)) >> 5) // sign-extend27const __m128i mults_rb =28_mm_set1_epi32((int)((uint32_t)CST(green_to_red_) << 16 |29(CST(green_to_blue_) & 0xffff)));30const __m128i mults_b2 = _mm_set1_epi32(CST(red_to_blue_));31#undef CST32const __m128i mask_ag = _mm_set1_epi32((int)0xff00ff00);33const __m128i perm1 = _mm_setr_epi8(-1, 1, -1, 1, -1, 5, -1, 5,34-1, 9, -1, 9, -1, 13, -1, 13);35const __m128i perm2 = _mm_setr_epi8(-1, 2, -1, -1, -1, 6, -1, -1,36-1, 10, -1, -1, -1, 14, -1, -1);37int i;38for (i = 0; i + 4 <= num_pixels; i += 4) {39const __m128i A = _mm_loadu_si128((const __m128i*)(src + i));40const __m128i B = _mm_shuffle_epi8(A, perm1); // argb -> g0g041const __m128i C = _mm_mulhi_epi16(B, mults_rb);42const __m128i D = _mm_add_epi8(A, C);43const __m128i E = _mm_shuffle_epi8(D, perm2);44const __m128i F = _mm_mulhi_epi16(E, mults_b2);45const __m128i G = _mm_add_epi8(D, F);46const __m128i out = _mm_blendv_epi8(G, A, mask_ag);47_mm_storeu_si128((__m128i*)&dst[i], 
out);48}49// Fall-back to C-version for left-overs.50if (i != num_pixels) {51VP8LTransformColorInverse_C(m, src + i, num_pixels - i, dst + i);52}53}5455//------------------------------------------------------------------------------5657#define ARGB_TO_RGB_SSE41 do { \58while (num_pixels >= 16) { \59const __m128i in0 = _mm_loadu_si128(in + 0); \60const __m128i in1 = _mm_loadu_si128(in + 1); \61const __m128i in2 = _mm_loadu_si128(in + 2); \62const __m128i in3 = _mm_loadu_si128(in + 3); \63const __m128i a0 = _mm_shuffle_epi8(in0, perm0); \64const __m128i a1 = _mm_shuffle_epi8(in1, perm1); \65const __m128i a2 = _mm_shuffle_epi8(in2, perm2); \66const __m128i a3 = _mm_shuffle_epi8(in3, perm3); \67const __m128i b0 = _mm_blend_epi16(a0, a1, 0xc0); \68const __m128i b1 = _mm_blend_epi16(a1, a2, 0xf0); \69const __m128i b2 = _mm_blend_epi16(a2, a3, 0xfc); \70_mm_storeu_si128(out + 0, b0); \71_mm_storeu_si128(out + 1, b1); \72_mm_storeu_si128(out + 2, b2); \73in += 4; \74out += 3; \75num_pixels -= 16; \76} \77} while (0)7879static void ConvertBGRAToRGB_SSE41(const uint32_t* WEBP_RESTRICT src,80int num_pixels, uint8_t* WEBP_RESTRICT dst) {81const __m128i* in = (const __m128i*)src;82__m128i* out = (__m128i*)dst;83const __m128i perm0 = _mm_setr_epi8(2, 1, 0, 6, 5, 4, 10, 9,848, 14, 13, 12, -1, -1, -1, -1);85const __m128i perm1 = _mm_shuffle_epi32(perm0, 0x39);86const __m128i perm2 = _mm_shuffle_epi32(perm0, 0x4e);87const __m128i perm3 = _mm_shuffle_epi32(perm0, 0x93);8889ARGB_TO_RGB_SSE41;9091// left-overs92if (num_pixels > 0) {93VP8LConvertBGRAToRGB_C((const uint32_t*)in, num_pixels, (uint8_t*)out);94}95}9697static void ConvertBGRAToBGR_SSE41(const uint32_t* WEBP_RESTRICT src,98int num_pixels, uint8_t* WEBP_RESTRICT dst) {99const __m128i* in = (const __m128i*)src;100__m128i* out = (__m128i*)dst;101const __m128i perm0 = _mm_setr_epi8(0, 1, 2, 4, 5, 6, 8, 9, 10,10212, 13, 14, -1, -1, -1, -1);103const __m128i perm1 = _mm_shuffle_epi32(perm0, 0x39);104const __m128i perm2 = 
_mm_shuffle_epi32(perm0, 0x4e);105const __m128i perm3 = _mm_shuffle_epi32(perm0, 0x93);106107ARGB_TO_RGB_SSE41;108109// left-overs110if (num_pixels > 0) {111VP8LConvertBGRAToBGR_C((const uint32_t*)in, num_pixels, (uint8_t*)out);112}113}114115#undef ARGB_TO_RGB_SSE41116117//------------------------------------------------------------------------------118// Entry point119120extern void VP8LDspInitSSE41(void);121122WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInitSSE41(void) {123VP8LTransformColorInverse = TransformColorInverse_SSE41;124VP8LConvertBGRAToRGB = ConvertBGRAToRGB_SSE41;125VP8LConvertBGRAToBGR = ConvertBGRAToBGR_SSE41;126}127128#else // !WEBP_USE_SSE41129130WEBP_DSP_INIT_STUB(VP8LDspInitSSE41)131132#endif // WEBP_USE_SSE41133134135