// Path: blob/master/thirdparty/libwebp/src/dsp/common_sse41.h
// 9913 views
// Copyright 2016 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// SSE4 code common to several files.
//
// Author: Vincent Rabaud ([email protected])

#ifndef WEBP_DSP_COMMON_SSE41_H_
#define WEBP_DSP_COMMON_SSE41_H_

#ifdef __cplusplus
extern "C" {
#endif

#if defined(WEBP_USE_SSE41)
#include <smmintrin.h>

//------------------------------------------------------------------------------
// Channel mixing.

// Shuffles the input buffer as A0 0 0 A1 0 0 A2 ...
//
// Applies the three byte-shuffle masks 'shuff0', 'shuff1' and 'shuff2' to
// *IN0 and then to *IN1, storing the six results in OUT##0 .. OUT##5.
// The masks and the six OUT##n variables must already be declared in the
// scope where the macro is expanded. Mask bytes equal to -1 produce a zero
// output byte, which is what leaves room for the other two channels.
// Expands to a single statement: terminate the invocation with ';'.
#define WEBP_SSE41_SHUFF(OUT, IN0, IN1)        \
  do {                                         \
    OUT##0 = _mm_shuffle_epi8(*(IN0), shuff0); \
    OUT##1 = _mm_shuffle_epi8(*(IN0), shuff1); \
    OUT##2 = _mm_shuffle_epi8(*(IN0), shuff2); \
    OUT##3 = _mm_shuffle_epi8(*(IN1), shuff0); \
    OUT##4 = _mm_shuffle_epi8(*(IN1), shuff1); \
    OUT##5 = _mm_shuffle_epi8(*(IN1), shuff2); \
  } while (0)

// Pack the planar buffers
// rrrr... rrrr... gggg... gggg... bbbb... bbbb....
// triplet by triplet in the output buffer rgb as rgbrgbrgbrgb ...
//
// The operation is done in place:
//   input : in0/in1 hold the first plane (32 bytes), in2/in3 the second one,
//           in4/in5 the third one.
//   output: in0..in5, read in that order, hold the 96 interleaved bytes
//           (first-plane byte, second-plane byte, third-plane byte, ...).
static WEBP_INLINE void VP8PlanarTo24b_SSE41(
    __m128i* const in0, __m128i* const in1, __m128i* const in2,
    __m128i* const in3, __m128i* const in4, __m128i* const in5) {
  __m128i R0, R1, R2, R3, R4, R5;
  __m128i G0, G1, G2, G3, G4, G5;
  __m128i B0, B1, B2, B3, B4, B5;

  // Process R.
  // Note that _mm_set_epi8() lists its arguments from byte 15 down to byte 0.
  // Each source byte lands in every third output byte, starting at offset 0.
  {
    const __m128i shuff0 = _mm_set_epi8(
        5, -1, -1, 4, -1, -1, 3, -1, -1, 2, -1, -1, 1, -1, -1, 0);
    const __m128i shuff1 = _mm_set_epi8(
        -1, 10, -1, -1, 9, -1, -1, 8, -1, -1, 7, -1, -1, 6, -1, -1);
    const __m128i shuff2 = _mm_set_epi8(
        -1, -1, 15, -1, -1, 14, -1, -1, 13, -1, -1, 12, -1, -1, 11, -1);
    WEBP_SSE41_SHUFF(R, in0, in1);
  }

  // Process G.
  {
    // Same as before, just shifted to the left by one and including the right
    // padding.
    const __m128i shuff0 = _mm_set_epi8(
        -1, -1, 4, -1, -1, 3, -1, -1, 2, -1, -1, 1, -1, -1, 0, -1);
    const __m128i shuff1 = _mm_set_epi8(
        10, -1, -1, 9, -1, -1, 8, -1, -1, 7, -1, -1, 6, -1, -1, 5);
    const __m128i shuff2 = _mm_set_epi8(
        -1, 15, -1, -1, 14, -1, -1, 13, -1, -1, 12, -1, -1, 11, -1, -1);
    WEBP_SSE41_SHUFF(G, in2, in3);
  }

  // Process B.
  // Same pattern again, with every source byte placed at offset 2 of its
  // triplet.
  {
    const __m128i shuff0 = _mm_set_epi8(
        -1, 4, -1, -1, 3, -1, -1, 2, -1, -1, 1, -1, -1, 0, -1, -1);
    const __m128i shuff1 = _mm_set_epi8(
        -1, -1, 9, -1, -1, 8, -1, -1, 7, -1, -1, 6, -1, -1, 5, -1);
    const __m128i shuff2 = _mm_set_epi8(
        15, -1, -1, 14, -1, -1, 13, -1, -1, 12, -1, -1, 11, -1, -1, 10);
    WEBP_SSE41_SHUFF(B, in4, in5);
  }

  // OR the different channels.
  // The three shuffled planes occupy disjoint byte positions, so OR-ing them
  // merges the triplets without any loss.
  {
    const __m128i RG0 = _mm_or_si128(R0, G0);
    const __m128i RG1 = _mm_or_si128(R1, G1);
    const __m128i RG2 = _mm_or_si128(R2, G2);
    const __m128i RG3 = _mm_or_si128(R3, G3);
    const __m128i RG4 = _mm_or_si128(R4, G4);
    const __m128i RG5 = _mm_or_si128(R5, G5);
    *in0 = _mm_or_si128(RG0, B0);
    *in1 = _mm_or_si128(RG1, B1);
    *in2 = _mm_or_si128(RG2, B2);
    *in3 = _mm_or_si128(RG3, B3);
    *in4 = _mm_or_si128(RG4, B4);
    *in5 = _mm_or_si128(RG5, B5);
  }
}

#undef WEBP_SSE41_SHUFF

// Convert four packed four-channel buffers like argbargbargbargb... into the
// split channels aaaaa ... rrrr ... gggg .... bbbbb ......
//
// The operation is done in place on 16 pixels of 4 bytes each (4 pixels per
// register). In terms of byte positions inside a pixel, on return:
//   *in3 holds byte 0 of the 16 pixels,
//   *in2 holds byte 1,
//   *in1 holds byte 2,
//   *in0 holds byte 3,
// each register listing the pixels of in0 first, then in1, in2 and in3.
// In the comments below, the letters a, r, g, b name byte positions 0, 1, 2
// and 3 of a pixel, and the digit is the index of the source register.
static WEBP_INLINE void VP8L32bToPlanar_SSE41(__m128i* const in0,
                                              __m128i* const in1,
                                              __m128i* const in2,
                                              __m128i* const in3) {
  // aaaarrrrggggbbbb
  const __m128i shuff0 =
      _mm_set_epi8(15, 11, 7, 3, 14, 10, 6, 2, 13, 9, 5, 1, 12, 8, 4, 0);
  const __m128i A0 = _mm_shuffle_epi8(*in0, shuff0);
  const __m128i A1 = _mm_shuffle_epi8(*in1, shuff0);
  const __m128i A2 = _mm_shuffle_epi8(*in2, shuff0);
  const __m128i A3 = _mm_shuffle_epi8(*in3, shuff0);
  // A0A1R0R1
  // G0G1B0B1
  // A2A3R2R3
  // G2G3B2B3
  const __m128i B0 = _mm_unpacklo_epi32(A0, A1);
  const __m128i B1 = _mm_unpackhi_epi32(A0, A1);
  const __m128i B2 = _mm_unpacklo_epi32(A2, A3);
  const __m128i B3 = _mm_unpackhi_epi32(A2, A3);
  // Gather the 64-bit halves so that each output register holds one byte
  // position for all 16 pixels.
  *in3 = _mm_unpacklo_epi64(B0, B2);
  *in2 = _mm_unpackhi_epi64(B0, B2);
  *in1 = _mm_unpacklo_epi64(B1, B3);
  *in0 = _mm_unpackhi_epi64(B1, B3);
}

#endif  // WEBP_USE_SSE41

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // WEBP_DSP_COMMON_SSE41_H_