Path: blob/a-new-beginning/SharedDependencies/Sources/cryptopp/crc_simd.cpp
2 views
// crc_simd.cpp - written and placed in the public domain by1// Jeffrey Walton, Uri Blumenthal and Marcel Raad.2//3// This source file uses intrinsics to gain access to SSE4.2 and4// ARMv8a CRC-32 and CRC-32C instructions. A separate source file5// is needed because additional CXXFLAGS are required to enable6// the appropriate instructions sets in some build configurations.78#include "pch.h"9#include "config.h"10#include "misc.h"1112#if (CRYPTOPP_SSE42_AVAILABLE)13# include <nmmintrin.h>14#endif1516#if (CRYPTOPP_ARM_ACLE_HEADER)17# include <stdint.h>18# include <arm_acle.h>19#endif2021#if (CRYPTOPP_ARM_CRC32_AVAILABLE)22# include "arm_simd.h"23#endif2425#ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY26# include <signal.h>27# include <setjmp.h>28#endif2930#if CRYPTOPP_MSC_VERSION31# pragma warning(disable: 4244)32#endif3334#ifndef EXCEPTION_EXECUTE_HANDLER35# define EXCEPTION_EXECUTE_HANDLER 136#endif3738#define CONST_WORD32_CAST(x) ((const word32 *)(void*)(x))39#define CONST_WORD64_CAST(x) ((const word64 *)(void*)(x))4041// Squash MS LNK4221 and libtool warnings42extern const char CRC_SIMD_FNAME[] = __FILE__;4344NAMESPACE_BEGIN(CryptoPP)4546#ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY47extern "C" {48typedef void (*SigHandler)(int);4950static jmp_buf s_jmpSIGILL;51static void SigIllHandler(int)52{53longjmp(s_jmpSIGILL, 1);54}55}56#endif // Not CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY5758#if (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARMV8)5960bool CPU_ProbeCRC32()61{62#if defined(CRYPTOPP_NO_CPU_FEATURE_PROBES)63return false;64#elif (CRYPTOPP_ARM_CRC32_AVAILABLE)65# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)66volatile bool result = true;67__try68{69word32 w=0, x=1; byte z=3;70w = CRC32W(w,x);71w = CRC32B(w,z);72w = CRC32CW(w,x);73w = CRC32CB(w,z);7475result = !!w;76}77__except (EXCEPTION_EXECUTE_HANDLER)78{79return false;80}81return result;82#else8384// longjmp and clobber warnings. Volatile is required.85// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/772185486volatile bool result = true;8788volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler);89if (oldHandler == SIG_ERR)90return false;9192volatile sigset_t oldMask;93if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))94{95signal(SIGILL, oldHandler);96return false;97}9899if (setjmp(s_jmpSIGILL))100result = false;101else102{103word32 w=0, x=1; byte z=3;104w = CRC32W(w,x);105w = CRC32B(w,z);106w = CRC32CW(w,x);107w = CRC32CB(w,z);108109// Hack... GCC optimizes away the code and returns true110result = !!w;111}112113sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);114signal(SIGILL, oldHandler);115return result;116# endif117#else118return false;119#endif // CRYPTOPP_ARM_CRC32_AVAILABLE120}121#endif // ARM32 or ARM64122123#if (CRYPTOPP_ARM_CRC32_AVAILABLE)124void CRC32_Update_ARMV8(const byte *s, size_t n, word32& c)125{126for(; !IsAligned<word32>(s) && n > 0; s++, n--)127c = CRC32B(c, *s);128129for(; n >= 16; s+=16, n-=16)130c = CRC32Wx4(c, CONST_WORD32_CAST(s));131132for(; n >= 4; s+=4, n-=4)133c = CRC32W(c, *CONST_WORD32_CAST(s));134135for(; n > 0; s++, n--)136c = CRC32B(c, *s);137}138139void CRC32C_Update_ARMV8(const byte *s, size_t n, word32& c)140{141for(; !IsAligned<word32>(s) && n > 0; s++, n--)142c = CRC32CB(c, *s);143144for(; n >= 16; s+=16, n-=16)145c = CRC32CWx4(c, CONST_WORD32_CAST(s));146147for(; n >= 4; s+=4, n-=4)148c = CRC32CW(c, *CONST_WORD32_CAST(s));149150for(; n > 0; s++, n--)151c = CRC32CB(c, *s);152}153#endif154155#if (CRYPTOPP_SSE42_AVAILABLE)156void CRC32C_Update_SSE42(const byte *s, size_t n, word32& c)157{158// Temporary due to https://github.com/weidai11/cryptopp/issues/1202159word32 v = c;160161// 64-bit code path due to https://github.com/weidai11/cryptopp/issues/1202162#if CRYPTOPP_BOOL_X64163for(; !IsAligned<word64>(s) && n > 0; s++, n--)164v = _mm_crc32_u8(v, *s);165#else166for(; !IsAligned<word32>(s) && n > 0; s++, n--)167v = _mm_crc32_u8(v, *s);168#endif169170#if CRYPTOPP_BOOL_X64171for(; n >= 32; s+=32, n-=32)172{173v = _mm_crc32_u64(_mm_crc32_u64(_mm_crc32_u64(_mm_crc32_u64(v,174*CONST_WORD64_CAST(s+ 0)), *CONST_WORD64_CAST(s+ 8)),175*CONST_WORD64_CAST(s+16)), *CONST_WORD64_CAST(s+24));176}177#endif178179for(; n >= 16; s+=16, n-=16)180{181v = _mm_crc32_u32(_mm_crc32_u32(_mm_crc32_u32(_mm_crc32_u32(v,182*CONST_WORD32_CAST(s+ 0)), *CONST_WORD32_CAST(s+ 4)),183*CONST_WORD32_CAST(s+ 8)), *CONST_WORD32_CAST(s+12));184}185186for(; n >= 4; s+=4, n-=4)187v = _mm_crc32_u32(v, *CONST_WORD32_CAST(s));188189for(; n > 0; s++, n--)190v = _mm_crc32_u8(v, *s);191192c = static_cast<word32>(v);193}194#endif195196NAMESPACE_END197198199