Path: blob/master/Utilities/cmliblzma/liblzma/check/crc32_arm64.h
3153 views
// SPDX-License-Identifier: 0BSD12///////////////////////////////////////////////////////////////////////////////3//4/// \file crc32_arm64.h5/// \brief CRC32 calculation with ARM64 optimization6//7// Authors: Chenxi Mao8// Jia Tan9// Hans Jansen10//11///////////////////////////////////////////////////////////////////////////////1213#ifndef LZMA_CRC32_ARM64_H14#define LZMA_CRC32_ARM64_H1516// MSVC always has the CRC intrinsics available when building for ARM6417// there is no need to include any header files.18#ifndef _MSC_VER19# include <arm_acle.h>20#endif2122// If both versions are going to be built, we need runtime detection23// to check if the instructions are supported.24#if defined(CRC32_GENERIC) && defined(CRC32_ARCH_OPTIMIZED)25# if defined(HAVE_GETAUXVAL) || defined(HAVE_ELF_AUX_INFO)26# include <sys/auxv.h>27# elif defined(_WIN32)28# include <processthreadsapi.h>29# elif defined(__APPLE__) && defined(HAVE_SYSCTLBYNAME)30# include <sys/sysctl.h>31# endif32#endif3334// Some EDG-based compilers support ARM64 and define __GNUC__35// (such as Nvidia's nvcc), but do not support function attributes.36//37// NOTE: Build systems check for this too, keep them in sync with this.38#if (defined(__GNUC__) || defined(__clang__)) && !defined(__EDG__)39# define crc_attr_target __attribute__((__target__("+crc")))40#else41# define crc_attr_target42#endif434445crc_attr_target46static uint32_t47crc32_arch_optimized(const uint8_t *buf, size_t size, uint32_t crc)48{49crc = ~crc;5051// Align the input buffer because this was shown to be52// significantly faster than unaligned accesses.53const size_t align_amount = my_min(size, (0U - (uintptr_t)buf) & 7);5455for (const uint8_t *limit = buf + align_amount; buf < limit; ++buf)56crc = __crc32b(crc, *buf);5758size -= align_amount;5960// Process 8 bytes at a time. The end point is determined by61// ignoring the least significant three bits of size to ensure62// we do not process past the bounds of the buffer. This guarantees63// that limit is a multiple of 8 and is strictly less than size.64for (const uint8_t *limit = buf + (size & ~(size_t)7);65buf < limit; buf += 8)66crc = __crc32d(crc, aligned_read64le(buf));6768// Process the remaining bytes that are not 8 byte aligned.69for (const uint8_t *limit = buf + (size & 7); buf < limit; ++buf)70crc = __crc32b(crc, *buf);7172return ~crc;73}747576#if defined(CRC32_GENERIC) && defined(CRC32_ARCH_OPTIMIZED)77static inline bool78is_arch_extension_supported(void)79{80#if defined(HAVE_GETAUXVAL)81return (getauxval(AT_HWCAP) & HWCAP_CRC32) != 0;8283#elif defined(HAVE_ELF_AUX_INFO)84unsigned long feature_flags;8586if (elf_aux_info(AT_HWCAP, &feature_flags, sizeof(feature_flags)) != 0)87return false;8889return (feature_flags & HWCAP_CRC32) != 0;9091#elif defined(_WIN32)92return IsProcessorFeaturePresent(93PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE);9495#elif defined(__APPLE__) && defined(HAVE_SYSCTLBYNAME)96int has_crc32 = 0;97size_t size = sizeof(has_crc32);9899// The sysctlbyname() function requires a string identifier for the100// CPU feature it tests. The Apple documentation lists the string101// "hw.optional.armv8_crc32", which can be found here:102// https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics#3915619103if (sysctlbyname("hw.optional.armv8_crc32", &has_crc32,104&size, NULL, 0) != 0)105return false;106107return has_crc32;108109#else110// If a runtime detection method cannot be found, then this must111// be a compile time error. The checks in crc_common.h should ensure112// a runtime detection method is always found if this function is113// built. It would be possible to just return false here, but this114// is inefficient for binary size and runtime since only the generic115// method could ever be used.116# error Runtime detection method unavailable.117#endif118}119#endif120121#endif // LZMA_CRC32_ARM64_H122123124