Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Kitware
GitHub Repository: Kitware/CMake
Path: blob/master/Utilities/cmliblzma/liblzma/check/crc32_arm64.h
3153 views
1
// SPDX-License-Identifier: 0BSD
2
3
///////////////////////////////////////////////////////////////////////////////
4
//
5
/// \file crc32_arm64.h
6
/// \brief CRC32 calculation with ARM64 optimization
7
//
8
// Authors: Chenxi Mao
9
// Jia Tan
10
// Hans Jansen
11
//
12
///////////////////////////////////////////////////////////////////////////////
13
14
#ifndef LZMA_CRC32_ARM64_H
15
#define LZMA_CRC32_ARM64_H
16
17
// MSVC always has the CRC intrinsics available when building for ARM64, so
18
// there is no need to include any header files.
19
#ifndef _MSC_VER
20
# include <arm_acle.h>
21
#endif
22
23
// If both versions are going to be built, we need runtime detection
24
// to check if the instructions are supported.
25
#if defined(CRC32_GENERIC) && defined(CRC32_ARCH_OPTIMIZED)
26
# if defined(HAVE_GETAUXVAL) || defined(HAVE_ELF_AUX_INFO)
27
# include <sys/auxv.h>
28
# elif defined(_WIN32)
29
# include <processthreadsapi.h>
30
# elif defined(__APPLE__) && defined(HAVE_SYSCTLBYNAME)
31
# include <sys/sysctl.h>
32
# endif
33
#endif
34
35
// crc_attr_target is placed in front of functions that use the CRC
// intrinsics. Where function attributes are usable it expands to a
// target attribute enabling "+crc"; everywhere else it expands to nothing.
//
// Some EDG-based compilers (such as Nvidia's nvcc) support ARM64 and
// define __GNUC__ but do not support function attributes, so __EDG__
// has to be excluded explicitly.
//
// NOTE: Build systems check for this too, keep them in sync with this.
#if defined(__EDG__) || !(defined(__GNUC__) || defined(__clang__))
#	define crc_attr_target
#else
#	define crc_attr_target __attribute__((__target__("+crc")))
#endif
44
45
46
crc_attr_target
47
static uint32_t
48
crc32_arch_optimized(const uint8_t *buf, size_t size, uint32_t crc)
49
{
50
crc = ~crc;
51
52
// Align the input buffer because this was shown to be
53
// significantly faster than unaligned accesses.
54
const size_t align_amount = my_min(size, (0U - (uintptr_t)buf) & 7);
55
56
for (const uint8_t *limit = buf + align_amount; buf < limit; ++buf)
57
crc = __crc32b(crc, *buf);
58
59
size -= align_amount;
60
61
// Process 8 bytes at a time. The end point is determined by
62
// ignoring the least significant three bits of size to ensure
63
// we do not process past the bounds of the buffer. This guarantees
64
// that limit is a multiple of 8 and is strictly less than size.
65
for (const uint8_t *limit = buf + (size & ~(size_t)7);
66
buf < limit; buf += 8)
67
crc = __crc32d(crc, aligned_read64le(buf));
68
69
// Process the remaining bytes that are not 8 byte aligned.
70
for (const uint8_t *limit = buf + (size & 7); buf < limit; ++buf)
71
crc = __crc32b(crc, *buf);
72
73
return ~crc;
74
}
75
76
77
#if defined(CRC32_GENERIC) && defined(CRC32_ARCH_OPTIMIZED)
/// \brief      Runtime check for the CRC32 instructions
///
/// Compiled only when both CRC32_GENERIC and CRC32_ARCH_OPTIMIZED are
/// defined. The query mechanism is picked at compile time from
/// getauxval(), elf_aux_info(), IsProcessorFeaturePresent() or
/// sysctlbyname(), in that order of preference.
///
/// \return     true if the platform reports the CRC32 feature; false if
///             it does not or (for elf_aux_info() and sysctlbyname())
///             if the query itself fails
static inline bool
is_arch_extension_supported(void)
{
#if defined(HAVE_GETAUXVAL)
	const unsigned long hwcap = getauxval(AT_HWCAP);

	return (hwcap & HWCAP_CRC32) != 0;

#elif defined(HAVE_ELF_AUX_INFO)
	unsigned long hwcap;
	const int err = elf_aux_info(AT_HWCAP, &hwcap, sizeof(hwcap));

	// hwcap is only inspected when the query succeeded.
	return err == 0 && (hwcap & HWCAP_CRC32) != 0;

#elif defined(_WIN32)
	return IsProcessorFeaturePresent(
			PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE);

#elif defined(__APPLE__) && defined(HAVE_SYSCTLBYNAME)
	int supported = 0;
	size_t len = sizeof(supported);

	// sysctlbyname() identifies the CPU feature by name. Apple's
	// documentation lists "hw.optional.armv8_crc32" here:
	// https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics#3915619
	const int err = sysctlbyname("hw.optional.armv8_crc32", &supported,
			&len, NULL, 0);

	return err == 0 && supported;

#else
	// No runtime detection method is available, which must be a compile
	// time error. The checks in crc_common.h should ensure that a
	// detection method always exists when this function is built.
	// Returning false here would be possible, but it is inefficient for
	// binary size and runtime since only the generic method could ever
	// be used.
#	error Runtime detection method unavailable.
#endif
}
#endif
121
122
#endif // LZMA_CRC32_ARM64_H
123
124