Path: blob/main/contrib/llvm-project/libcxx/src/include/ryu/d2s_intrinsics.h
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

// Copyright 2018 Ulf Adams
// Copyright (c) Microsoft Corporation. All rights reserved.

// Boost Software License - Version 1.0 - August 17th, 2003

// Permission is hereby granted, free of charge, to any person or organization
// obtaining a copy of the software and accompanying documentation covered by
// this license (the "Software") to use, reproduce, display, distribute,
// execute, and transmit the Software, and to prepare derivative works of the
// Software, and to permit third-parties to whom the Software is furnished to
// do so, all subject to the following:

// The copyright notices in the Software and this entire statement, including
// the above license grant, this restriction and the following disclaimer,
// must be included in all copies of the Software, in whole or in part, and
// all derivative works of the Software, unless such copies or derivative
// works are solely in the form of machine-executable object code generated by
// a source language processor.

// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
// SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
// FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS IN THE SOFTWARE.

#ifndef _LIBCPP_SRC_INCLUDE_RYU_DS2_INTRINSICS_H
#define _LIBCPP_SRC_INCLUDE_RYU_DS2_INTRINSICS_H

// Avoid formatting to keep the changes relative to the original code minimal.
// clang-format off

#include <__assert>
#include <__config>

#include "include/ryu/ryu.h"

_LIBCPP_BEGIN_NAMESPACE_STD

#if defined(_M_X64) && defined(_MSC_VER)
#define _LIBCPP_INTRINSIC128 1
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __ryu_umul128(const uint64_t __a, const uint64_t __b, uint64_t* const __productHi) {
  return _umul128(__a, __b, __productHi);
}
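
// For example, multiplying 2^32 by itself lands exactly on the 64-bit
// boundary: __ryu_umul128(1ull << 32, 1ull << 32, &__hi) returns 0 and sets
// __hi to 1, because the full 128-bit product is 2^64.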

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __ryu_shiftright128(const uint64_t __lo, const uint64_t __hi, const uint32_t __dist) {
  // For the __shiftright128 intrinsic, the shift value is always
  // modulo 64.
  // In the current implementation of the double-precision version
  // of Ryu, the shift value is always < 64.
  // (The shift value is in the range [49, 58].)
  // Check this here in case a future change requires larger shift
  // values. In this case this function needs to be adjusted.
  _LIBCPP_ASSERT_INTERNAL(__dist < 64, "");
  return __shiftright128(__lo, __hi, static_cast<unsigned char>(__dist));
}

// ^^^ intrinsics available ^^^ / vvv __int128 available vvv
#elif defined(__SIZEOF_INT128__) && ( \
    (defined(__clang__) && !defined(_MSC_VER)) || \
    (defined(__GNUC__) && !defined(__clang__) && !defined(__CUDACC__)))
#define _LIBCPP_INTRINSIC128 1
// We have unsigned __int128 support in Clang or GCC.
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __ryu_umul128(const uint64_t __a, const uint64_t __b, uint64_t* const __productHi) {
  auto __temp = __a * (unsigned __int128)__b;
  *__productHi = __temp >> 64;
  return static_cast<uint64_t>(__temp);
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __ryu_shiftright128(const uint64_t __lo, const uint64_t __hi, const uint32_t __dist) {
  // In the current implementation of the double-precision version
  // of Ryu, the shift value is always < 64.
  // (The shift value is in the range [49, 58].)
  // Check this here in case a future change requires larger shift
  // values. In this case this function needs to be adjusted.
  _LIBCPP_ASSERT_INTERNAL(__dist < 64, "");
  auto __temp = __lo | ((unsigned __int128)__hi << 64);
  // For x64 128-bit shifts using the `shrd` instruction and two 64-bit
  // registers, the shift value is modulo 64. Thus the `& 63` is free.
  return static_cast<uint64_t>(__temp >> (__dist & 63));
}
#else // ^^^ __int128 available ^^^ / vvv intrinsics unavailable vvv

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline _LIBCPP_ALWAYS_INLINE uint64_t __ryu_umul128(const uint64_t __a, const uint64_t __b, uint64_t* const __productHi) {
  // TRANSITION, VSO-634761
  // The casts here help MSVC to avoid calls to the __allmul library function.
  const uint32_t __aLo = static_cast<uint32_t>(__a);
  const uint32_t __aHi = static_cast<uint32_t>(__a >> 32);
  const uint32_t __bLo = static_cast<uint32_t>(__b);
  const uint32_t __bHi = static_cast<uint32_t>(__b >> 32);

  const uint64_t __b00 = static_cast<uint64_t>(__aLo) * __bLo;
  const uint64_t __b01 = static_cast<uint64_t>(__aLo) * __bHi;
  const uint64_t __b10 = static_cast<uint64_t>(__aHi) * __bLo;
  const uint64_t __b11 = static_cast<uint64_t>(__aHi) * __bHi;

  const uint32_t __b00Lo = static_cast<uint32_t>(__b00);
  const uint32_t __b00Hi = static_cast<uint32_t>(__b00 >> 32);

  const uint64_t __mid1 = __b10 + __b00Hi;
  const uint32_t __mid1Lo = static_cast<uint32_t>(__mid1);
  const uint32_t __mid1Hi = static_cast<uint32_t>(__mid1 >> 32);

  const uint64_t __mid2 = __b01 + __mid1Lo;
  const uint32_t __mid2Lo = static_cast<uint32_t>(__mid2);
  const uint32_t __mid2Hi = static_cast<uint32_t>(__mid2 >> 32);

  const uint64_t __pHi = __b11 + __mid1Hi + __mid2Hi;
  const uint64_t __pLo = (static_cast<uint64_t>(__mid2Lo) << 32) | __b00Lo;

  *__productHi = __pHi;
  return __pLo;
}
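
// The fallback above is schoolbook multiplication in base 2^32: with
// __a == 2^32 * __aHi + __aLo and __b == 2^32 * __bHi + __bLo, the product is
// 2^64 * __b11 + 2^32 * (__b01 + __b10) + __b00, and the __mid1/__mid2 sums
// propagate the carries between the partial products. For example,
// __ryu_umul128(0xFFFFFFFFFFFFFFFFu, 2u, &__hi) sets __hi to 1 and returns
// 0xFFFFFFFFFFFFFFFEu, the two halves of 2^65 - 2.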

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __ryu_shiftright128(const uint64_t __lo, const uint64_t __hi, const uint32_t __dist) {
  // We don't need to handle the case __dist >= 64 here (see above).
  _LIBCPP_ASSERT_INTERNAL(__dist < 64, "");
#ifdef _LIBCPP_64_BIT
  _LIBCPP_ASSERT_INTERNAL(__dist > 0, "");
  return (__hi << (64 - __dist)) | (__lo >> __dist);
#else // ^^^ 64-bit ^^^ / vvv 32-bit vvv
  // Avoid a 64-bit shift by taking advantage of the range of shift values.
  _LIBCPP_ASSERT_INTERNAL(__dist >= 32, "");
  return (__hi << (64 - __dist)) | (static_cast<uint32_t>(__lo >> 32) >> (__dist - 32));
#endif // ^^^ 32-bit ^^^
}

#endif // ^^^ intrinsics unavailable ^^^

#ifndef _LIBCPP_64_BIT

// Returns the high 64 bits of the 128-bit product of __a and __b.
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __umulh(const uint64_t __a, const uint64_t __b) {
  // Reuse the __ryu_umul128 implementation.
  // Optimizers will likely eliminate the instructions used to compute the
  // low part of the product.
  uint64_t __hi;
  (void) __ryu_umul128(__a, __b, &__hi);
  return __hi;
}

// On 32-bit platforms, compilers typically generate calls to library
// functions for 64-bit divisions, even if the divisor is a constant.
//
// TRANSITION, LLVM-37932
//
// The functions here perform division-by-constant using multiplications
// in the same way as 64-bit compilers would do.
//
// NB:
// The multipliers and shift values are the ones generated by clang x64
// for expressions like x/5, x/10, etc.
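
// For example, the __div5 multiplier below is
// 0xCCCCCCCCCCCCCCCD == (2^66 + 1) / 5, i.e. 2^66 / 5 rounded up, so
// __umulh(__x, 0xCCCCCCCCCCCCCCCDu) >> 2 computes
// floor(__x * ceil(2^66 / 5) / 2^66), which equals __x / 5 for every
// 64-bit __x: since __x < 2^64, the rounding error __x / (5 * 2^66) < 1/20
// can never push the result past the next integer.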
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div5(const uint64_t __x) {
  return __umulh(__x, 0xCCCCCCCCCCCCCCCDu) >> 2;
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div10(const uint64_t __x) {
  return __umulh(__x, 0xCCCCCCCCCCCCCCCDu) >> 3;
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div100(const uint64_t __x) {
  return __umulh(__x >> 2, 0x28F5C28F5C28F5C3u) >> 2;
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div1e8(const uint64_t __x) {
  return __umulh(__x, 0xABCC77118461CEFDu) >> 26;
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div1e9(const uint64_t __x) {
  return __umulh(__x >> 9, 0x44B82FA09B5A53u) >> 11;
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint32_t __mod1e9(const uint64_t __x) {
  // Avoid 64-bit math as much as possible.
  // Returning static_cast<uint32_t>(__x - 1000000000 * __div1e9(__x)) would
  // perform 32x64-bit multiplication and 64-bit subtraction.
  // __x and 1000000000 * __div1e9(__x) are guaranteed to differ by
  // less than 10^9, so their highest 32 bits must be identical,
  // so we can truncate both sides to uint32_t before subtracting.
  // We can also simplify static_cast<uint32_t>(1000000000 * __div1e9(__x)).
  // We can truncate before multiplying instead of after, as multiplying
  // the highest 32 bits of __div1e9(__x) can't affect the lowest 32 bits.
  return static_cast<uint32_t>(__x) - 1000000000 * static_cast<uint32_t>(__div1e9(__x));
}

#else // ^^^ 32-bit ^^^ / vvv 64-bit vvv

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div5(const uint64_t __x) {
  return __x / 5;
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div10(const uint64_t __x) {
  return __x / 10;
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div100(const uint64_t __x) {
  return __x / 100;
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div1e8(const uint64_t __x) {
  return __x / 100000000;
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div1e9(const uint64_t __x) {
  return __x / 1000000000;
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint32_t __mod1e9(const uint64_t __x) {
  return static_cast<uint32_t>(__x - 1000000000 * __div1e9(__x));
}

#endif // ^^^ 64-bit ^^^

// Returns the largest __count such that __value is divisible by 5^__count;
// e.g. __pow5Factor(75) == 2, since 75 == 3 * 5^2. __value must be nonzero.
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint32_t __pow5Factor(uint64_t __value) {
  uint32_t __count = 0;
  for (;;) {
    _LIBCPP_ASSERT_INTERNAL(__value != 0, "");
    const uint64_t __q = __div5(__value);
    const uint32_t __r = static_cast<uint32_t>(__value) - 5 * static_cast<uint32_t>(__q);
    if (__r != 0) {
      break;
    }
    __value = __q;
    ++__count;
  }
  return __count;
}

// Returns true if __value is divisible by 5^__p.
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline bool __multipleOfPowerOf5(const uint64_t __value, const uint32_t __p) {
  // I tried a case distinction on __p, but there was no performance difference.
  return __pow5Factor(__value) >= __p;
}

// Returns true if __value is divisible by 2^__p.
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline bool __multipleOfPowerOf2(const uint64_t __value, const uint32_t __p) {
  _LIBCPP_ASSERT_INTERNAL(__value != 0, "");
  _LIBCPP_ASSERT_INTERNAL(__p < 64, "");
  // __builtin_ctzll doesn't appear to be faster here.
  return (__value & ((1ull << __p) - 1)) == 0;
}

_LIBCPP_END_NAMESPACE_STD

// clang-format on

#endif // _LIBCPP_SRC_INCLUDE_RYU_DS2_INTRINSICS_H