Path: blob/main/contrib/llvm-project/libcxx/src/include/ryu/d2s_intrinsics.h
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

// Copyright 2018 Ulf Adams
// Copyright (c) Microsoft Corporation. All rights reserved.

// Boost Software License - Version 1.0 - August 17th, 2003

// Permission is hereby granted, free of charge, to any person or organization
// obtaining a copy of the software and accompanying documentation covered by
// this license (the "Software") to use, reproduce, display, distribute,
// execute, and transmit the Software, and to prepare derivative works of the
// Software, and to permit third-parties to whom the Software is furnished to
// do so, all subject to the following:

// The copyright notices in the Software and this entire statement, including
// the above license grant, this restriction and the following disclaimer,
// must be included in all copies of the Software, in whole or in part, and
// all derivative works of the Software, unless such copies or derivative
// works are solely in the form of machine-executable object code generated by
// a source language processor.

// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
// SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
// FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS IN THE SOFTWARE.

#ifndef _LIBCPP_SRC_INCLUDE_RYU_DS2_INTRINSICS_H
#define _LIBCPP_SRC_INCLUDE_RYU_DS2_INTRINSICS_H

// Avoid formatting to keep the changes relative to the original code minimal.
// clang-format off

#include <__assert>
#include <__config>

#include "include/ryu/ryu.h"

_LIBCPP_BEGIN_NAMESPACE_STD

#if defined(_M_X64) && defined(_MSC_VER)
#define _LIBCPP_INTRINSIC128 1
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __ryu_umul128(const uint64_t __a, const uint64_t __b, uint64_t* const __productHi) {
  return _umul128(__a, __b, __productHi);
}
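
// For example, multiplying 2^32 by itself lands exactly on the 64-bit
// boundary: __ryu_umul128(1ull << 32, 1ull << 32, &__hi) returns 0 and sets
// __hi to 1, because the full 128-bit product is 2^64.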

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __ryu_shiftright128(const uint64_t __lo, const uint64_t __hi, const uint32_t __dist) {
  // For the __shiftright128 intrinsic, the shift value is always
  // modulo 64.
  // In the current implementation of the double-precision version
  // of Ryu, the shift value is always < 64.
  // (The shift value is in the range [49, 58].)
  // Check this here in case a future change requires larger shift
  // values. In this case this function needs to be adjusted.
  _LIBCPP_ASSERT_INTERNAL(__dist < 64, "");
  return __shiftright128(__lo, __hi, static_cast<unsigned char>(__dist));
}

// ^^^ intrinsics available ^^^ / vvv __int128 available vvv
#elif defined(__SIZEOF_INT128__) && ( \
    (defined(__clang__) && !defined(_MSC_VER)) || \
    (defined(__GNUC__) && !defined(__clang__) && !defined(__CUDACC__)))
#define _LIBCPP_INTRINSIC128 1
// We have unsigned __int128 support in Clang or GCC.
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __ryu_umul128(const uint64_t __a, const uint64_t __b, uint64_t* const __productHi) {
  auto __temp = __a * (unsigned __int128)__b;
  *__productHi = __temp >> 64;
  return static_cast<uint64_t>(__temp);
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __ryu_shiftright128(const uint64_t __lo, const uint64_t __hi, const uint32_t __dist) {
  // In the current implementation of the double-precision version
  // of Ryu, the shift value is always < 64.
  // (The shift value is in the range [49, 58].)
  // Check this here in case a future change requires larger shift
  // values. In this case this function needs to be adjusted.
  _LIBCPP_ASSERT_INTERNAL(__dist < 64, "");
  auto __temp = __lo | ((unsigned __int128)__hi << 64);
  // For x64 128-bit shifts using the `shrd` instruction and two 64-bit
  // registers, the shift value is modulo 64. Thus the `& 63` is free.
  return static_cast<uint64_t>(__temp >> (__dist & 63));
}
#else // ^^^ __int128 available ^^^ / vvv intrinsics unavailable vvv

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline _LIBCPP_ALWAYS_INLINE uint64_t __ryu_umul128(const uint64_t __a, const uint64_t __b, uint64_t* const __productHi) {
  // TRANSITION, VSO-634761
  // The casts here help MSVC to avoid calls to the __allmul library function.
  const uint32_t __aLo = static_cast<uint32_t>(__a);
  const uint32_t __aHi = static_cast<uint32_t>(__a >> 32);
  const uint32_t __bLo = static_cast<uint32_t>(__b);
  const uint32_t __bHi = static_cast<uint32_t>(__b >> 32);

  const uint64_t __b00 = static_cast<uint64_t>(__aLo) * __bLo;
  const uint64_t __b01 = static_cast<uint64_t>(__aLo) * __bHi;
  const uint64_t __b10 = static_cast<uint64_t>(__aHi) * __bLo;
  const uint64_t __b11 = static_cast<uint64_t>(__aHi) * __bHi;

  const uint32_t __b00Lo = static_cast<uint32_t>(__b00);
  const uint32_t __b00Hi = static_cast<uint32_t>(__b00 >> 32);

  const uint64_t __mid1 = __b10 + __b00Hi;
  const uint32_t __mid1Lo = static_cast<uint32_t>(__mid1);
  const uint32_t __mid1Hi = static_cast<uint32_t>(__mid1 >> 32);

  const uint64_t __mid2 = __b01 + __mid1Lo;
  const uint32_t __mid2Lo = static_cast<uint32_t>(__mid2);
  const uint32_t __mid2Hi = static_cast<uint32_t>(__mid2 >> 32);

  const uint64_t __pHi = __b11 + __mid1Hi + __mid2Hi;
  const uint64_t __pLo = (static_cast<uint64_t>(__mid2Lo) << 32) | __b00Lo;

  *__productHi = __pHi;
  return __pLo;
}
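
// The fallback above is schoolbook multiplication in base 2^32: with
// __a == 2^32 * __aHi + __aLo and __b == 2^32 * __bHi + __bLo, the product is
// 2^64 * __b11 + 2^32 * (__b01 + __b10) + __b00, and the __mid1/__mid2 sums
// propagate the carries between the partial products. For example,
// __ryu_umul128(0xFFFFFFFFFFFFFFFFu, 2u, &__hi) sets __hi to 1 and returns
// 0xFFFFFFFFFFFFFFFEu, the two halves of 2^65 - 2.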

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __ryu_shiftright128(const uint64_t __lo, const uint64_t __hi, const uint32_t __dist) {
  // We don't need to handle the case __dist >= 64 here (see above).
  _LIBCPP_ASSERT_INTERNAL(__dist < 64, "");
#ifdef _LIBCPP_64_BIT
  _LIBCPP_ASSERT_INTERNAL(__dist > 0, "");
  return (__hi << (64 - __dist)) | (__lo >> __dist);
#else // ^^^ 64-bit ^^^ / vvv 32-bit vvv
  // Avoid a 64-bit shift by taking advantage of the range of shift values.
  _LIBCPP_ASSERT_INTERNAL(__dist >= 32, "");
  return (__hi << (64 - __dist)) | (static_cast<uint32_t>(__lo >> 32) >> (__dist - 32));
#endif // ^^^ 32-bit ^^^
}

#endif // ^^^ intrinsics unavailable ^^^

#ifndef _LIBCPP_64_BIT

// Returns the high 64 bits of the 128-bit product of __a and __b.
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __umulh(const uint64_t __a, const uint64_t __b) {
  // Reuse the __ryu_umul128 implementation.
  // Optimizers will likely eliminate the instructions used to compute the
  // low part of the product.
  uint64_t __hi;
  (void) __ryu_umul128(__a, __b, &__hi);
  return __hi;
}

// On 32-bit platforms, compilers typically generate calls to library
// functions for 64-bit divisions, even if the divisor is a constant.
//
// TRANSITION, LLVM-37932
//
// The functions here perform division-by-constant using multiplications
// in the same way as 64-bit compilers would do.
//
// NB:
// The multipliers and shift values are the ones generated by clang x64
// for expressions like x/5, x/10, etc.
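
// For example, the __div5 multiplier below is
// 0xCCCCCCCCCCCCCCCD == (2^66 + 1) / 5, i.e. 2^66 / 5 rounded up, so
// __umulh(__x, 0xCCCCCCCCCCCCCCCDu) >> 2 computes
// floor(__x * ceil(2^66 / 5) / 2^66), which equals __x / 5 for every
// 64-bit __x: since __x < 2^64, the rounding error __x / (5 * 2^66) < 1/20
// can never push the result past the next integer.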
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div5(const uint64_t __x) {
  return __umulh(__x, 0xCCCCCCCCCCCCCCCDu) >> 2;
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div10(const uint64_t __x) {
  return __umulh(__x, 0xCCCCCCCCCCCCCCCDu) >> 3;
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div100(const uint64_t __x) {
  return __umulh(__x >> 2, 0x28F5C28F5C28F5C3u) >> 2;
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div1e8(const uint64_t __x) {
  return __umulh(__x, 0xABCC77118461CEFDu) >> 26;
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div1e9(const uint64_t __x) {
  return __umulh(__x >> 9, 0x44B82FA09B5A53u) >> 11;
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint32_t __mod1e9(const uint64_t __x) {
  // Avoid 64-bit math as much as possible.
  // Returning static_cast<uint32_t>(__x - 1000000000 * __div1e9(__x)) would
  // perform 32x64-bit multiplication and 64-bit subtraction.
  // __x and 1000000000 * __div1e9(__x) are guaranteed to differ by
  // less than 10^9, so their highest 32 bits must be identical,
  // so we can truncate both sides to uint32_t before subtracting.
  // We can also simplify static_cast<uint32_t>(1000000000 * __div1e9(__x)).
  // We can truncate before multiplying instead of after, as multiplying
  // the highest 32 bits of __div1e9(__x) can't affect the lowest 32 bits.
  return static_cast<uint32_t>(__x) - 1000000000 * static_cast<uint32_t>(__div1e9(__x));
}

#else // ^^^ 32-bit ^^^ / vvv 64-bit vvv

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div5(const uint64_t __x) {
  return __x / 5;
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div10(const uint64_t __x) {
  return __x / 10;
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div100(const uint64_t __x) {
  return __x / 100;
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div1e8(const uint64_t __x) {
  return __x / 100000000;
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div1e9(const uint64_t __x) {
  return __x / 1000000000;
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint32_t __mod1e9(const uint64_t __x) {
  return static_cast<uint32_t>(__x - 1000000000 * __div1e9(__x));
}

#endif // ^^^ 64-bit ^^^

// Returns the largest __count such that __value is divisible by 5^__count;
// e.g. __pow5Factor(75) == 2, since 75 == 3 * 5^2. __value must be nonzero.
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint32_t __pow5Factor(uint64_t __value) {
  uint32_t __count = 0;
  for (;;) {
    _LIBCPP_ASSERT_INTERNAL(__value != 0, "");
    const uint64_t __q = __div5(__value);
    const uint32_t __r = static_cast<uint32_t>(__value) - 5 * static_cast<uint32_t>(__q);
    if (__r != 0) {
      break;
    }
    __value = __q;
    ++__count;
  }
  return __count;
}

// Returns true if __value is divisible by 5^__p.
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline bool __multipleOfPowerOf5(const uint64_t __value, const uint32_t __p) {
  // I tried a case distinction on __p, but there was no performance difference.
  return __pow5Factor(__value) >= __p;
}

// Returns true if __value is divisible by 2^__p.
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline bool __multipleOfPowerOf2(const uint64_t __value, const uint32_t __p) {
  _LIBCPP_ASSERT_INTERNAL(__value != 0, "");
  _LIBCPP_ASSERT_INTERNAL(__p < 64, "");
  // __builtin_ctzll doesn't appear to be faster here.
  return (__value & ((1ull << __p) - 1)) == 0;
}

_LIBCPP_END_NAMESPACE_STD

// clang-format on

#endif // _LIBCPP_SRC_INCLUDE_RYU_DS2_INTRINSICS_H