/* Path: blob/main/contrib/arm-optimized-routines/math/aarch64/experimental/log1p_2u.c
   48378 views  */
/*1* Double-precision log(1+x) function.2*3* Copyright (c) 2022-2024, Arm Limited.4* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception5*/67#include "poly_scalar_f64.h"8#include "math_config.h"9#include "test_sig.h"10#include "test_defs.h"1112#define Ln2Hi 0x1.62e42fefa3800p-113#define Ln2Lo 0x1.ef35793c76730p-4514#define HfRt2Top 0x3fe6a09e /* top32(asuint64(sqrt(2)/2)). */15#define OneMHfRt2Top \160x00095f62 /* top32(asuint64(1)) - top32(asuint64(sqrt(2)/2)). */17#define OneTop12 0x3ff18#define BottomMask 0xffffffff19#define OneMHfRt2 0x3fd2bec33301886620#define Rt2MOne 0x3fda827999fcef3221#define AbsMask 0x7fffffffffffffff22#define ExpM63 0x3c002324static inline double25eval_poly (double f)26{27double f2 = f * f;28double f4 = f2 * f2;29double f8 = f4 * f4;30return estrin_18_f64 (f, f2, f4, f8, f8 * f8, __log1p_data.coeffs);31}3233/* log1p approximation using polynomial on reduced interval. Largest34observed errors are near the lower boundary of the region where k35is 0.36Maximum measured error: 1.75ULP.37log1p(-0x1.2e1aea97b3e5cp-2) got -0x1.65fb8659a2f9p-238want -0x1.65fb8659a2f92p-2. */39double40log1p (double x)41{42uint64_t ix = asuint64 (x);43uint64_t ia = ix & AbsMask;44uint32_t ia16 = ia >> 48;4546/* Handle special cases first. */47if (unlikely (ia16 >= 0x7ff0 || ix >= 0xbff000000000000048|| ix == 0x8000000000000000))49{50if (ix == 0x8000000000000000 || ix == 0x7ff0000000000000)51{52/* x == -0 => log1p(x) = -0.53x == Inf => log1p(x) = Inf. */54return x;55}56if (ix == 0xbff0000000000000)57{58/* x == -1 => log1p(x) = -Inf. */59return __math_divzero (-1);60;61}62if (ia16 >= 0x7ff0)63{64/* x == +/-NaN => log1p(x) = NaN. */65return __math_invalid (asdouble (ia));66}67/* x < -1 => log1p(x) = NaN.68x == -Inf => log1p(x) = NaN. 
*/69return __math_invalid (x);70}7172/* With x + 1 = t * 2^k (where t = f + 1 and k is chosen such that f73is in [sqrt(2)/2, sqrt(2)]):74log1p(x) = k*log(2) + log1p(f).7576f may not be representable exactly, so we need a correction term:77let m = round(1 + x), c = (1 + x) - m.78c << m: at very small x, log1p(x) ~ x, hence:79log(1+x) - log(m) ~ c/m.8081We therefore calculate log1p(x) by k*log2 + log1p(f) + c/m. */8283uint64_t sign = ix & ~AbsMask;84if (ia <= OneMHfRt2 || (!sign && ia <= Rt2MOne))85{86if (unlikely (ia16 <= ExpM63))87{88/* If exponent of x <= -63 then shortcut the polynomial and avoid89underflow by just returning x, which is exactly rounded in this90region. */91return x;92}93/* If x is in [sqrt(2)/2 - 1, sqrt(2) - 1] then we can shortcut all the94logic below, as k = 0 and f = x and therefore representable exactly.95All we need is to return the polynomial. */96return fma (x, eval_poly (x) * x, x);97}9899/* Obtain correctly scaled k by manipulation in the exponent. */100double m = x + 1;101uint64_t mi = asuint64 (m);102uint32_t u = (mi >> 32) + OneMHfRt2Top;103int32_t k = (int32_t) (u >> 20) - OneTop12;104105/* Correction term c/m. */106double cm = (x - (m - 1)) / m;107108/* Reduce x to f in [sqrt(2)/2, sqrt(2)]. */109uint32_t utop = (u & 0x000fffff) + HfRt2Top;110uint64_t u_red = ((uint64_t) utop << 32) | (mi & BottomMask);111double f = asdouble (u_red) - 1;112113/* Approximate log1p(x) on the reduced input using a polynomial. Because114log1p(0)=0 we choose an approximation of the form:115x + C0*x^2 + C1*x^3 + C2x^4 + ...116Hence approximation has the form f + f^2 * P(f)117where P(x) = C0 + C1*x + C2x^2 + ... 
*/118double p = fma (f, eval_poly (f) * f, f);119120double kd = k;121double y = fma (Ln2Lo, kd, cm);122return y + fma (Ln2Hi, kd, p);123}124125TEST_SIG (S, D, 1, log1p, -0.9, 10.0)126TEST_ULP (log1p, 1.26)127TEST_SYM_INTERVAL (log1p, 0.0, 0x1p-23, 50000)128TEST_SYM_INTERVAL (log1p, 0x1p-23, 0.001, 50000)129TEST_SYM_INTERVAL (log1p, 0.001, 1.0, 50000)130TEST_SYM_INTERVAL (log1p, 1.0, inf, 5000)131132133