Path: blob/main/contrib/arm-optimized-routines/math/aarch64/experimental/erfinvl.c
48375 views
/*1* Extended precision inverse error function.2*3* Copyright (c) 2023-2024, Arm Limited.4* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception5*/6#define _GNU_SOURCE7#include <math.h>8#include <stdbool.h>9#include <float.h>1011#include "math_config.h"12#include "poly_scalar_f64.h"1314#define SQRT_PIl 0x1.c5bf891b4ef6aa79c3b0520d5db9p0l15#define HF_SQRT_PIl 0x1.c5bf891b4ef6aa79c3b0520d5db9p-1l1617const static struct18{19/* We use P_N and Q_N to refer to arrays of coefficients, where P_N is the20coeffs of the numerator in table N of Blair et al, and Q_N is the coeffs21of the denominator. */22double P_17[7], Q_17[7], P_37[8], Q_37[8], P_57[9], Q_57[10];23} data = {24.P_17 = { 0x1.007ce8f01b2e8p+4, -0x1.6b23cc5c6c6d7p+6, 0x1.74e5f6ceb3548p+7,25-0x1.5200bb15cc6bbp+7, 0x1.05d193233a849p+6, -0x1.148c5474ee5e1p+3,260x1.689181bbafd0cp-3 },27.Q_17 = { 0x1.d8fb0f913bd7bp+3, -0x1.6d7f25a3f1c24p+6, 0x1.a450d8e7f4cbbp+7,28-0x1.bc3480485857p+7, 0x1.ae6b0c504ee02p+6, -0x1.499dfec1a7f5fp+4,290x1p+0 },30.P_37 = { -0x1.f3596123109edp-7, 0x1.60b8fe375999ep-2, -0x1.779bb9bef7c0fp+1,310x1.786ea384470a2p+3, -0x1.6a7c1453c85d3p+4, 0x1.31f0fc5613142p+4,32-0x1.5ea6c007d4dbbp+2, 0x1.e66f265ce9e5p-3 },33.Q_37 = { -0x1.636b2dcf4edbep-7, 0x1.0b5411e2acf29p-2, -0x1.3413109467a0bp+1,340x1.563e8136c554ap+3, -0x1.7b77aab1dcafbp+4, 0x1.8a3e174e05ddcp+4,35-0x1.4075c56404eecp+3, 0x1p+0 },36.P_57 = { 0x1.b874f9516f7f1p-14, 0x1.5921f2916c1c4p-7, 0x1.145ae7d5b8fa4p-2,370x1.29d6dcc3b2fb7p+1, 0x1.cabe2209a7985p+2, 0x1.11859f0745c4p+3,380x1.b7ec7bc6a2ce5p+2, 0x1.d0419e0bb42aep+1, 0x1.c5aa03eef7258p-1 },39.Q_57 = { 0x1.b8747e12691f1p-14, 0x1.59240d8ed1e0ap-7, 0x1.14aef2b181e2p-2,400x1.2cd181bcea52p+1, 0x1.e6e63e0b7aa4cp+2, 0x1.65cf8da94aa3ap+3,410x1.7e5c787b10a36p+3, 0x1.0626d68b6cea3p+3, 0x1.065c5f193abf6p+2,420x1p+0 }43};4445/* Inverse error function approximation, based on rational approximation as46described in47J. M. Blair, C. A. Edwards, and J. H. Johnson,48"Rational Chebyshev approximations for the inverse of the error function",49Math. Comp. 30, pp. 827--830 (1976).50https://doi.org/10.1090/S0025-5718-1976-0421040-7. */51static inline double52__erfinv (double x)53{54if (x == 1.0)55return __math_oflow (0);56if (x == -1.0)57return __math_oflow (1);5859double a = fabs (x);60if (a > 1)61return __math_invalid (x);6263if (a <= 0.75)64{65double t = x * x - 0.5625;66return x * horner_6_f64 (t, data.P_17) / horner_6_f64 (t, data.Q_17);67}6869if (a <= 0.9375)70{71double t = x * x - 0.87890625;72return x * horner_7_f64 (t, data.P_37) / horner_7_f64 (t, data.Q_37);73}7475double t = 1.0 / (sqrtl (-log1pl (-a)));76return horner_8_f64 (t, data.P_57)77/ (copysign (t, x) * horner_9_f64 (t, data.Q_57));78}7980/* Extended-precision variant, which uses the above (or asymptotic estimate) as81starting point for Newton refinement. This implementation is a port to C of82the version in the SpecialFunctions.jl Julia package, with relaxed stopping83criteria for the Newton refinement. */84long double85erfinvl (long double x)86{87if (x == 0)88return 0;8990double yf = __erfinv (x);91long double y;92if (isfinite (yf))93y = yf;94else95{96/* Double overflowed, use asymptotic estimate instead. */97y = copysignl (sqrtl (-logl (1.0l - fabsl (x)) * SQRT_PIl), x);98if (!isfinite (y))99return y;100}101102double eps = fabs (yf - nextafter (yf, 0));103while (true)104{105long double dy = HF_SQRT_PIl * (erfl (y) - x) * exp (y * y);106y -= dy;107/* Stopping criterion is different to Julia implementation, but is enough108to ensure result is accurate when rounded to double-precision. */109if (fabsl (dy) < eps)110break;111}112return y;113}114115116