Path: blob/main/contrib/arm-optimized-routines/math/aarch64/experimental/erfc_1u8.c
48375 views
/*1* Double-precision erfc(x) function.2*3* Copyright (c) 2023-2024, Arm Limited.4* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception5*/67#include "math_config.h"8#include "test_sig.h"9#include "test_defs.h"1011#define Shift 0x1p4512#define P20 0x1.5555555555555p-2 /* 1/3. */13#define P21 0x1.5555555555555p-1 /* 2/3. */1415#define P40 0x1.999999999999ap-4 /* 1/10. */16#define P41 0x1.999999999999ap-2 /* 2/5. */17#define P42 0x1.11111111111111p-3 /* 2/15. */1819#define P50 0x1.5555555555555p-3 /* 1/6. */20#define P51 0x1.c71c71c71c71cp-3 /* 2/9. */21#define P52 0x1.6c16c16c16c17p-5 /* 2/45. */2223/* Qi = (i+1) / i. */24#define Q5 0x1.3333333333333p025#define Q6 0x1.2aaaaaaaaaaabp026#define Q7 0x1.2492492492492p027#define Q8 0x1.2p028#define Q9 0x1.1c71c71c71c72p02930/* Ri = -2 * i / ((i+1)*(i+2)). */31#define R5 -0x1.e79e79e79e79ep-332#define R6 -0x1.b6db6db6db6dbp-333#define R7 -0x1.8e38e38e38e39p-334#define R8 -0x1.6c16c16c16c17p-335#define R9 -0x1.4f2094f2094f2p-33637/* Fast erfc approximation based on series expansion near x rounded to38nearest multiple of 1/128.39Let d = x - r, and scale = 2 / sqrt(pi) * exp(-r^2). For x near r,4041erfc(x) ~ erfc(r) - scale * d * poly(r, d), with4243poly(r, d) = 1 - r d + (2/3 r^2 - 1/3) d^2 - r (1/3 r^2 - 1/2) d^344+ (2/15 r^4 - 2/5 r^2 + 1/10) d^445- r * (2/45 r^4 - 2/9 r^2 + 1/6) d^546+ p6(r) d^6 + ... + p10(r) d^104748Polynomials p6(r) to p10(r) are computed using recurrence relation49502(i+1)p_i + 2r(i+2)p_{i+1} + (i+2)(i+3)p_{i+2} = 0,51with p0 = 1, and p1(r) = -r.5253Values of erfc(r) and scale(r) are read from lookup tables. Stored values54are scaled to avoid hitting the subnormal range.5556Note that for x < 0, erfc(x) = 2.0 - erfc(-x).5758Maximum measured error: 1.71 ULP59erfc(0x1.46cfe976733p+4) got 0x1.e15fcbea3e7afp-60860want 0x1.e15fcbea3e7adp-608. */61double62erfc (double x)63{64/* Get top words and sign. */65uint64_t ix = asuint64 (x);66uint64_t ia = ix & 0x7fffffffffffffff;67double a = asdouble (ia);68uint64_t sign = ix & ~0x7fffffffffffffff;6970/* erfc(nan)=nan, erfc(+inf)=0 and erfc(-inf)=2. */71if (unlikely (ia >= 0x7ff0000000000000))72return asdouble (sign >> 1) + 1.0 / x; /* Special cases. */7374/* Return early for large enough negative values. */75if (x < -6.0)76return 2.0;7778/* For |x| < 3487.0/128.0, the following approximation holds. */79if (likely (ia < 0x403b3e0000000000))80{81/* |x| < 0x1p-511 => accurate to 0.5 ULP. */82if (unlikely (ia < asuint64 (0x1p-511)))83return 1.0 - x;8485/* Lookup erfc(r) and scale(r) in tables, e.g. set erfc(r) to 1 and scale86to 2/sqrt(pi), when x reduced to r = 0. */87double z = a + Shift;88uint64_t i = asuint64 (z) - asuint64 (Shift);89double r = z - Shift;90/* These values are scaled by 2^128. */91double erfcr = __v_erfc_data.tab[i].erfc;92double scale = __v_erfc_data.tab[i].scale;9394/* erfc(x) ~ erfc(r) - scale * d * poly (r, d). */95double d = a - r;96double d2 = d * d;97double r2 = r * r;98/* Compute p_i as a regular (low-order) polynomial. */99double p1 = -r;100double p2 = fma (P21, r2, -P20);101double p3 = -r * fma (P20, r2, -0.5);102double p4 = fma (fma (P42, r2, -P41), r2, P40);103double p5 = -r * fma (fma (P52, r2, -P51), r2, P50);104/* Compute p_i using recurrence relation:105p_{i+2} = (p_i + r * Q_{i+1} * p_{i+1}) * R_{i+1}. */106double p6 = fma (Q5 * r, p5, p4) * R5;107double p7 = fma (Q6 * r, p6, p5) * R6;108double p8 = fma (Q7 * r, p7, p6) * R7;109double p9 = fma (Q8 * r, p8, p7) * R8;110double p10 = fma (Q9 * r, p9, p8) * R9;111/* Compute polynomial in d using pairwise Horner scheme. */112double p90 = fma (p10, d, p9);113double p78 = fma (p8, d, p7);114double p56 = fma (p6, d, p5);115double p34 = fma (p4, d, p3);116double p12 = fma (p2, d, p1);117double y = fma (p90, d2, p78);118y = fma (y, d2, p56);119y = fma (y, d2, p34);120y = fma (y, d2, p12);121122y = fma (-fma (y, d2, d), scale, erfcr);123124/* Handle sign and scale back in a single fma. */125double off = asdouble (sign >> 1);126double fac = asdouble (asuint64 (0x1p-128) | sign);127y = fma (y, fac, off);128129if (unlikely (x > 26.0))130{131/* The underflow exception needs to be signaled explicitly when132result gets into the subnormal range. */133if (unlikely (y < 0x1p-1022))134force_eval_double (opt_barrier_double (0x1p-1022) * 0x1p-1022);135/* Set errno to ERANGE if result rounds to 0. */136return __math_check_uflow (y);137}138139return y;140}141/* Above the threshold (x > 3487.0/128.0) erfc is constant and needs to raise142underflow exception for positive x. */143return __math_uflow (0);144}145146TEST_SIG (S, D, 1, erfc, -6.0, 28.0)147TEST_ULP (erfc, 1.21)148TEST_SYM_INTERVAL (erfc, 0, 0x1p-26, 40000)149TEST_INTERVAL (erfc, 0x1p-26, 28.0, 100000)150TEST_INTERVAL (erfc, -0x1p-26, -6.0, 100000)151TEST_INTERVAL (erfc, 28.0, inf, 40000)152TEST_INTERVAL (erfc, -6.0, -inf, 40000)153154155